From 747ada36ee23225d81657e4d633ac93b8ccbea7d Mon Sep 17 00:00:00 2001
From: Olaf Dabrunz <od@suse.de>
Date: Wed, 11 Jun 2008 16:35:13 +0200
Subject: pci: add PCI IDs for devices that need boot irq quirks

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/pci_ids.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 65953822c9cb..7f3f101e03c1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2235,6 +2235,10 @@
 #define PCI_DEVICE_ID_INTEL_PXH_0	0x0329
 #define PCI_DEVICE_ID_INTEL_PXH_1	0x032A
 #define PCI_DEVICE_ID_INTEL_PXHV	0x032C
+#define PCI_DEVICE_ID_INTEL_80332_0	0x0330
+#define PCI_DEVICE_ID_INTEL_80332_1	0x0332
+#define PCI_DEVICE_ID_INTEL_80333_0	0x0370
+#define PCI_DEVICE_ID_INTEL_80333_1	0x0372
 #define PCI_DEVICE_ID_INTEL_82375	0x0482
 #define PCI_DEVICE_ID_INTEL_82424	0x0483
 #define PCI_DEVICE_ID_INTEL_82378	0x0484
@@ -2307,6 +2311,7 @@
 #define PCI_DEVICE_ID_INTEL_ESB_4	0x25a4
 #define PCI_DEVICE_ID_INTEL_ESB_5	0x25a6
 #define PCI_DEVICE_ID_INTEL_ESB_9	0x25ab
+#define PCI_DEVICE_ID_INTEL_ESB_10	0x25ac
 #define PCI_DEVICE_ID_INTEL_82820_HB	0x2500
 #define PCI_DEVICE_ID_INTEL_82820_UP_HB	0x2501
 #define PCI_DEVICE_ID_INTEL_82850_HB	0x2530
-- 
cgit v1.2.3


From a9322f6488b432ddc1e89be88242c827c633fb63 Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Wed, 11 Jun 2008 16:35:14 +0200
Subject: x86, pci: introduce pci=noioapicquirk kernel cmdline option

Introduce pci=noioapicquirk kernel cmdline option to disable all boot
interrupt quirks

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt | 3 +++
 arch/x86/pci/common.c               | 4 ++++
 include/asm-x86/io_apic.h           | 4 ++++
 include/asm-x86/pci.h               | 1 +
 4 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 795c487af8e4..1aebe9dffbaa 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1518,6 +1518,9 @@ and is between 256 and 4096 characters. It is defined in the file
 		nomsi		[MSI] If the PCI_MSI kernel config parameter is
 				enabled, this kernel boot option can be used to
 				disable the use of MSI interrupts system-wide.
+		noioapicquirk	[APIC] Disable all boot interrupt quirks.
+				Safety option to keep boot IRQs enabled. This
+				should never be necessary.
 		biosirq		[X86-32] Use PCI BIOS calls to get the interrupt
 				routing table. These calls are known to be buggy
 				on several machines and they hang the machine
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 940185ecaeda..bc6a101ed7ec 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 
 static int pci_bf_sort;
 int pci_routeirq;
+int noioapicquirk;
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
@@ -495,6 +496,9 @@ char * __devinit  pcibios_setup(char *str)
 	} else if (!strcmp(str, "skip_isa_align")) {
 		pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
 		return NULL;
+	} else if (!strcmp(str, "noioapicquirk")) {
+		noioapicquirk = 1;
+		return NULL;
 	}
 	return str;
 }
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 14f82bbcb5fd..8ca0110819f4 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -157,11 +157,15 @@ extern int sis_apic_bug;
 /* 1 if "noapic" boot option passed */
 extern int skip_ioapic_setup;
 
+/* 1 if "noapic" boot option passed */
+extern int noioapicquirk;
+
 /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
 extern int timer_through_8259;
 
 static inline void disable_ioapic_setup(void)
 {
+	noioapicquirk = 1;
 	skip_ioapic_setup = 1;
 }
 
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 2db14cf17db8..30eec93a845e 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -19,6 +19,7 @@ struct pci_sysdata {
 };
 
 extern int pci_routeirq;
+extern int noioapicquirk;
 
 /* scan a bus after allocating a pci_sysdata for it */
 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-- 
cgit v1.2.3


From 9197979b518573999d52d9e85bce1680682ed85c Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Wed, 11 Jun 2008 16:35:15 +0200
Subject: x86, pci: introduce pci=ioapicreroute kernel cmdline option

Introduce pci=ioapicreroute kernel cmdline option to enable rerouting of boot
interrupts to the primary io-apic.

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt | 4 ++++
 arch/x86/pci/common.c               | 5 +++++
 include/asm-x86/io_apic.h           | 4 ++++
 include/asm-x86/pci.h               | 1 +
 4 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1aebe9dffbaa..df262b3c3d6e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1521,6 +1521,10 @@ and is between 256 and 4096 characters. It is defined in the file
 		noioapicquirk	[APIC] Disable all boot interrupt quirks.
 				Safety option to keep boot IRQs enabled. This
 				should never be necessary.
+		ioapicreroute	[APIC] Enable rerouting of boot IRQs to the
+				primary IO-APIC for bridges that cannot disable
+				boot IRQs. This fixes a source of spurious IRQs
+				when the system masks IRQs.
 		biosirq		[X86-32] Use PCI BIOS calls to get the interrupt
 				routing table. These calls are known to be buggy
 				on several machines and they hang the machine
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bc6a101ed7ec..0a9eaa736d94 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -23,6 +23,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 static int pci_bf_sort;
 int pci_routeirq;
 int noioapicquirk;
+int noioapicreroute = 1;
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
@@ -499,6 +500,10 @@ char * __devinit  pcibios_setup(char *str)
 	} else if (!strcmp(str, "noioapicquirk")) {
 		noioapicquirk = 1;
 		return NULL;
+	} else if (!strcmp(str, "ioapicreroute")) {
+		if (noioapicreroute != -1)
+			noioapicreroute = 0;
+		return NULL;
 	}
 	return str;
 }
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 8ca0110819f4..a39670ae17df 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -160,12 +160,16 @@ extern int skip_ioapic_setup;
 /* 1 if "noapic" boot option passed */
 extern int noioapicquirk;
 
+/* -1 if "noapic" boot option passed */
+extern int noioapicreroute;
+
 /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
 extern int timer_through_8259;
 
 static inline void disable_ioapic_setup(void)
 {
 	noioapicquirk = 1;
+	noioapicreroute = -1;
 	skip_ioapic_setup = 1;
 }
 
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 30eec93a845e..52a29f7668ef 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -20,6 +20,7 @@ struct pci_sysdata {
 
 extern int pci_routeirq;
 extern int noioapicquirk;
+extern int ioapicreroute;
 
 /* scan a bus after allocating a pci_sysdata for it */
 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-- 
cgit v1.2.3


From e1d3a90846b40ad3160bf4b648d36c6badad39ac Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Wed, 11 Jun 2008 16:35:17 +0200
Subject: pci, acpi: reroute PCI interrupt to legacy boot interrupt equivalent

Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the
IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel
does during interrupt handling). On chipsets where this INTx generation
cannot be disabled, we reroute the valid interrupts to their legacy
equivalent to get rid of spurious interrupts that might otherwise bring
down (vital) interrupt lines through spurious interrupt detection in
note_interrupt().

This patch benefited from discussions with Alexander Graf, Torsten Duwe,
Ihno Krumreich, Daniel Gollub, Hannes Reinecke. The conclusions we drew
and the patch itself are the authors' responsibility alone.

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/acpi/pci_irq.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/quirks.c   | 28 +++++++++++++++++++++++++
 include/linux/pci.h    |  6 ++++++
 3 files changed, 90 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 89022a74faee..b37cb0a9826e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -384,6 +384,27 @@ acpi_pci_free_irq(struct acpi_prt_entry *entry,
 	return irq;
 }
 
+#ifdef CONFIG_X86_IO_APIC
+extern int noioapicquirk;
+
+static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
+{
+	struct pci_bus *bus_it;
+
+	for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) {
+		if (!bus_it->self)
+			return 0;
+
+		printk(KERN_INFO "vendor=%04x device=%04x\n", bus_it->self->vendor,
+				bus_it->self->device);
+
+		if (bus_it->self->irq_reroute_variant)
+			return bus_it->self->irq_reroute_variant;
+	}
+	return 0;
+}
+#endif /* CONFIG_X86_IO_APIC */
+
 /*
  * acpi_pci_irq_lookup
  * success: return IRQ >= 0
@@ -413,6 +434,41 @@ acpi_pci_irq_lookup(struct pci_bus *bus,
 	}
 
 	ret = func(entry, triggering, polarity, link);
+
+#ifdef CONFIG_X86_IO_APIC
+	/*
+	 * Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the
+	 * IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel
+	 * does during interrupt handling). When this INTx generation cannot be
+	 * disabled, we reroute these interrupts to their legacy equivalent to
+	 * get rid of spurious interrupts.
+	 */
+        if (!noioapicquirk) {
+		switch (bridge_has_boot_interrupt_variant(bus)) {
+		case 0:
+			/* no rerouting necessary */
+			break;
+
+		case INTEL_IRQ_REROUTE_VARIANT:
+			/*
+			 * Remap according to INTx routing table in 6700PXH
+			 * specs, intel order number 302628-002, section
+			 * 2.15.2. Other chipsets (80332, ...) have the same
+			 * mapping and are handled here as well.
+			 */
+			printk(KERN_INFO "pci irq %d -> rerouted to legacy "
+					 "irq %d\n", ret, (ret % 4) + 16);
+			ret = (ret % 4) + 16;
+			break;
+
+		default:
+			printk(KERN_INFO "not rerouting irq %d to legacy irq: "
+					 "unknown mapping\n", ret);
+			break;
+		}
+	}
+#endif /* CONFIG_X86_IO_APIC */
+
 	return ret;
 }
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index eb97564316d0..ac634ae2eb08 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1364,6 +1364,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260a, quirk_intel_pcie_pm);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260b, quirk_intel_pcie_pm);
 
 #ifdef CONFIG_X86_IO_APIC
+/*
+ * Boot interrupts on some chipsets cannot be turned off. For these chipsets,
+ * remap the original interrupt in the linux kernel to the boot interrupt, so
+ * that a PCI device's interrupt handler is installed on the boot interrupt
+ * line instead.
+ */
+static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev)
+{
+	int i;
+
+	if (noioapicquirk)
+		return;
+
+	dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
+
+	printk(KERN_INFO "PCI quirk: reroute interrupts for 0x%04x:0x%04x\n",
+			dev->vendor, dev->device);
+	return;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80333_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80333_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_ESB2_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXH_1,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_PXHV,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80332_0,	quirk_reroute_to_boot_interrupts_intel);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_80332_1,	quirk_reroute_to_boot_interrupts_intel);
+
 /*
  * On some chipsets we can disable the generation of legacy INTx boot
  * interrupts.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d18b1dd49fab..6755cf5ac109 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -117,6 +117,11 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
 };
 
+enum pci_irq_reroute_variant {
+	INTEL_IRQ_REROUTE_VARIANT = 1,
+	MAX_IRQ_REROUTE_VARIANTS = 3
+};
+
 typedef unsigned short __bitwise pci_bus_flags_t;
 enum pci_bus_flags {
 	PCI_BUS_FLAGS_NO_MSI   = (__force pci_bus_flags_t) 1,
@@ -194,6 +199,7 @@ struct pci_dev {
 	unsigned int	no_d1d2:1;   /* only allow d0 or d3 */
 	unsigned int	block_ucfg_access:1;	/* userspace config space access is blocked */
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
+	unsigned int	irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
 	unsigned int 	msi_enabled:1;
 	unsigned int	msix_enabled:1;
 	unsigned int	is_managed:1;
-- 
cgit v1.2.3


From 33be8333421f842789fa7e363ce4142947e094f0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 18 Jun 2008 14:47:09 +0200
Subject: x86: boot IRQ quirks and rerouting, fix

fix:

init/built-in.o: In function `nosmp':
main.c:(.init.text+0x14): undefined reference to `noioapicquirk'
main.c:(.init.text+0x1e): undefined reference to `noioapicreroute'
init/built-in.o: In function `maxcpus':
main.c:(.init.text+0x133): undefined reference to `noioapicquirk'
main.c:(.init.text+0x13d): undefined reference to `noioapicreroute'
arch/x86/kernel/built-in.o: In function `parse_noapic':
io_apic_32.c:(.init.text+0x7836): undefined reference to `noioapicquirk'
io_apic_32.c:(.init.text+0x7840): undefined reference to `noioapicreroute'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/io_apic.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index a39670ae17df..721605d8f116 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -168,8 +168,10 @@ extern int timer_through_8259;
 
 static inline void disable_ioapic_setup(void)
 {
+#ifdef CONFIG_PCI
 	noioapicquirk = 1;
 	noioapicreroute = -1;
+#endif
 	skip_ioapic_setup = 1;
 }
 
-- 
cgit v1.2.3


From 41b9eb264c8407655db57b60b4457fe1b2ec9977 Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@suse.de>
Date: Tue, 15 Jul 2008 13:48:55 +0200
Subject: x86, pci: introduce config option for pci reroute quirks (was: [PATCH
 0/3] Boot IRQ quirks for Broadcom and AMD/ATI)

This is against linux-2.6-tip, branch pci-ioapic-boot-irq-quirks.

From: Stefan Assmann <sassmann@suse.de>
Subject: Introduce config option for pci reroute quirks

The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS is introduced to
enable (or disable) the redirection of the interrupt handler to the boot
interrupt line by default. Depending on the existence of interrupt
masking / threaded interrupt handling in the kernel (vanilla, rt, ...)
and the maturity of the rerouting patch, users can enable or disable the
redirection by default.

This means that the reroute quirk can be applied to any kernel without
changing it.

Interrupt sharing could be increased if this option is enabled. However this
option is vital for threaded interrupt handling, as done by the RT kernel.
It should simplify the consolidation with the RT kernel.

The option can be overridden by either pci=ioapicreroute or
pci=noioapicreroute.

Signed-off-by: Stefan Assmann <sassmann@suse.de>
Signed-off-by: Olaf Dabrunz <od@suse.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Jon Masters <jonathan@jonmasters.org>
Cc: Ihno Krumreich <ihno@suse.de>
Cc: Sven Dietrich <sdietrich@suse.de>
Cc: Daniel Gollub <dgollub@suse.de>
Cc: Felix Foerster <ffoerster@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |  4 ++++
 arch/x86/Kconfig                    | 24 ++++++++++++++++++++++++
 arch/x86/pci/common.c               |  8 ++++++++
 drivers/pci/quirks.c                |  2 +-
 include/asm-x86/pci.h               |  2 +-
 5 files changed, 38 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f5662b7a34d1..62b6e8067a5b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1536,6 +1536,10 @@ and is between 256 and 4096 characters. It is defined in the file
 				primary IO-APIC for bridges that cannot disable
 				boot IRQs. This fixes a source of spurious IRQs
 				when the system masks IRQs.
+		noioapicreroute	[APIC] Disable workaround that uses the
+				boot IRQ equivalent of an IRQ that connects to
+				a chipset where boot IRQs cannot be disabled.
+				The opposite of ioapicreroute.
 		biosirq		[X86-32] Use PCI BIOS calls to get the interrupt
 				routing table. These calls are known to be buggy
 				on several machines and they hang the machine
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96e0c2ebc388..09521332636b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -665,6 +665,30 @@ config X86_VISWS_APIC
 	def_bool y
 	depends on X86_32 && X86_VISWS
 
+config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+	bool "Reroute for broken boot IRQs"
+	default n
+	depends on X86_IO_APIC
+	help
+	  This option enables a workaround that fixes a source of
+	  spurious interrupts. This is recommended when threaded
+	  interrupt handling is used on systems where the generation of
+	  superfluous "boot interrupts" cannot be disabled.
+
+	  Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
+	  entry in the chipset's IO-APIC is masked (as, e.g. the RT
+	  kernel does during interrupt handling). On chipsets where this
+	  boot IRQ generation cannot be disabled, this workaround keeps
+	  the original IRQ line masked so that only the equivalent "boot
+	  IRQ" is delivered to the CPUs. The workaround also tells the
+	  kernel to set up the IRQ handler on the boot IRQ line. In this
+	  way only one interrupt is delivered to the kernel. Otherwise
+	  the spurious second interrupt may cause the kernel to bring
+	  down (vital) interrupt lines.
+
+	  Only affects "broken" chipsets. Interrupt sharing may be
+	  increased on these systems.
+
 config X86_MCE
 	bool "Machine Check Exception"
 	depends on !X86_VOYAGER
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1485a26ddcef..bb1a01f089e2 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -24,7 +24,11 @@ unsigned int pci_early_dump_regs;
 static int pci_bf_sort;
 int pci_routeirq;
 int noioapicquirk;
+#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+int noioapicreroute = 0;
+#else
 int noioapicreroute = 1;
+#endif
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
@@ -528,6 +532,10 @@ char * __devinit  pcibios_setup(char *str)
 		if (noioapicreroute != -1)
 			noioapicreroute = 0;
 		return NULL;
+	} else if (!strcmp(str, "noioapicreroute")) {
+		if (noioapicreroute != -1)
+			noioapicreroute = 1;
+		return NULL;
 	}
 	return str;
 }
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 0911b0c60b64..c880dd0bbfb5 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1397,7 +1397,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	0x260b, quirk_intel_pcie_pm);
  */
 static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev)
 {
-	if (noioapicquirk)
+	if (noioapicquirk || noioapicreroute)
 		return;
 
 	dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 52a29f7668ef..9584d6d5eb93 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -20,7 +20,7 @@ struct pci_sysdata {
 
 extern int pci_routeirq;
 extern int noioapicquirk;
-extern int ioapicreroute;
+extern int noioapicreroute;
 
 /* scan a bus after allocating a pci_sysdata for it */
 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-- 
cgit v1.2.3


From bd8fbdee6562ee526f3c2582a3b373ef195015dd Mon Sep 17 00:00:00 2001
From: Rui Sousa <rui.p.m.sousa@gmail.com>
Date: Wed, 3 Sep 2008 17:53:07 +0200
Subject: lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not
 set

This patch fixes compilation if CONFIG_TRACE_IRQFLAGS_SUPPORT is ever
disabled (which is currently not allowed by Kconfig). Alternatively
we could just remove the option altogether and the associated code
paths. Since the compilation error has been in the tree for at
least two years and no one noticed it, I guess we don't really have
the need for CONFIG_TRACE_IRQFLAGS_SUPPORT=n.

Boot tested on x86 UP.

Signed-off-by: Rui Sousa <rui.p.m.sousa@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irqflags.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 74bde13224c9..f2993512b3b5 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -52,10 +52,10 @@
 # define start_critical_timings() do { } while (0)
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
-
 #include <asm/irqflags.h>
 
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+
 #define local_irq_enable() \
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
@@ -84,21 +84,20 @@
  * The local_irq_*() APIs are equal to the raw_local_irq*()
  * if !TRACE_IRQFLAGS.
  */
-# define raw_local_irq_disable()	local_irq_disable()
-# define raw_local_irq_enable()		local_irq_enable()
-# define raw_local_irq_save(flags)			\
+#define local_irq_disable()		raw_local_irq_disable()
+#define local_irq_enable()		raw_local_irq_enable()
+#define local_irq_save(flags)				\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		local_irq_save(flags);			\
+		raw_local_irq_save(flags);		\
 	} while (0)
-# define raw_local_irq_restore(flags)			\
+# define local_irq_restore(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		local_irq_restore(flags);		\
+		raw_local_irq_restore(flags);		\
 	} while (0)
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 #define safe_halt()						\
 	do {							\
 		trace_hardirqs_on();				\
@@ -124,6 +123,5 @@
 	typecheck(unsigned long, flags);	\
 	raw_irqs_disabled_flags(flags);		\
 })
-#endif		/* CONFIG_X86 */
 
 #endif
-- 
cgit v1.2.3


From 8c56250f48347750c82ab18d98d647dcf99ca674 Mon Sep 17 00:00:00 2001
From: Rui Sousa <rui.p.m.sousa@gmail.com>
Date: Thu, 4 Sep 2008 19:47:53 +0200
Subject: lockdep, UML: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is
 not set

Hiroshi Shimamoto reported:

> > !TRACE_IRQFLAGS_SUPPORT mode of build for future work, it can be
> > restored via a simple revert.
>
> Hi, it seems that this patch breaks uml build.
>
> kernel/printk.c: In function 'vprintk':
> kernel/printk.c:674: error: implicit declaration of function
> 'raw_local_irq_save' kernel/printk.c:772: error: implicit declaration of
> function 'raw_local_irq_restore'

With the patch bellow it compiles (make ARCH=um with a x86 host), but I'm
really out of my league on this one...

Reported-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-um/system-generic.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h
index 5bcfa35e7a22..f1ea4da34fad 100644
--- a/include/asm-um/system-generic.h
+++ b/include/asm-um/system-generic.h
@@ -4,15 +4,15 @@
 #include "asm/arch/system.h"
 
 #undef switch_to
-#undef local_irq_save
-#undef local_irq_restore
-#undef local_irq_disable
-#undef local_irq_enable
-#undef local_save_flags
-#undef local_irq_restore
-#undef local_irq_enable
-#undef local_irq_disable
-#undef local_irq_save
+#undef raw_local_irq_save
+#undef raw_local_irq_restore
+#undef raw_local_irq_disable
+#undef raw_local_irq_enable
+#undef raw_local_save_flags
+#undef raw_local_irq_restore
+#undef raw_local_irq_enable
+#undef raw_local_irq_disable
+#undef raw_local_irq_save
 #undef irqs_disabled
 
 extern void *switch_to(void *prev, void *next, void *last);
@@ -23,21 +23,21 @@ extern int get_signals(void);
 extern void block_signals(void);
 extern void unblock_signals(void);
 
-#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
+#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
 				     (flags) = get_signals(); } while(0)
-#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
+#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
 				      set_signals(flags); } while(0)
 
-#define local_irq_save(flags) do { local_save_flags(flags); \
-                                   local_irq_disable(); } while(0)
+#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
+                                   raw_local_irq_disable(); } while(0)
 
-#define local_irq_enable() unblock_signals()
-#define local_irq_disable() block_signals()
+#define raw_local_irq_enable() unblock_signals()
+#define raw_local_irq_disable() block_signals()
 
 #define irqs_disabled()                 \
 ({                                      \
         unsigned long flags;            \
-        local_save_flags(flags);        \
+        raw_local_save_flags(flags);        \
         (flags == 0);                   \
 })
 
-- 
cgit v1.2.3


From ab7476cf76e560f0efda2a631a70aabe93009025 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 15 Aug 2008 15:29:38 -0700
Subject: debug: add notifier chain debugging, v2

- unbreak ia64 (and powerpc) where function pointers dont
  point at code but at data (reported by Tony Luck)

[ mingo@elte.hu: various cleanups ]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h |  3 +++
 kernel/extable.c       | 16 ++++++++++++++++
 kernel/notifier.c      | 10 +---------
 lib/vsprintf.c         |  2 +-
 4 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2651f805ba6d..4e1366b552ae 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,6 +187,9 @@ extern unsigned long long memparse(char *ptr, char **retptr);
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
+extern int func_ptr_is_kernel_text(void *ptr);
+extern void *dereference_function_descriptor(void *ptr);
+
 struct pid;
 extern struct pid *session_of_pgrp(struct pid *pgrp);
 
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..adf0cc9c02d6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
 		return 1;
 	return module_text_address(addr) != NULL;
 }
+
+/*
+ * On some architectures (PPC64, IA64) function pointers
+ * are actually only tokens to some data that then holds the
+ * real function address. As a result, to find if a function
+ * pointer is part of the kernel text, we need to do some
+ * special dereferencing first.
+ */
+int func_ptr_is_kernel_text(void *ptr)
+{
+	unsigned long addr;
+	addr = (unsigned long) dereference_function_descriptor(ptr);
+	if (core_kernel_text(addr))
+		return 1;
+	return module_text_address(addr) != NULL;
+}
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 143fdd77dbf7..0f39e398ef60 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -21,10 +21,6 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
 static int notifier_chain_register(struct notifier_block **nl,
 		struct notifier_block *n)
 {
-	if (!kernel_text_address((unsigned long)n->notifier_call)) {
-		WARN(1, "Invalid notifier registered!");
-		return 0;
-	}
 	while ((*nl) != NULL) {
 		if (n->priority > (*nl)->priority)
 			break;
@@ -38,10 +34,6 @@ static int notifier_chain_register(struct notifier_block **nl,
 static int notifier_chain_cond_register(struct notifier_block **nl,
 		struct notifier_block *n)
 {
-	if (!kernel_text_address((unsigned long)n->notifier_call)) {
-		WARN(1, "Invalid notifier registered!");
-		return 0;
-	}
 	while ((*nl) != NULL) {
 		if ((*nl) == n)
 			return 0;
@@ -92,7 +84,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 		next_nb = rcu_dereference(nb->next);
 
 #ifdef CONFIG_DEBUG_NOTIFIERS
-		if (!kernel_text_address((unsigned long)nb->notifier_call)) {
+		if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
 			WARN(1, "Invalid notifier called!");
 			nb = next_nb;
 			continue;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d8d1d1142248..f5e5ffb9942f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -513,7 +513,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 	return buf;
 }
 
-static inline void *dereference_function_descriptor(void *ptr)
+void *dereference_function_descriptor(void *ptr)
 {
 #if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
 	void *p;
-- 
cgit v1.2.3


From 76b189e91845eab3a9d52bb97f971d312d25652d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Sep 2008 09:57:35 +0200
Subject: lockdep: add might_lock() / might_lock_read()

useful to establish a lock dependency in case the actual dependency is
rare or hard to trigger.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1c2d8e..0aa657aa8a1e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -480,4 +480,22 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 # define lock_map_release(l)			do { } while (0)
 #endif
 
+#ifdef CONFIG_PROVE_LOCKING
+# define might_lock(lock) 						\
+do {									\
+	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
+	lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_);	\
+	lock_release(&(lock)->dep_map, 0, _THIS_IP_);			\
+} while (0)
+# define might_lock_read(lock) 						\
+do {									\
+	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
+	lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_);	\
+	lock_release(&(lock)->dep_map, 0, _THIS_IP_);			\
+} while (0)
+#else
+# define might_lock(lock) do { } while (0)
+# define might_lock_read(lock) do { } while (0)
+#endif
+
 #endif /* __LINUX_LOCKDEP_H */
-- 
cgit v1.2.3


From c10d38dda1774ed4540380333cabd229eff37094 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 10 Sep 2008 13:37:17 +0200
Subject: x86: some lock annotations for user copy paths

copy_to/from_user and all its variants (except the atomic ones) can take a
page fault and perform non-trivial work like taking mmap_sem and entering
the filesyste/pagecache.

Unfortunately, this often escapes lockdep because a common pattern is to
use it to read in some arguments just set up from userspace, or write data
back to a hot buffer. In those cases, it will be unlikely for page reclaim
to get a window in to cause copy_*_user to fault.

With the new might_lock primitives, add some annotations to x86. I don't
know if I caught all possible faulting points (it's a bit of a maze, and I
didn't really look at 32-bit). But this is a starting point.

Boots and runs OK so far.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c   |  7 ++++++-
 arch/x86/lib/usercopy_64.c   |  4 ++++
 include/asm-x86/uaccess.h    | 14 ++++++++++++++
 include/asm-x86/uaccess_32.h | 10 ++++++++--
 include/asm-x86/uaccess_64.h | 12 ++++++++++++
 5 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 24e60944971a..8eedde2a9cac 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -33,6 +33,8 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 do {									   \
 	int __d0, __d1, __d2;						   \
 	might_sleep();							   \
+	if (current->mm)						   \
+		might_lock_read(&current->mm->mmap_sem);		   \
 	__asm__ __volatile__(						   \
 		"	testl %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -120,6 +122,8 @@ EXPORT_SYMBOL(strncpy_from_user);
 do {									\
 	int __d0;							\
 	might_sleep();							\
+	if (current->mm)						\
+		might_lock_read(&current->mm->mmap_sem);		\
 	__asm__ __volatile__(						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
@@ -148,7 +152,6 @@ do {									\
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
-	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
@@ -191,6 +194,8 @@ long strnlen_user(const char __user *s, long n)
 	unsigned long res, tmp;
 
 	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 
 	__asm__ __volatile__(
 		"	testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..847d12945998 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -16,6 +16,8 @@
 do {									   \
 	long __d0, __d1, __d2;						   \
 	might_sleep();							   \
+	if (current->mm)						   \
+		might_lock_read(&current->mm->mmap_sem);		   \
 	__asm__ __volatile__(						   \
 		"	testq %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -65,6 +67,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	long __d0;
 	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
 	asm volatile(
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 5f702d1d5218..ad29752a1713 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -8,6 +8,8 @@
 #include <linux/thread_info.h>
 #include <linux/prefetch.h>
 #include <linux/string.h>
+#include <linux/lockdep.h>
+#include <linux/sched.h>
 #include <asm/asm.h>
 #include <asm/page.h>
 
@@ -157,6 +159,9 @@ extern int __get_user_bad(void);
 	int __ret_gu;							\
 	unsigned long __val_gu;						\
 	__chk_user_ptr(ptr);						\
+	might_sleep();							\
+	if (current->mm)						\
+		might_lock_read(&current->mm->mmap_sem);		\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
@@ -241,6 +246,9 @@ extern void __put_user_8(void);
 	int __ret_pu;						\
 	__typeof__(*(ptr)) __pu_val;				\
 	__chk_user_ptr(ptr);					\
+	might_sleep();						\
+	if (current->mm)					\
+		might_lock_read(&current->mm->mmap_sem);	\
 	__pu_val = x;						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
@@ -265,6 +273,9 @@ extern void __put_user_8(void);
 #define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
+	might_sleep();							\
+	if (current->mm)						\
+		might_lock_read(&current->mm->mmap_sem);		\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
@@ -317,6 +328,9 @@ do {									\
 #define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
+	might_sleep();							\
+	if (current->mm)						\
+		might_lock_read(&current->mm->mmap_sem);		\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 6fdef39a0bcb..d725e2d703f7 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -82,8 +82,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       might_sleep();
-       return __copy_to_user_inatomic(to, from, n);
+	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
+	return __copy_to_user_inatomic(to, from, n);
 }
 
 static __always_inline unsigned long
@@ -138,6 +140,8 @@ static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -160,6 +164,8 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
 	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 515d4dce96b5..40a7205fe576 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -28,6 +28,10 @@ static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -70,6 +74,10 @@ static __always_inline __must_check
 int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
+
+	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -112,6 +120,10 @@ static __always_inline __must_check
 int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_sleep();
+	if (current->mm)
+		might_lock_read(&current->mm->mmap_sem);
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
-- 
cgit v1.2.3


From 3ee1afa308f2a38e5d1e2ad3752ad7abcf480da1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 10 Sep 2008 13:37:17 +0200
Subject: x86: some lock annotations for user copy paths, v2

 - introduce might_fault()
 - handle the atomic user copy paths correctly

[ mingo@elte.hu: move might_sleep() outside of in_atomic(). ]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c   | 12 +++---------
 arch/x86/lib/usercopy_64.c   |  8 ++------
 include/asm-x86/uaccess.h    | 18 ++++--------------
 include/asm-x86/uaccess_32.h | 12 +++---------
 include/asm-x86/uaccess_64.h | 12 +++---------
 include/linux/kernel.h       |  9 +++++++++
 mm/memory.c                  | 15 +++++++++++++++
 7 files changed, 39 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 8eedde2a9cac..7393152a252e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -32,9 +32,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 #define __do_strncpy_from_user(dst, src, count, res)			   \
 do {									   \
 	int __d0, __d1, __d2;						   \
-	might_sleep();							   \
-	if (current->mm)						   \
-		might_lock_read(&current->mm->mmap_sem);		   \
+	might_fault();							   \
 	__asm__ __volatile__(						   \
 		"	testl %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -121,9 +119,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 #define __do_clear_user(addr,size)					\
 do {									\
 	int __d0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__asm__ __volatile__(						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
@@ -193,9 +189,7 @@ long strnlen_user(const char __user *s, long n)
 	unsigned long mask = -__addr_ok(s);
 	unsigned long res, tmp;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 
 	__asm__ __volatile__(
 		"	testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 847d12945998..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,9 +15,7 @@
 #define __do_strncpy_from_user(dst,src,count,res)			   \
 do {									   \
 	long __d0, __d1, __d2;						   \
-	might_sleep();							   \
-	if (current->mm)						   \
-		might_lock_read(&current->mm->mmap_sem);		   \
+	might_fault();							   \
 	__asm__ __volatile__(						   \
 		"	testq %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -66,9 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	long __d0;
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
 	asm volatile(
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index ad29752a1713..39f8420c75d9 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -8,8 +8,6 @@
 #include <linux/thread_info.h>
 #include <linux/prefetch.h>
 #include <linux/string.h>
-#include <linux/lockdep.h>
-#include <linux/sched.h>
 #include <asm/asm.h>
 #include <asm/page.h>
 
@@ -159,9 +157,7 @@ extern int __get_user_bad(void);
 	int __ret_gu;							\
 	unsigned long __val_gu;						\
 	__chk_user_ptr(ptr);						\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
@@ -246,9 +242,7 @@ extern void __put_user_8(void);
 	int __ret_pu;						\
 	__typeof__(*(ptr)) __pu_val;				\
 	__chk_user_ptr(ptr);					\
-	might_sleep();						\
-	if (current->mm)					\
-		might_lock_read(&current->mm->mmap_sem);	\
+	might_fault();						\
 	__pu_val = x;						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
@@ -273,9 +267,7 @@ extern void __put_user_8(void);
 #define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
@@ -328,9 +320,7 @@ do {									\
 #define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index d725e2d703f7..d10e842ec3ee 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -82,9 +82,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	return __copy_to_user_inatomic(to, from, n);
 }
 
@@ -139,9 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -163,9 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 40a7205fe576..13fd56fbc3ab 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -29,9 +29,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -75,9 +73,7 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -121,9 +117,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2651f805ba6d..e580ec095765 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -140,6 +140,15 @@ extern int _cond_resched(void);
 		(__x < 0) ? -__x : __x;		\
 	})
 
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void);
+#else
+static inline void might_fault(void)
+{
+	might_sleep();
+}
+#endif
+
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(long time);
 NORET_TYPE void panic(const char * fmt, ...)
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f497..b8fdf4e5e65b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3016,3 +3016,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
 	}
 	up_read(&current->mm->mmap_sem);
 }
+
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void)
+{
+	might_sleep();
+	/*
+	 * it would be nicer only to annotate paths which are not under
+	 * pagefault_disable, however that requires a larger audit and
+	 * providing helpers like get_user_atomic.
+	 */
+	if (!in_atomic() && current->mm)
+		might_lock_read(&current->mm->mmap_sem);
+}
+EXPORT_SYMBOL(might_fault);
+#endif
-- 
cgit v1.2.3


From 1d18ef489509314506328b9e464dd47c24c1d68f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Sep 2008 20:53:21 +0200
Subject: x86: some lock annotations for user copy paths, v3

- add annotation back to clear_user()
- change probe_kernel_address() to _inatomic*() method

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c | 1 +
 include/asm-x86/uaccess.h  | 2 --
 include/linux/uaccess.h    | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 7393152a252e..fab5faba1d3e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -148,6 +148,7 @@ do {									\
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 39f8420c75d9..dc8edb5c4659 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -267,7 +267,6 @@ extern void __put_user_8(void);
 #define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
@@ -320,7 +319,6 @@ do {									\
 #define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index fec6decfb983..2062293e57e6 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 							\
 		set_fs(KERNEL_DS);			\
 		pagefault_disable();			\
-		ret = __get_user(retval, (__force typeof(retval) __user *)(addr));		\
+		ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval));		\
 		pagefault_enable();			\
 		set_fs(old_fs);				\
 		ret;					\
-- 
cgit v1.2.3


From 53b9d87f41a3d8838210ad7cdef02d814817ce85 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 11 Sep 2008 17:02:58 -0700
Subject: lock debug: sit tight when we are already in a panic

in:

  > http://bugzilla.kernel.org/show_bug.cgi?id=11543

The panic code called the kexec code which called mutex_trylock() which
called spin_lock_mutex() which then stupidly went and blurted a load of
debug stuff because of in_interrupt().

Keep the lock debug code from escallating an already crappy situation.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/debug_locks.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 4aaa4afb1cb9..096476f1fb35 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -17,7 +17,7 @@ extern int debug_locks_off(void);
 ({									\
 	int __ret = 0;							\
 									\
-	if (unlikely(c)) {						\
+	if (!oops_in_progress && unlikely(c)) {				\
 		if (debug_locks_off() && !debug_locks_silent)		\
 			WARN_ON(1);					\
 		__ret = 1;						\
-- 
cgit v1.2.3


From 30742d5c2277c325fb0e9d2d817d55a19995fe8f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Sep 2008 14:43:39 +0200
Subject: Revert "lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT
 is not set"

This reverts commit bd8fbdee6562ee526f3c2582a3b373ef195015dd.

This broke the powerpc build - more fixes are needed before we can
undo this revert.
---
 include/linux/irqflags.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index f2993512b3b5..74bde13224c9 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -52,10 +52,10 @@
 # define start_critical_timings() do { } while (0)
 #endif
 
-#include <asm/irqflags.h>
-
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 
+#include <asm/irqflags.h>
+
 #define local_irq_enable() \
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
@@ -84,20 +84,21 @@
  * The local_irq_*() APIs are equal to the raw_local_irq*()
  * if !TRACE_IRQFLAGS.
  */
-#define local_irq_disable()		raw_local_irq_disable()
-#define local_irq_enable()		raw_local_irq_enable()
-#define local_irq_save(flags)				\
+# define raw_local_irq_disable()	local_irq_disable()
+# define raw_local_irq_enable()		local_irq_enable()
+# define raw_local_irq_save(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		raw_local_irq_save(flags);		\
+		local_irq_save(flags);			\
 	} while (0)
-# define local_irq_restore(flags)			\
+# define raw_local_irq_restore(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		raw_local_irq_restore(flags);		\
+		local_irq_restore(flags);		\
 	} while (0)
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 #define safe_halt()						\
 	do {							\
 		trace_hardirqs_on();				\
@@ -123,5 +124,6 @@
 	typecheck(unsigned long, flags);	\
 	raw_irqs_disabled_flags(flags);		\
 })
+#endif		/* CONFIG_X86 */
 
 #endif
-- 
cgit v1.2.3


From fb71e45338453698bd7460f7e8f171ea0304d218 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 15 Sep 2008 18:04:26 -0700
Subject: uaccess: fix parameters inversion for __copy_from_user_inatomic()

The following patch changes to use __copy_from_user_inatomic(),
but the passing parameters incorrect:

    x86: some lock annotations for user copy paths, v3

This fixes the netfilter crash reported by Steven Noonan.

Reported-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Tested-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 2062293e57e6..6b58367d145e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 							\
 		set_fs(KERNEL_DS);			\
 		pagefault_disable();			\
-		ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval));		\
+		ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval));		\
 		pagefault_enable();			\
 		set_fs(old_fs);				\
 		ret;					\
-- 
cgit v1.2.3


From 38d47c1b7075bd7ec3881141bb3629da58f88dab Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 26 Sep 2008 19:32:20 +0200
Subject: futex: rely on get_user_pages() for shared futexes

On the way of getting rid of the mmap_sem requirement for shared futexes,
start by relying on get_user_pages().

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/futex.h |   2 +
 kernel/futex.c        | 162 +++++++++++++++++++++++++-------------------------
 2 files changed, 82 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 586ab56a3ec3..8f627b9ae2b1 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -164,6 +164,8 @@ union futex_key {
 	} both;
 };
 
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
 extern void exit_pi_state_list(struct task_struct *curr);
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..a4c39fa0a7a3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -161,6 +161,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
 		&& key1->both.offset == key2->both.offset);
 }
 
+/*
+ * Take a reference to the resource addressed by a key.
+ * Can be called while holding spinlocks.
+ *
+ */
+static void get_futex_key_refs(union futex_key *key)
+{
+	if (!key->both.ptr)
+		return;
+
+	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+	case FUT_OFF_INODE:
+		atomic_inc(&key->shared.inode->i_count);
+		break;
+	case FUT_OFF_MMSHARED:
+		atomic_inc(&key->private.mm->mm_count);
+		break;
+	}
+}
+
+/*
+ * Drop a reference to the resource addressed by a key.
+ * The hash bucket spinlock must not be held.
+ */
+static void drop_futex_key_refs(union futex_key *key)
+{
+	if (!key->both.ptr)
+		return;
+
+	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+	case FUT_OFF_INODE:
+		iput(key->shared.inode);
+		break;
+	case FUT_OFF_MMSHARED:
+		mmdrop(key->private.mm);
+		break;
+	}
+}
+
 /**
  * get_futex_key - Get parameters which are the keys for a futex.
  * @uaddr: virtual address of the futex
@@ -184,7 +223,6 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
 	struct page *page;
 	int err;
 
@@ -210,98 +248,47 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
 		key->private.address = address;
 		return 0;
 	}
-	/*
-	 * The futex is hashed differently depending on whether
-	 * it's in a shared or private mapping.  So check vma first.
-	 */
-	vma = find_extend_vma(mm, address);
-	if (unlikely(!vma))
-		return -EFAULT;
 
-	/*
-	 * Permissions.
-	 */
-	if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-		return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+again:
+	err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
+	if (err < 0)
+		return err;
+
+	lock_page(page);
+	if (!page->mapping) {
+		unlock_page(page);
+		put_page(page);
+		goto again;
+	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
-	 * the object not the particular process.  Therefore we use
-	 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-	 * mappings of _writable_ handles.
+	 * the object not the particular process.
 	 */
-	if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-		key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+	if (PageAnon(page)) {
+		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
-		return 0;
-	}
-
-	/*
-	 * Linear file mappings are also simple.
-	 */
-	key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-	key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-	if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-		key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-				     + vma->vm_pgoff);
-		return 0;
+	} else {
+		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+		key->shared.inode = page->mapping->host;
+		key->shared.pgoff = page->index;
 	}
 
-	/*
-	 * We could walk the page table to read the non-linear
-	 * pte, and get the page index without fetching the page
-	 * from swap.  But that's a lot of code to duplicate here
-	 * for a rare case, so we simply fetch the page.
-	 */
-	err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-	if (err >= 0) {
-		key->shared.pgoff =
-			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-		put_page(page);
-		return 0;
-	}
-	return err;
-}
+	get_futex_key_refs(key);
 
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
-	if (key->both.ptr == NULL)
-		return;
-	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-		case FUT_OFF_INODE:
-			atomic_inc(&key->shared.inode->i_count);
-			break;
-		case FUT_OFF_MMSHARED:
-			atomic_inc(&key->private.mm->mm_count);
-			break;
-	}
+	unlock_page(page);
+	put_page(page);
+	return 0;
 }
 
-/*
- * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held.
- */
-static void drop_futex_key_refs(union futex_key *key)
+static inline
+void put_futex_key(struct rw_semaphore *fshared, union futex_key *key)
 {
-	if (!key->both.ptr)
-		return;
-	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-		case FUT_OFF_INODE:
-			iput(key->shared.inode);
-			break;
-		case FUT_OFF_MMSHARED:
-			mmdrop(key->private.mm);
-			break;
-	}
+	drop_futex_key_refs(key);
 }
 
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -385,6 +372,7 @@ static int refill_pi_state_cache(void)
 	/* pi_mutex gets initialized later */
 	pi_state->owner = NULL;
 	atomic_set(&pi_state->refcount, 1);
+	pi_state->key = FUTEX_KEY_INIT;
 
 	current->pi_state_cache = pi_state;
 
@@ -462,7 +450,7 @@ void exit_pi_state_list(struct task_struct *curr)
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
 	struct futex_hash_bucket *hb;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 
 	if (!futex_cmpxchg_enabled)
 		return;
@@ -725,7 +713,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
 	struct plist_head *head;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 	int ret;
 
 	if (!bitset)
@@ -760,6 +748,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	spin_unlock(&hb->lock);
 out:
+	put_futex_key(fshared, &key);
 	futex_unlock_mm(fshared);
 	return ret;
 }
@@ -773,7 +762,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
 	      u32 __user *uaddr2,
 	      int nr_wake, int nr_wake2, int op)
 {
-	union futex_key key1, key2;
+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head;
 	struct futex_q *this, *next;
@@ -873,6 +862,8 @@ retry:
 	if (hb1 != hb2)
 		spin_unlock(&hb2->lock);
 out:
+	put_futex_key(fshared, &key2);
+	put_futex_key(fshared, &key1);
 	futex_unlock_mm(fshared);
 
 	return ret;
@@ -886,7 +877,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 			 u32 __user *uaddr2,
 			 int nr_wake, int nr_requeue, u32 *cmpval)
 {
-	union futex_key key1, key2;
+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head1;
 	struct futex_q *this, *next;
@@ -974,6 +965,8 @@ out_unlock:
 		drop_futex_key_refs(&key1);
 
 out:
+	put_futex_key(fshared, &key2);
+	put_futex_key(fshared, &key1);
 	futex_unlock_mm(fshared);
 	return ret;
 }
@@ -1220,6 +1213,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
  retry:
 	futex_lock_mm(fshared);
 
+	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
@@ -1360,6 +1354,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
+	put_futex_key(fshared, &q.key);
 	futex_unlock_mm(fshared);
 	return ret;
 }
@@ -1411,6 +1406,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
  retry:
 	futex_lock_mm(fshared);
 
+	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
@@ -1625,6 +1621,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
+	put_futex_key(fshared, &q.key);
 	futex_unlock_mm(fshared);
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
@@ -1671,7 +1668,7 @@ static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
 	struct futex_q *this, *next;
 	u32 uval;
 	struct plist_head *head;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 	int ret, attempt = 0;
 
 retry:
@@ -1744,6 +1741,7 @@ retry_unlocked:
 out_unlock:
 	spin_unlock(&hb->lock);
 out:
+	put_futex_key(fshared, &key);
 	futex_unlock_mm(fshared);
 
 	return ret;
-- 
cgit v1.2.3


From 6b9331165e9827e055389e22d1cbdb5fe3cff835 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 16 Oct 2008 11:00:07 +0100
Subject: ALSA: ASoC: Remove snd_soc_dapm_connect_input()

This was marked as deprecated in 2.6.27 and all users except for
playpaq_wm8510 fixed in that release.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/soc-dapm.h |  2 --
 sound/soc/soc-dapm.c     | 22 ----------------------
 2 files changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index ca699a3017f3..7ee2f70ca42e 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -221,8 +221,6 @@ int snd_soc_dapm_new_controls(struct snd_soc_codec *codec,
 	int num);
 
 /* dapm path setup */
-int  __deprecated snd_soc_dapm_connect_input(struct snd_soc_codec *codec,
-	const char *sink_name, const char *control_name, const char *src_name);
 int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec);
 void snd_soc_dapm_free(struct snd_soc_device *socdev);
 int snd_soc_dapm_add_routes(struct snd_soc_codec *codec,
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index efbd0b37810a..4060fc54bbb1 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1006,28 +1006,6 @@ err:
 	return ret;
 }
 
-/**
- * snd_soc_dapm_connect_input - connect dapm widgets
- * @codec: audio codec
- * @sink: name of target widget
- * @control: mixer control name
- * @source: name of source name
- *
- * Connects 2 dapm widgets together via a named audio path. The sink is
- * the widget receiving the audio signal, whilst the source is the sender
- * of the audio signal.
- *
- * This function has been deprecated in favour of snd_soc_dapm_add_routes().
- *
- * Returns 0 for success else error.
- */
-int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink,
-	const char *control, const char *source)
-{
-	return snd_soc_dapm_add_route(codec, sink, control, source);
-}
-EXPORT_SYMBOL_GPL(snd_soc_dapm_connect_input);
-
 /**
  * snd_soc_dapm_add_routes - Add routes between DAPM widgets
  * @codec: codec
-- 
cgit v1.2.3


From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 16 Oct 2008 23:17:09 +0200
Subject: lockstat: contend with points

We currently only provide points that have to wait on contention, also
lists the points we have to wait for.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/lockstat.txt | 50 +++++++++++++++++++++++++++++-----------------
 include/linux/lockdep.h    | 13 ++++++++----
 kernel/lockdep.c           | 33 +++++++++++++++++++-----------
 kernel/lockdep_proc.c      | 21 +++++++++++++++++--
 kernel/mutex.c             |  2 +-
 5 files changed, 82 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 02f36f5c64fe..9cb9138f7a79 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -71,34 +71,48 @@ Look at the current lock statistics:
 
 # less /proc/lock_stat
 
-01 lock_stat version 0.2
+01 lock_stat version 0.3
 02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 03                               class name    con-bounces    contentions   waittime-min   waittime-max waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total
 04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 05
-06               &inode->i_data.tree_lock-W:            15          21657           0.18     1093295.30 11547131054.85             58          10415           0.16          87.51        6387.60
-07               &inode->i_data.tree_lock-R:             0              0           0.00           0.00           0.00          23302         231198           0.25           8.45       98023.38
-08               --------------------------
-09                 &inode->i_data.tree_lock              0          [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
-10
-11 ...............................................................................................................................................................................................
-12
-13                              dcache_lock:          1037           1161           0.38          45.32         774.51           6611         243371           0.15         306.48       77387.24
-14                              -----------
-15                              dcache_lock            180          [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
-16                              dcache_lock            165          [<ffffffff802c002a>] d_alloc+0x15a/0x210
-17                              dcache_lock             33          [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
-18                              dcache_lock              1          [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
+06                          &mm->mmap_sem-W:           233            538 18446744073708       22924.27      607243.51           1342          45806           1.71        8595.89     1180582.34
+07                          &mm->mmap_sem-R:           205            587 18446744073708       28403.36      731975.00           1940         412426           0.58      187825.45     6307502.88
+08                          ---------------
+09                            &mm->mmap_sem            487          [<ffffffff8053491f>] do_page_fault+0x466/0x928
+10                            &mm->mmap_sem            179          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+11                            &mm->mmap_sem            279          [<ffffffff80210a57>] sys_mmap+0x75/0xce
+12                            &mm->mmap_sem             76          [<ffffffff802a490b>] sys_munmap+0x32/0x59
+13                          ---------------
+14                            &mm->mmap_sem            270          [<ffffffff80210a57>] sys_mmap+0x75/0xce
+15                            &mm->mmap_sem            431          [<ffffffff8053491f>] do_page_fault+0x466/0x928
+16                            &mm->mmap_sem            138          [<ffffffff802a490b>] sys_munmap+0x32/0x59
+17                            &mm->mmap_sem            145          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+18
+19 ...............................................................................................................................................................................................
+20
+21                              dcache_lock:           621            623           0.52         118.26        1053.02           6745          91930           0.29         316.29      118423.41
+22                              -----------
+23                              dcache_lock            179          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+24                              dcache_lock            113          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+25                              dcache_lock             99          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+26                              dcache_lock            104          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
+27                              -----------
+28                              dcache_lock            192          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+29                              dcache_lock             98          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+30                              dcache_lock             72          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+31                              dcache_lock            112          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
 
 This excerpt shows the first two lock class statistics. Line 01 shows the
 output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-10 and 13-18 show the actual
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
 statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 14) from the contention points.
+short separator (line 08, 13) from the contention points.
 
-The first lock (05-10) is a read/write lock, and shows two lines above the
+The first lock (05-18) is a read/write lock, and shows two lines above the
 short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol.
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
 
 The integer part of the time values is in us.
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0aa657aa8a1e..fc9f8e88123b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -73,6 +73,8 @@ struct lock_class_key {
 	struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
 };
 
+#define LOCKSTAT_POINTS		4
+
 /*
  * The lock-class itself:
  */
@@ -119,7 +121,8 @@ struct lock_class {
 	int				name_version;
 
 #ifdef CONFIG_LOCK_STAT
-	unsigned long			contention_point[4];
+	unsigned long			contention_point[LOCKSTAT_POINTS];
+	unsigned long			contending_point[LOCKSTAT_POINTS];
 #endif
 };
 
@@ -144,6 +147,7 @@ enum bounce_type {
 
 struct lock_class_stats {
 	unsigned long			contention_point[4];
+	unsigned long			contending_point[4];
 	struct lock_time		read_waittime;
 	struct lock_time		write_waittime;
 	struct lock_time		read_holdtime;
@@ -165,6 +169,7 @@ struct lockdep_map {
 	const char			*name;
 #ifdef CONFIG_LOCK_STAT
 	int				cpu;
+	unsigned long			ip;
 #endif
 };
 
@@ -355,7 +360,7 @@ struct lock_class_key { };
 #ifdef CONFIG_LOCK_STAT
 
 extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
-extern void lock_acquired(struct lockdep_map *lock);
+extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
 
 #define LOCK_CONTENDED(_lock, try, lock)			\
 do {								\
@@ -363,13 +368,13 @@ do {								\
 		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
 		lock(_lock);					\
 	}							\
-	lock_acquired(&(_lock)->dep_map);			\
+	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
 } while (0)
 
 #else /* CONFIG_LOCK_STAT */
 
 #define lock_contended(lockdep_map, ip) do {} while (0)
-#define lock_acquired(lockdep_map) do {} while (0)
+#define lock_acquired(lockdep_map, ip) do {} while (0)
 
 #define LOCK_CONTENDED(_lock, try, lock) \
 	lock(_lock)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dbda475b13bd..234a9dccb4be 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 #ifdef CONFIG_LOCK_STAT
 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
 
-static int lock_contention_point(struct lock_class *class, unsigned long ip)
+static int lock_point(unsigned long points[], unsigned long ip)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
-		if (class->contention_point[i] == 0) {
-			class->contention_point[i] = ip;
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
+		if (points[i] == 0) {
+			points[i] = ip;
 			break;
 		}
-		if (class->contention_point[i] == ip)
+		if (points[i] == ip)
 			break;
 	}
 
@@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
 		for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
 			stats.contention_point[i] += pcs->contention_point[i];
 
+		for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+			stats.contending_point[i] += pcs->contending_point[i];
+
 		lock_time_add(&pcs->read_waittime, &stats.read_waittime);
 		lock_time_add(&pcs->write_waittime, &stats.write_waittime);
 
@@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
 		memset(cpu_stats, 0, sizeof(struct lock_class_stats));
 	}
 	memset(class->contention_point, 0, sizeof(class->contention_point));
+	memset(class->contending_point, 0, sizeof(class->contending_point));
 }
 
 static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -3001,7 +3005,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	struct held_lock *hlock, *prev_hlock;
 	struct lock_class_stats *stats;
 	unsigned int depth;
-	int i, point;
+	int i, contention_point, contending_point;
 
 	depth = curr->lockdep_depth;
 	if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3025,18 +3029,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 found_it:
 	hlock->waittime_stamp = sched_clock();
 
-	point = lock_contention_point(hlock_class(hlock), ip);
+	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+	contending_point = lock_point(hlock_class(hlock)->contending_point,
+				      lock->ip);
 
 	stats = get_lock_stats(hlock_class(hlock));
-	if (point < ARRAY_SIZE(stats->contention_point))
-		stats->contention_point[point]++;
+	if (contention_point < LOCKSTAT_POINTS)
+		stats->contention_point[contention_point]++;
+	if (contending_point < LOCKSTAT_POINTS)
+		stats->contending_point[contending_point]++;
 	if (lock->cpu != smp_processor_id())
 		stats->bounces[bounce_contended + !!hlock->read]++;
 	put_lock_stats(stats);
 }
 
 static void
-__lock_acquired(struct lockdep_map *lock)
+__lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
@@ -3085,6 +3093,7 @@ found_it:
 	put_lock_stats(stats);
 
 	lock->cpu = cpu;
+	lock->ip = ip;
 }
 
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3106,7 +3115,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-void lock_acquired(struct lockdep_map *lock)
+void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
@@ -3119,7 +3128,7 @@ void lock_acquired(struct lockdep_map *lock)
 	raw_local_irq_save(flags);
 	check_flags(flags);
 	current->lockdep_recursion = 1;
-	__lock_acquired(lock);
+	__lock_acquired(lock, ip);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 8d3a6eba8d5a..13716b813896 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -557,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 	if (stats->read_holdtime.nr)
 		namelen += 2;
 
-	for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
 		char sym[KSYM_SYMBOL_LEN];
 		char ip[32];
 
@@ -574,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 				stats->contention_point[i],
 				ip, sym);
 	}
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
+		char sym[KSYM_SYMBOL_LEN];
+		char ip[32];
+
+		if (class->contending_point[i] == 0)
+			break;
+
+		if (!i)
+			seq_line(m, '-', 40-namelen, namelen);
+
+		sprint_symbol(sym, class->contending_point[i]);
+		snprintf(ip, sizeof(ip), "[<%p>]",
+				(void *)class->contending_point[i]);
+		seq_printf(m, "%40s %14lu %29s %s\n", name,
+				stats->contending_point[i],
+				ip, sym);
+	}
 	if (i) {
 		seq_puts(m, "\n");
 		seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -583,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 
 static void seq_header(struct seq_file *m)
 {
-	seq_printf(m, "lock_stat version 0.2\n");
+	seq_printf(m, "lock_stat version 0.3\n");
 	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
 	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
 			"%14s %14s\n",
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779dc65d4..39a3816b68d9 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	}
 
 done:
-	lock_acquired(&lock->dep_map);
+	lock_acquired(&lock->dep_map, ip);
 	/* got the lock - rejoice! */
 	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
 	debug_mutex_set_owner(lock, task_thread_info(task));
-- 
cgit v1.2.3


From bbaf5e97337287479eb78dbc3822d9560bbfd2e2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 24 Oct 2008 18:16:50 +0200
Subject: ALSA: Add hrtimer backend for ALSA timer interface

Added the hrtimer backend for ALSA timer interface.
It can be used for the sequencer timer source.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/asound.h |   1 +
 sound/core/Kconfig     |  21 ++++++++
 sound/core/Makefile    |   2 +
 sound/core/hrtimer.c   | 140 +++++++++++++++++++++++++++++++++++++++++++++++++
 sound/core/seq/seq.c   |   4 +-
 5 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 sound/core/hrtimer.c

(limited to 'include')

diff --git a/include/sound/asound.h b/include/sound/asound.h
index 2c4dc908a54a..1c02ed1d7c4a 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -575,6 +575,7 @@ enum {
 #define SNDRV_TIMER_GLOBAL_SYSTEM	0
 #define SNDRV_TIMER_GLOBAL_RTC		1
 #define SNDRV_TIMER_GLOBAL_HPET		2
+#define SNDRV_TIMER_GLOBAL_HRTIMER	3
 
 /* info flags */
 #define SNDRV_TIMER_FLG_SLAVE		(1<<0)	/* cannot be controlled */
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index 66348c92f88d..406a45010985 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -95,6 +95,26 @@ config SND_SEQUENCER_OSS
 	  this will be compiled as a module. The module will be called
 	  snd-seq-oss.
 
+config SND_HRTIMER
+	tristate "HR-timer backend support"
+	depends on HIGH_RES_TIMERS
+	select SND_TIMER
+	help
+	  Say Y here to enable HR-timer backend for ALSA timer.  ALSA uses
+	  the hrtimer as a precise timing source. The ALSA sequencer code
+	  also can use this timing source.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-hrtimer.
+
+config SND_SEQ_HRTIMER_DEFAULT
+	bool "Use HR-timer as default sequencer timer"
+	depends on SND_HRTIMER && SND_SEQUENCER
+	default y
+	help
+	  Say Y here to use the HR-timer backend as the default sequencer
+	  timer.
+
 config SND_RTCTIMER
 	tristate "RTC Timer support"
 	depends on RTC
@@ -114,6 +134,7 @@ config SND_RTCTIMER
 config SND_SEQ_RTCTIMER_DEFAULT
 	bool "Use RTC as default sequencer timer"
 	depends on SND_RTCTIMER && SND_SEQUENCER
+	dpeends on !SND_SEQ_HRTIMER_DEFAULT
 	default y
 	help
 	  Say Y here to use the RTC timer as the default sequencer
diff --git a/sound/core/Makefile b/sound/core/Makefile
index d57125a5687d..4229052e7b91 100644
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -17,12 +17,14 @@ snd-page-alloc-$(CONFIG_HAS_DMA) += sgbuf.o
 
 snd-rawmidi-objs  := rawmidi.o
 snd-timer-objs    := timer.o
+snd-hrtimer-objs  := hrtimer.o
 snd-rtctimer-objs := rtctimer.o
 snd-hwdep-objs    := hwdep.o
 
 obj-$(CONFIG_SND) 		+= snd.o
 obj-$(CONFIG_SND_HWDEP)		+= snd-hwdep.o
 obj-$(CONFIG_SND_TIMER)		+= snd-timer.o
+obj-$(CONFIG_SND_HRTIMER)	+= snd-hrtimer.o
 obj-$(CONFIG_SND_RTCTIMER)	+= snd-rtctimer.o
 obj-$(CONFIG_SND_PCM)		+= snd-pcm.o snd-page-alloc.o
 obj-$(CONFIG_SND_RAWMIDI)	+= snd-rawmidi.o
diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c
new file mode 100644
index 000000000000..b712d7f211ff
--- /dev/null
+++ b/sound/core/hrtimer.c
@@ -0,0 +1,140 @@
+/*
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/hrtimer.h>
+#include <sound/core.h>
+#include <sound/timer.h>
+
+MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
+MODULE_DESCRIPTION("ALSA hrtimer backend");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("snd-timer-" __stringify(SNDRV_TIMER_GLOBAL_HRTIMER));
+
+#define NANO_SEC	1000000000UL	/* 10^9 in sec */
+static unsigned int resolution;
+
+struct snd_hrtimer {
+	struct snd_timer *timer;
+	struct hrtimer hrt;
+};
+
+static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt)
+{
+	struct snd_hrtimer *stime = container_of(hrt, struct snd_hrtimer, hrt);
+	struct snd_timer *t = stime->timer;
+	hrtimer_forward_now(hrt, ktime_set(0, t->sticks * resolution));
+	snd_timer_interrupt(stime->timer, t->sticks);
+	return HRTIMER_RESTART;
+}
+
+static int snd_hrtimer_open(struct snd_timer *t)
+{
+	struct snd_hrtimer *stime;
+
+	stime = kmalloc(sizeof(*stime), GFP_KERNEL);
+	if (!stime)
+		return -ENOMEM;
+	hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	stime->timer = t;
+	stime->hrt.cb_mode = HRTIMER_CB_SOFTIRQ;
+	stime->hrt.function = snd_hrtimer_callback;
+	t->private_data = stime;
+	return 0;
+}
+
+static int snd_hrtimer_close(struct snd_timer *t)
+{
+	struct snd_hrtimer *stime = t->private_data;
+
+	if (stime) {
+		hrtimer_cancel(&stime->hrt);
+		kfree(stime);
+		t->private_data = NULL;
+	}
+	return 0;
+}
+
+static int snd_hrtimer_start(struct snd_timer *t)
+{
+	struct snd_hrtimer *stime = t->private_data;
+
+	hrtimer_start(&stime->hrt, ktime_set(0, t->sticks * resolution),
+		      HRTIMER_MODE_REL);
+	return 0;
+}
+
+static int snd_hrtimer_stop(struct snd_timer *t)
+{
+	struct snd_hrtimer *stime = t->private_data;
+
+	hrtimer_cancel(&stime->hrt);
+	return 0;
+}
+
+static struct snd_timer_hardware hrtimer_hw = {
+	.flags =	(SNDRV_TIMER_HW_AUTO |
+			 /*SNDRV_TIMER_HW_FIRST |*/
+			 SNDRV_TIMER_HW_TASKLET),
+	.open =		snd_hrtimer_open,
+	.close =	snd_hrtimer_close,
+	.start =	snd_hrtimer_start,
+	.stop =		snd_hrtimer_stop,
+};
+
+/*
+ * entry functions
+ */
+
+static struct snd_timer *mytimer;
+
+static int __init snd_hrtimer_init(void)
+{
+	struct snd_timer *timer;
+	struct timespec tp;
+	int err;
+
+	hrtimer_get_res(CLOCK_MONOTONIC, &tp);
+	if (tp.tv_sec > 0 || !tp.tv_nsec) {
+		snd_printk(KERN_ERR
+			   "snd-hrtimer: Invalid resolution %u.%09u",
+			   (unsigned)tp.tv_sec, (unsigned)tp.tv_nsec);
+		return -EINVAL;
+	}
+	resolution = tp.tv_nsec;
+
+	/* Create a new timer and set up the fields */
+	err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER,
+				   &timer);
+	if (err < 0)
+		return err;
+
+	timer->module = THIS_MODULE;
+	strcpy(timer->name, "HR timer");
+	timer->hw = hrtimer_hw;
+	timer->hw.resolution = resolution;
+	timer->hw.ticks = NANO_SEC / resolution;
+
+	err = snd_timer_global_register(timer);
+	if (err < 0) {
+		snd_timer_global_free(timer);
+		return err;
+	}
+	mytimer = timer; /* remember this */
+
+	return 0;
+}
+
+static void __exit snd_hrtimer_exit(void)
+{
+	if (mytimer) {
+		snd_timer_global_free(mytimer);
+		mytimer = NULL;
+	}
+}
+
+module_init(snd_hrtimer_init);
+module_exit(snd_hrtimer_exit);
diff --git a/sound/core/seq/seq.c b/sound/core/seq/seq.c
index ee0f8405ab35..bf09a5ad1865 100644
--- a/sound/core/seq/seq.c
+++ b/sound/core/seq/seq.c
@@ -43,7 +43,9 @@ int seq_default_timer_class = SNDRV_TIMER_CLASS_GLOBAL;
 int seq_default_timer_sclass = SNDRV_TIMER_SCLASS_NONE;
 int seq_default_timer_card = -1;
 int seq_default_timer_device =
-#ifdef CONFIG_SND_SEQ_RTCTIMER_DEFAULT
+#ifdef CONFIG_SND_SEQ_HRTIMER_DEFAULT
+	SNDRV_TIMER_GLOBAL_HRTIMER
+#elif defined(CONFIG_SND_SEQ_RTCTIMER_DEFAULT)
 	SNDRV_TIMER_GLOBAL_RTC
 #else
 	SNDRV_TIMER_GLOBAL_SYSTEM
-- 
cgit v1.2.3


From a53ccab3ccac9e8676a683df9822a2daec83ef54 Mon Sep 17 00:00:00 2001
From: Matthew Ranostay <mranostay@embeddedalley.com>
Date: Sat, 25 Oct 2008 01:05:04 -0400
Subject: ALSA: jack: lineout support to jack abstraction layer

This patch introduces support for reporting SW_LINEOUT_INSERT detection events
via the jack abstraction layer.

Also adds a SND_JACK_LINEOUT define to the input system header.

Signed-off-by: Matthew Ranostay <mranostay@embeddedalley.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/input.h | 1 +
 include/sound/jack.h  | 1 +
 sound/core/jack.c     | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/input.h b/include/linux/input.h
index a5802c9c81a4..7323d2ff5151 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -644,6 +644,7 @@ struct input_absinfo {
 #define SW_RADIO		SW_RFKILL_ALL	/* deprecated */
 #define SW_MICROPHONE_INSERT	0x04  /* set = inserted */
 #define SW_DOCK			0x05  /* set = plugged into dock */
+#define SW_LINEOUT_INSERT	0x06  /* set = inserted */
 #define SW_MAX			0x0f
 #define SW_CNT			(SW_MAX+1)
 
diff --git a/include/sound/jack.h b/include/sound/jack.h
index b1b2b8b59adb..7cb25f4b50bb 100644
--- a/include/sound/jack.h
+++ b/include/sound/jack.h
@@ -35,6 +35,7 @@ enum snd_jack_types {
 	SND_JACK_HEADPHONE	= 0x0001,
 	SND_JACK_MICROPHONE	= 0x0002,
 	SND_JACK_HEADSET	= SND_JACK_HEADPHONE | SND_JACK_MICROPHONE,
+	SND_JACK_LINEOUT	= 0x0004,
 };
 
 struct snd_jack {
diff --git a/sound/core/jack.c b/sound/core/jack.c
index 8133a2b173a5..c4bb9bad3133 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -102,6 +102,9 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
 	if (type & SND_JACK_HEADPHONE)
 		input_set_capability(jack->input_dev, EV_SW,
 				     SW_HEADPHONE_INSERT);
+	if (type & SND_JACK_LINEOUT)
+		input_set_capability(jack->input_dev, EV_SW,
+				     SW_LINEOUT_INSERT);
 	if (type & SND_JACK_MICROPHONE)
 		input_set_capability(jack->input_dev, EV_SW,
 				     SW_MICROPHONE_INSERT);
@@ -150,6 +153,9 @@ void snd_jack_report(struct snd_jack *jack, int status)
 	if (jack->type & SND_JACK_HEADPHONE)
 		input_report_switch(jack->input_dev, SW_HEADPHONE_INSERT,
 				    status & SND_JACK_HEADPHONE);
+	if (jack->type & SND_JACK_LINEOUT)
+		input_report_switch(jack->input_dev, SW_LINEOUT_INSERT,
+				    status & SND_JACK_LINEOUT);
 	if (jack->type & SND_JACK_MICROPHONE)
 		input_report_switch(jack->input_dev, SW_MICROPHONE_INSERT,
 				    status & SND_JACK_MICROPHONE);
-- 
cgit v1.2.3


From 505e371da195fad20cb8aaf45407a2849774d6d0 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 15 Oct 2008 14:56:42 +0800
Subject: markers: remove exported symbol marker_probe_cb_noarg()

marker_probe_cb_noarg() should not be seen by outer code.
this patch remove it.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 2 --
 kernel/marker.c        | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..4cf45472d9f5 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function;
 
 extern void marker_probe_cb(const struct marker *mdata,
 	void *call_private, ...);
-extern void marker_probe_cb_noarg(const struct marker *mdata,
-	void *call_private, ...);
 
 /*
  * Connect a probe to a marker.
diff --git a/kernel/marker.c b/kernel/marker.c
index 75a1a17cd78a..23192646555f 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
  *
  * Should be connected to markers "MARK_NOARGS".
  */
-void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
+static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 {
 	va_list args;	/* not initialized */
 	char ptype;
@@ -198,7 +198,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 	}
 	rcu_read_unlock_sched();
 }
-EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
 static void free_old_closure(struct rcu_head *head)
 {
-- 
cgit v1.2.3


From 944ac4259e39801c843a915c3da8194ac9af0440 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 23 Oct 2008 19:26:08 -0400
Subject: ftrace: ftrace dump on oops control

Impact: add (default-off) dump-trace-on-oops flag

Currently, ftrace is set up to dump its contents to the console if the
kernel panics or oops. This can be annoying if you have trace data in
the buffers and you experience an oops, but the trace data is old or
static.

Usually when you want ftrace to dump its contents is when you are debugging
your system and you have set up ftrace to trace the events leading to
an oops.

This patch adds a control variable called "ftrace_dump_on_oops" that will
enable the ftrace dump to console on oops. This variable is default off
but a developer can enable it either through the kernel command line
by adding "ftrace_dump_on_oops" or at run time by setting (or disabling)
/proc/sys/kernel/ftrace_dump_on_oops.

v2:

   Replaced /** with /* as Randy explained that kernel-doc does
    not yet handle variables.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  2 ++
 kernel/sysctl.c        | 10 ++++++++++
 kernel/trace/trace.c   | 29 ++++++++++++++++++++++++++---
 3 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a3d46151be19..9623b7b9e5a5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,6 +165,8 @@ static inline void __ftrace_enabled_restore(int enabled)
 #endif
 
 #ifdef CONFIG_TRACING
+extern int ftrace_dump_on_oops;
+
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a13bd4dfaeb1..84754f5801e6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &ftrace_enable_sysctl,
 	},
 #endif
+#ifdef CONFIG_TRACING
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "ftrace_dump_on_opps",
+		.data		= &ftrace_dump_on_oops,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_MODULES
 	{
 		.ctl_name	= KERN_MODPROBE,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d345d649d073..47f46cbdd860 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -63,6 +63,28 @@ static cpumask_t __read_mostly		tracing_buffer_mask;
 
 static int tracing_disabled = 1;
 
+/*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console.  This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+int ftrace_dump_on_oops;
+
+static int __init set_ftrace_dump_on_oops(char *str)
+{
+	ftrace_dump_on_oops = 1;
+	return 1;
+}
+__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
+
 long
 ns2usecs(cycle_t nsec)
 {
@@ -3021,7 +3043,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
 static int trace_panic_handler(struct notifier_block *this,
 			       unsigned long event, void *unused)
 {
-	ftrace_dump();
+	if (ftrace_dump_on_oops)
+		ftrace_dump();
 	return NOTIFY_OK;
 }
 
@@ -3037,7 +3060,8 @@ static int trace_die_handler(struct notifier_block *self,
 {
 	switch (val) {
 	case DIE_OOPS:
-		ftrace_dump();
+		if (ftrace_dump_on_oops)
+			ftrace_dump();
 		break;
 	default:
 		break;
@@ -3078,7 +3102,6 @@ trace_printk_seq(struct trace_seq *s)
 	trace_seq_reset(s);
 }
 
-
 void ftrace_dump(void)
 {
 	static DEFINE_SPINLOCK(ftrace_dump_lock);
-- 
cgit v1.2.3


From 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 27 Oct 2008 12:28:25 -0700
Subject: net: implement emergency route cache rebulds when gc_elasticity is
 exceeded

This is a patch to provide on demand route cache rebuilding.  Currently, our
route cache is rebulid periodically regardless of need.  This introduced
unneeded periodic latency.  This patch offers a better approach.  Using code
provided by Eric Dumazet, we compute the standard deviation of the average hash
bucket chain length while running rt_check_expire.  Should any given chain
length grow to larger that average plus 4 standard deviations, we trigger an
emergency hash table rebuild for that net namespace.  This allows for the common
case in which chains are well behaved and do not grow unevenly to not incur any
latency at all, while those systems (which may be being maliciously attacked),
only rebuild when the attack is detected.  This patch take 2 other factors into
account:
1) chains with multiple entries that differ by attributes that do not affect the
hash value are only counted once, so as not to unduly bias system to rebuilding
if features like QOS are heavily used
2) if rebuilding crosses a certain threshold (which is adjustable via the added
sysctl in this patch), route caching is disabled entirely for that net
namespace, since constant rebuilding is less efficient that no caching at all

Tested successfully by me.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |   6 ++
 include/net/netns/ipv4.h               |   2 +
 net/ipv4/route.c                       | 132 ++++++++++++++++++++++++++++++++-
 net/ipv4/sysctl_net_ipv4.c             |  12 +++
 4 files changed, 150 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d84932650fd3..c7712787933c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -27,6 +27,12 @@ min_adv_mss - INTEGER
 	The advertised MSS depends on the first hop route MTU, but will
 	never be lower than this setting.
 
+rt_cache_rebuild_count - INTEGER
+	The per net-namespace route cache emergency rebuild threshold.
+	Any net-namespace having its route cache rebuilt due to
+	a hash bucket chain being too long more than this many times
+	will have its route caching disabled
+
 IP Fragmentation:
 
 ipfrag_high_thresh - INTEGER
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ece1c926b5d1..977f482d97a9 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -49,6 +49,8 @@ struct netns_ipv4 {
 	int sysctl_icmp_ratelimit;
 	int sysctl_icmp_ratemask;
 	int sysctl_icmp_errors_use_inbound_ifaddr;
+	int sysctl_rt_cache_rebuild_count;
+	int current_rt_cache_rebuild_count;
 
 	struct timer_list rt_secret_timer;
 	atomic_t rt_genid;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2ea6dcc3e2cc..21ce7e1b2284 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int ip_rt_secret_interval __read_mostly	= 10 * 60 * HZ;
+static int rt_chain_length_max __read_mostly	= 20;
 
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
@@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
+static void rt_emergency_hash_rebuild(struct net *net);
 
 
 static struct dst_ops ipv4_dst_ops = {
@@ -201,6 +203,7 @@ const __u8 ip_tos2prio[16] = {
 struct rt_hash_bucket {
 	struct rtable	*chain;
 };
+
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
 	defined(CONFIG_PROVE_LOCKING)
 /*
@@ -674,6 +677,20 @@ static inline u32 rt_score(struct rtable *rt)
 	return score;
 }
 
+static inline bool rt_caching(const struct net *net)
+{
+	return net->ipv4.current_rt_cache_rebuild_count <=
+		net->ipv4.sysctl_rt_cache_rebuild_count;
+}
+
+static inline bool compare_hash_inputs(const struct flowi *fl1,
+					const struct flowi *fl2)
+{
+	return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
+		(fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) |
+		(fl1->iif ^ fl2->iif)) == 0);
+}
+
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
 	return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
@@ -753,11 +770,24 @@ static void rt_do_flush(int process_context)
 	}
 }
 
+/*
+ * While freeing expired entries, we compute average chain length
+ * and standard deviation, using fixed-point arithmetic.
+ * This to have an estimation of rt_chain_length_max
+ *  rt_chain_length_max = max(elasticity, AVG + 4*SD)
+ * We use 3 bits for frational part, and 29 (or 61) for magnitude.
+ */
+
+#define FRACT_BITS 3
+#define ONE (1UL << FRACT_BITS)
+
 static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
 	struct rtable *rth, **rthp;
+	unsigned long length = 0, samples = 0;
+	unsigned long sum = 0, sum2 = 0;
 	u64 mult;
 
 	mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
@@ -766,6 +796,7 @@ static void rt_check_expire(void)
 	goal = (unsigned int)mult;
 	if (goal > rt_hash_mask)
 		goal = rt_hash_mask + 1;
+	length = 0;
 	for (; goal > 0; goal--) {
 		unsigned long tmo = ip_rt_gc_timeout;
 
@@ -775,6 +806,8 @@ static void rt_check_expire(void)
 		if (need_resched())
 			cond_resched();
 
+		samples++;
+
 		if (*rthp == NULL)
 			continue;
 		spin_lock_bh(rt_hash_lock_addr(i));
@@ -789,11 +822,29 @@ static void rt_check_expire(void)
 				if (time_before_eq(jiffies, rth->u.dst.expires)) {
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
+					/*
+					 * Only bump our length if the hash
+					 * inputs on entries n and n+1 are not
+					 * the same, we only count entries on
+					 * a chain with equal hash inputs once
+					 * so that entries for different QOS
+					 * levels, and other non-hash input
+					 * attributes don't unfairly skew
+					 * the length computation
+					 */
+					if ((*rthp == NULL) ||
+					    !compare_hash_inputs(&(*rthp)->fl,
+								 &rth->fl))
+						length += ONE;
 					continue;
 				}
 			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
 				tmo >>= 1;
 				rthp = &rth->u.dst.rt_next;
+				if ((*rthp == NULL) ||
+				    !compare_hash_inputs(&(*rthp)->fl,
+							 &rth->fl))
+					length += ONE;
 				continue;
 			}
 
@@ -802,6 +853,15 @@ static void rt_check_expire(void)
 			rt_free(rth);
 		}
 		spin_unlock_bh(rt_hash_lock_addr(i));
+		sum += length;
+		sum2 += length*length;
+	}
+	if (samples) {
+		unsigned long avg = sum / samples;
+		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
+		rt_chain_length_max = max_t(unsigned long,
+					ip_rt_gc_elasticity,
+					(avg + 4*sd) >> FRACT_BITS);
 	}
 	rover = i;
 }
@@ -851,6 +911,26 @@ static void rt_secret_rebuild(unsigned long __net)
 	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
+static void rt_secret_rebuild_oneshot(struct net *net)
+{
+	del_timer_sync(&net->ipv4.rt_secret_timer);
+	rt_cache_invalidate(net);
+	if (ip_rt_secret_interval) {
+		net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval;
+		add_timer(&net->ipv4.rt_secret_timer);
+	}
+}
+
+static void rt_emergency_hash_rebuild(struct net *net)
+{
+	if (net_ratelimit()) {
+		printk(KERN_WARNING "Route hash chain too long!\n");
+		printk(KERN_WARNING "Adjust your secret_interval!\n");
+	}
+
+	rt_secret_rebuild_oneshot(net);
+}
+
 /*
    Short description of GC goals.
 
@@ -989,6 +1069,7 @@ out:	return 0;
 static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 {
 	struct rtable	*rth, **rthp;
+	struct rtable	*rthi;
 	unsigned long	now;
 	struct rtable *cand, **candp;
 	u32 		min_score;
@@ -1002,7 +1083,13 @@ restart:
 	candp = NULL;
 	now = jiffies;
 
+	if (!rt_caching(dev_net(rt->u.dst.dev))) {
+		rt_drop(rt);
+		return 0;
+	}
+
 	rthp = &rt_hash_table[hash].chain;
+	rthi = NULL;
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
@@ -1048,6 +1135,17 @@ restart:
 		chain_length++;
 
 		rthp = &rth->u.dst.rt_next;
+
+		/*
+		 * check to see if the next entry in the chain
+		 * contains the same hash input values as rt.  If it does
+		 * This is where we will insert into the list, instead of
+		 * at the head.  This groups entries that differ by aspects not
+		 * relvant to the hash function together, which we use to adjust
+		 * our chain length
+		 */
+		if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl))
+			rthi = rth;
 	}
 
 	if (cand) {
@@ -1061,6 +1159,16 @@ restart:
 			*candp = cand->u.dst.rt_next;
 			rt_free(cand);
 		}
+	} else {
+		if (chain_length > rt_chain_length_max) {
+			struct net *net = dev_net(rt->u.dst.dev);
+			int num = ++net->ipv4.current_rt_cache_rebuild_count;
+			if (!rt_caching(dev_net(rt->u.dst.dev))) {
+				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
+					rt->u.dst.dev->name, num);
+			}
+			rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev));
+		}
 	}
 
 	/* Try to bind route to arp only if it is output
@@ -1098,7 +1206,11 @@ restart:
 		}
 	}
 
-	rt->u.dst.rt_next = rt_hash_table[hash].chain;
+	if (rthi)
+		rt->u.dst.rt_next = rthi->u.dst.rt_next;
+	else
+		rt->u.dst.rt_next = rt_hash_table[hash].chain;
+
 #if RT_CACHE_DEBUG >= 2
 	if (rt->u.dst.rt_next) {
 		struct rtable *trt;
@@ -1114,7 +1226,11 @@ restart:
 	 * previous writes to rt are comitted to memory
 	 * before making rt visible to other CPUS.
 	 */
-	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
+	if (rthi)
+		rcu_assign_pointer(rthi->u.dst.rt_next, rt);
+	else
+		rcu_assign_pointer(rt_hash_table[hash].chain, rt);
+
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 	*rp = rt;
 	return 0;
@@ -1217,6 +1333,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 	    || ipv4_is_zeronet(new_gw))
 		goto reject_redirect;
 
+	if (!rt_caching(net))
+		goto reject_redirect;
+
 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
 			goto reject_redirect;
@@ -2130,6 +2249,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct net *net;
 
 	net = dev_net(dev);
+
+	if (!rt_caching(net))
+		goto skip_cache;
+
 	tos &= IPTOS_RT_MASK;
 	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
@@ -2154,6 +2277,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	}
 	rcu_read_unlock();
 
+skip_cache:
 	/* Multicast recognition logic is moved from route cache to here.
 	   The problem was that too many Ethernet cards have broken/missing
 	   hardware multicast filters :-( As result the host on multicasting
@@ -2539,6 +2663,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	unsigned hash;
 	struct rtable *rth;
 
+	if (!rt_caching(net))
+		goto slow_output;
+
 	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 
 	rcu_read_lock_bh();
@@ -2563,6 +2690,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	}
 	rcu_read_unlock_bh();
 
+slow_output:
 	return ip_route_output_slow(net, rp, flp);
 }
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1bb10df8ce7d..0cc8d31f9ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -795,6 +795,14 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "rt_cache_rebuild_count",
+		.data		= &init_net.ipv4.sysctl_rt_cache_rebuild_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ }
 };
 
@@ -827,8 +835,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 			&net->ipv4.sysctl_icmp_ratelimit;
 		table[5].data =
 			&net->ipv4.sysctl_icmp_ratemask;
+		table[6].data =
+			&net->ipv4.sysctl_rt_cache_rebuild_count;
 	}
 
+	net->ipv4.sysctl_rt_cache_rebuild_count = 4;
+
 	net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
 			net_ipv4_ctl_path, table);
 	if (net->ipv4.ipv4_hdr == NULL)
-- 
cgit v1.2.3


From b057efd4d226fcc3a92b0dc6d8ea8e8185ecb260 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 28 Oct 2008 11:59:11 -0700
Subject: netlink: constify struct nlattr * arg to parsing functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 22 +++++++++++-----------
 net/netlink/attr.c    |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 3643bbb8e585..46b7764f1774 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -233,7 +233,7 @@ extern int		nla_parse(struct nlattr *tb[], int maxtype,
 extern struct nlattr *	nla_find(struct nlattr *head, int len, int attrtype);
 extern size_t		nla_strlcpy(char *dst, const struct nlattr *nla,
 				    size_t dstsize);
-extern int		nla_memcpy(void *dest, struct nlattr *src, int count);
+extern int		nla_memcpy(void *dest, const struct nlattr *src, int count);
 extern int		nla_memcmp(const struct nlattr *nla, const void *data,
 				   size_t size);
 extern int		nla_strcmp(const struct nlattr *nla, const char *str);
@@ -741,7 +741,7 @@ static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
  * See nla_parse()
  */
 static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
-				   struct nlattr *nla,
+				   const struct nlattr *nla,
 				   const struct nla_policy *policy)
 {
 	return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
@@ -875,7 +875,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
  * nla_get_u32 - return payload of u32 attribute
  * @nla: u32 netlink attribute
  */
-static inline u32 nla_get_u32(struct nlattr *nla)
+static inline u32 nla_get_u32(const struct nlattr *nla)
 {
 	return *(u32 *) nla_data(nla);
 }
@@ -884,7 +884,7 @@ static inline u32 nla_get_u32(struct nlattr *nla)
  * nla_get_be32 - return payload of __be32 attribute
  * @nla: __be32 netlink attribute
  */
-static inline __be32 nla_get_be32(struct nlattr *nla)
+static inline __be32 nla_get_be32(const struct nlattr *nla)
 {
 	return *(__be32 *) nla_data(nla);
 }
@@ -893,7 +893,7 @@ static inline __be32 nla_get_be32(struct nlattr *nla)
  * nla_get_u16 - return payload of u16 attribute
  * @nla: u16 netlink attribute
  */
-static inline u16 nla_get_u16(struct nlattr *nla)
+static inline u16 nla_get_u16(const struct nlattr *nla)
 {
 	return *(u16 *) nla_data(nla);
 }
@@ -902,7 +902,7 @@ static inline u16 nla_get_u16(struct nlattr *nla)
  * nla_get_be16 - return payload of __be16 attribute
  * @nla: __be16 netlink attribute
  */
-static inline __be16 nla_get_be16(struct nlattr *nla)
+static inline __be16 nla_get_be16(const struct nlattr *nla)
 {
 	return *(__be16 *) nla_data(nla);
 }
@@ -911,7 +911,7 @@ static inline __be16 nla_get_be16(struct nlattr *nla)
  * nla_get_le16 - return payload of __le16 attribute
  * @nla: __le16 netlink attribute
  */
-static inline __le16 nla_get_le16(struct nlattr *nla)
+static inline __le16 nla_get_le16(const struct nlattr *nla)
 {
 	return *(__le16 *) nla_data(nla);
 }
@@ -920,7 +920,7 @@ static inline __le16 nla_get_le16(struct nlattr *nla)
  * nla_get_u8 - return payload of u8 attribute
  * @nla: u8 netlink attribute
  */
-static inline u8 nla_get_u8(struct nlattr *nla)
+static inline u8 nla_get_u8(const struct nlattr *nla)
 {
 	return *(u8 *) nla_data(nla);
 }
@@ -929,7 +929,7 @@ static inline u8 nla_get_u8(struct nlattr *nla)
  * nla_get_u64 - return payload of u64 attribute
  * @nla: u64 netlink attribute
  */
-static inline u64 nla_get_u64(struct nlattr *nla)
+static inline u64 nla_get_u64(const struct nlattr *nla)
 {
 	u64 tmp;
 
@@ -942,7 +942,7 @@ static inline u64 nla_get_u64(struct nlattr *nla)
  * nla_get_flag - return payload of flag attribute
  * @nla: flag netlink attribute
  */
-static inline int nla_get_flag(struct nlattr *nla)
+static inline int nla_get_flag(const struct nlattr *nla)
 {
 	return !!nla;
 }
@@ -953,7 +953,7 @@ static inline int nla_get_flag(struct nlattr *nla)
  *
  * Returns the number of milliseconds in jiffies.
  */
-static inline unsigned long nla_get_msecs(struct nlattr *nla)
+static inline unsigned long nla_get_msecs(const struct nlattr *nla)
 {
 	u64 msecs = nla_get_u64(nla);
 
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 2d106cfe1d27..c83fea7da9a8 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -233,7 +233,7 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
  *
  * Returns the number of bytes copied.
  */
-int nla_memcpy(void *dest, struct nlattr *src, int count)
+int nla_memcpy(void *dest, const struct nlattr *src, int count)
 {
 	int minlen = min_t(int, count, nla_len(src));
 
-- 
cgit v1.2.3


From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 28 Oct 2008 13:24:06 -0700
Subject: net: reduce structures when XFRM=n

ifdef out
* struct sk_buff::sp		(pointer)
* struct dst_entry::xfrm	(pointer)
* struct sock::sk_policy	(2 pointers)

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h   | 15 ++++++++++++++-
 include/net/dst.h        |  3 ++-
 include/net/sock.h       |  2 ++
 include/net/xfrm.h       |  4 ++++
 net/core/skbuff.c        |  2 +-
 net/ipv4/icmp.c          |  3 ++-
 net/ipv4/ip_forward.c    |  2 +-
 net/ipv4/route.c         |  2 ++
 net/ipv6/icmp.c          |  3 ++-
 net/ipv6/ip6_output.c    |  2 +-
 security/selinux/hooks.c |  4 ++--
 11 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2725f4e5a9bf..487e34507b41 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -269,8 +269,9 @@ struct sk_buff {
 		struct  dst_entry	*dst;
 		struct  rtable		*rtable;
 	};
+#ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
-
+#endif
 	/*
 	 * This is the control buffer. It is free to use for every
 	 * layer. Please put your private variables there. If you
@@ -1864,6 +1865,18 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu
 	to->queue_mapping = from->queue_mapping;
 }
 
+#ifdef CONFIG_XFRM
+static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
+{
+	return skb->sp;
+}
+#else
+static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
+{
+	return NULL;
+}
+#endif
+
 static inline int skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/include/net/dst.h b/include/net/dst.h
index 8a8b71e5f3f1..f96c4ba4dd32 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -59,8 +59,9 @@ struct dst_entry
 
 	struct neighbour	*neighbour;
 	struct hh_cache		*hh;
+#ifdef CONFIG_XFRM
 	struct xfrm_state	*xfrm;
-
+#endif
 	int			(*input)(struct sk_buff*);
 	int			(*output)(struct sk_buff*);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index ada50c04d09f..d6b750a25078 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -229,7 +229,9 @@ struct sock {
 	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
+#ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
+#endif
 	rwlock_t		sk_dst_lock;
 	atomic_t		sk_rmem_alloc;
 	atomic_t		sk_wmem_alloc;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 11c890ad8ebb..f2c5ba28a428 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -882,6 +882,7 @@ struct xfrm_dst
 	u32 path_cookie;
 };
 
+#ifdef CONFIG_XFRM
 static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 {
 	dst_release(xdst->route);
@@ -894,6 +895,7 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 	xdst->partner = NULL;
 #endif
 }
+#endif
 
 extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
@@ -1536,9 +1538,11 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n)
 }
 #endif
 
+#ifdef CONFIG_XFRM
 static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
 {
 	return skb->sp->xvec[skb->sp->len - 1];
 }
+#endif
 
 #endif	/* _NET_XFRM_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e22e3a35359..cdfe473181af 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -489,7 +489,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
 	new->dst		= dst_clone(old->dst);
-#ifdef CONFIG_INET
+#ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 72b2de76f1cd..e9d6ea0b49ca 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -976,9 +976,10 @@ int icmp_rcv(struct sk_buff *skb)
 	struct net *net = dev_net(rt->u.dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		struct sec_path *sp = skb_sec_path(skb);
 		int nh;
 
-		if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
+		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 				 XFRM_STATE_ICMP))
 			goto drop;
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 450016b89a18..df3fe50bbf0d 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -106,7 +106,7 @@ int ip_forward(struct sk_buff *skb)
 	 *	We now generate an ICMP HOST REDIRECT giving the route
 	 *	we calculated.
 	 */
-	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp)
+	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb))
 		ip_rt_send_redirect(skb);
 
 	skb->priority = rt_tos2priority(iph->tos);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 21ce7e1b2284..ffb2c5705432 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1399,7 +1399,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->u.dst.path		= &rt->u.dst;
 				rt->u.dst.neighbour	= NULL;
 				rt->u.dst.hh		= NULL;
+#ifdef CONFIG_XFRM
 				rt->u.dst.xfrm		= NULL;
+#endif
 				rt->rt_genid		= rt_genid(net);
 				rt->rt_flags		|= RTCF_REDIRECTED;
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9b7d19ae5ced..508a713ac045 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -646,9 +646,10 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	int type;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+		struct sec_path *sp = skb_sec_path(skb);
 		int nh;
 
-		if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
+		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 				 XFRM_STATE_ICMP))
 			goto drop_no_count;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c77db0b95e26..7d92fd97cfb9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -490,7 +490,7 @@ int ip6_forward(struct sk_buff *skb)
 	   We don't send redirects to frames decapsulated from IPsec.
 	 */
 	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
-	    !skb->sp) {
+	    !skb_sec_path(skb)) {
 		struct in6_addr *target = NULL;
 		struct rt6_info *rt;
 		struct neighbour *n = dst->neighbour;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3e3fde7c1d2b..aedf02b1345a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4626,7 +4626,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	 * as fast and as clean as possible. */
 	if (selinux_compat_net || !selinux_policycap_netpeer)
 		return selinux_ip_postroute_compat(skb, ifindex, family);
-
+#ifdef CONFIG_XFRM
 	/* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec
 	 * packet transformation so allow the packet to pass without any checks
 	 * since we'll have another chance to perform access control checks
@@ -4635,7 +4635,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	 *       is NULL, in this case go ahead and apply access control. */
 	if (skb->dst != NULL && skb->dst->xfrm != NULL)
 		return NF_ACCEPT;
-
+#endif
 	secmark_active = selinux_secmark_enabled();
 	peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
 	if (!secmark_active && !peerlbl_active)
-- 
cgit v1.2.3


From 3a2dfbe8acb154905fdc2fd03ec56df42e6c4cc4 Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Tue, 28 Oct 2008 16:01:07 -0700
Subject: xfrm: Notify changes in UDP encapsulation via netlink

Add new_mapping() implementation to the netlink xfrm_mgr to notify
address/port changes detected in UDP encapsulated ESP packets.

Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 14 ++++++++++++++
 net/xfrm/xfrm_user.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

(limited to 'include')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 4bc1e6b86cb2..52f3abd453a1 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -199,6 +199,9 @@ enum {
 #define XFRM_MSG_NEWSPDINFO XFRM_MSG_NEWSPDINFO
 	XFRM_MSG_GETSPDINFO,
 #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO
+
+	XFRM_MSG_MAPPING,
+#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
 	__XFRM_MSG_MAX
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -438,6 +441,15 @@ struct xfrm_user_migrate {
 	__u16				new_family;
 };
 
+struct xfrm_user_mapping {
+	struct xfrm_usersa_id		id;
+	__u32				reqid;
+	xfrm_address_t			old_saddr;
+	xfrm_address_t			new_saddr;
+	__be16				old_sport;
+	__be16				new_sport;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE		1
@@ -464,6 +476,8 @@ enum xfrm_nlgroups {
 #define XFRMNLGRP_REPORT	XFRMNLGRP_REPORT
 	XFRMNLGRP_MIGRATE,
 #define XFRMNLGRP_MIGRATE	XFRMNLGRP_MIGRATE
+	XFRMNLGRP_MAPPING,
+#define XFRMNLGRP_MAPPING	XFRMNLGRP_MAPPING
 	__XFRMNLGRP_MAX
 };
 #define XFRMNLGRP_MAX	(__XFRMNLGRP_MAX - 1)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 4a8a1abb59ee..76cf56d5d834 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2503,6 +2503,57 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
 	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
 }
 
+static inline size_t xfrm_mapping_msgsize(void)
+{
+	return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping));
+}
+
+static int build_mapping(struct sk_buff *skb, struct xfrm_state *x,
+			 xfrm_address_t *new_saddr, __be16 new_sport)
+{
+	struct xfrm_user_mapping *um;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MAPPING, sizeof(*um), 0);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	um = nlmsg_data(nlh);
+
+	memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr));
+	um->id.spi = x->id.spi;
+	um->id.family = x->props.family;
+	um->id.proto = x->id.proto;
+	memcpy(&um->new_saddr, new_saddr, sizeof(um->new_saddr));
+	memcpy(&um->old_saddr, &x->props.saddr, sizeof(um->old_saddr));
+	um->new_sport = new_sport;
+	um->old_sport = x->encap->encap_sport;
+	um->reqid = x->props.reqid;
+
+	return nlmsg_end(skb, nlh);
+}
+
+static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
+			     __be16 sport)
+{
+	struct sk_buff *skb;
+
+	if (x->id.proto != IPPROTO_ESP)
+		return -EINVAL;
+
+	if (!x->encap)
+		return -EINVAL;
+
+	skb = nlmsg_new(xfrm_mapping_msgsize(), GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	if (build_mapping(skb, x, ipaddr, sport) < 0)
+		BUG();
+
+	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+}
+
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
@@ -2511,6 +2562,7 @@ static struct xfrm_mgr netlink_mgr = {
 	.notify_policy	= xfrm_send_policy_notify,
 	.report		= xfrm_send_report,
 	.migrate	= xfrm_send_migrate,
+	.new_mapping	= xfrm_send_mapping,
 };
 
 static int __init xfrm_user_init(void)
-- 
cgit v1.2.3


From 0c6ce78abf6e228d44c3840edb8a4ae0c1299825 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 28 Oct 2008 16:09:23 -0700
Subject: net: replace uses of NIP6_FMT with %p6

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sunrpc/svc_xprt.h                |  4 +--
 include/net/ip_vs.h                            |  4 +--
 include/net/netfilter/nf_conntrack_tuple.h     |  6 ++---
 include/net/sctp/sctp.h                        |  4 +--
 net/ipv4/tcp_input.c                           |  4 +--
 net/ipv4/tcp_timer.c                           |  4 +--
 net/ipv6/addrlabel.c                           | 34 +++++++++-----------------
 net/ipv6/ah6.c                                 |  4 +--
 net/ipv6/esp6.c                                |  4 +--
 net/ipv6/exthdrs.c                             |  2 +-
 net/ipv6/icmp.c                                |  4 +--
 net/ipv6/ip6mr.c                               |  5 ++--
 net/ipv6/ipcomp6.c                             |  4 +--
 net/ipv6/ndisc.c                               |  7 ++----
 net/ipv6/netfilter/ip6t_LOG.c                  |  2 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  5 ++--
 net/ipv6/tcp_ipv6.c                            |  8 +++---
 17 files changed, 43 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 6fd7b016517f..42e01c93c7ea 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -145,8 +145,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr,
 		break;
 
 	case AF_INET6:
-		snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
-			NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
+		snprintf(buf, len, "%p6, port=%u",
+			 &((struct sockaddr_in6 *)addr)->sin6_addr,
 			ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
 		break;
 
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index fe9fcf73c85e..6a6692067092 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -87,8 +87,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
 	int len;
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]",
-			       NIP6(addr->in6)) + 1;
+		len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]",
+			       &addr->in6) + 1;
 	else
 #endif
 		len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT,
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index a6874ba22d54..303efaf68d08 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -122,10 +122,10 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t)
 static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t)
 {
 #ifdef DEBUG
-	printk("tuple %p: %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n",
+	printk("tuple %p: %u %p6 %hu -> %p6 %hu\n",
 	       t, t->dst.protonum,
-	       NIP6(*(struct in6_addr *)t->src.u3.all), ntohs(t->src.u.all),
-	       NIP6(*(struct in6_addr *)t->dst.u3.all), ntohs(t->dst.u.all));
+	       t->src.u3.all, ntohs(t->src.u.all),
+	       t->dst.u3.all, ntohs(t->dst.u.all));
 #endif
 }
 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index ed71b110edf7..a84c3976e1b0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -285,9 +285,9 @@ extern int sctp_debug_flag;
 	if (sctp_debug_flag) { \
 		if (saddr->sa.sa_family == AF_INET6) { \
 			printk(KERN_DEBUG \
-			       lead NIP6_FMT trail, \
+			       lead "%p6" trail, \
 			       leadparm, \
-			       NIP6(saddr->v6.sin6_addr), \
+			       &saddr->v6.sin6_addr, \
 			       otherparms); \
 		} else { \
 			printk(KERN_DEBUG \
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d77c0d29e239..191c06bb0f67 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2346,9 +2346,9 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (sk->sk_family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
-		printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n",
+		printk(KERN_DEBUG "Undo %s %p6/%u c%u l%u ss%u/%u p%u\n",
 		       msg,
-		       NIP6(np->daddr), ntohs(inet->dport),
+		       &np->daddr, ntohs(inet->dport),
 		       tp->snd_cwnd, tcp_left_out(tp),
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6b6dff1164b9..4e6ee5205237 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -306,8 +306,8 @@ static void tcp_retransmit_timer(struct sock *sk)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
-			       NIP6(np->daddr), ntohs(inet->dport),
+			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %p6:%u/%u shrinks window %u:%u. Repaired.\n",
+			       &np->daddr, ntohs(inet->dport),
 			       inet->num, tp->snd_una, tp->snd_nxt);
 		}
 #endif
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 08909039d87b..d28036659d27 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -186,10 +186,8 @@ u32 ipv6_addr_label(struct net *net,
 	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
 	rcu_read_unlock();
 
-	ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
-			__func__,
-			NIP6(*addr), type, ifindex,
-			label);
+	ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n",
+		  __func__, addr, type, ifindex, label);
 
 	return label;
 }
@@ -203,11 +201,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
 	struct ip6addrlbl_entry *newp;
 	int addrtype;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
-			__func__,
-			NIP6(*prefix), prefixlen,
-			ifindex,
-			(unsigned int)label);
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n",
+		  __func__, prefix, prefixlen, ifindex, (unsigned int)label);
 
 	addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
 
@@ -294,12 +289,9 @@ static int ip6addrlbl_add(struct net *net,
 	struct ip6addrlbl_entry *newp;
 	int ret = 0;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
-			__func__,
-			NIP6(*prefix), prefixlen,
-			ifindex,
-			(unsigned int)label,
-			replace);
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
+		  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
+		  replace);
 
 	newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
 	if (IS_ERR(newp))
@@ -321,10 +313,8 @@ static int __ip6addrlbl_del(struct net *net,
 	struct hlist_node *pos, *n;
 	int ret = -ESRCH;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
-			__func__,
-			NIP6(*prefix), prefixlen,
-			ifindex);
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n",
+		  __func__, prefix, prefixlen, ifindex);
 
 	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
 		if (p->prefixlen == prefixlen &&
@@ -347,10 +337,8 @@ static int ip6addrlbl_del(struct net *net,
 	struct in6_addr prefix_buf;
 	int ret;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
-			__func__,
-			NIP6(*prefix), prefixlen,
-			ifindex);
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n",
+		  __func__, prefix, prefixlen, ifindex);
 
 	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
 	spin_lock(&ip6addrlbl_table.lock);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 2ff0c8233e47..9bc43f2527ce 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -419,8 +419,8 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!x)
 		return;
 
-	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n",
-		 ntohl(ah->spi), NIP6(iph->daddr));
+	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n",
+		 ntohl(ah->spi), &iph->daddr);
 
 	xfrm_state_put(x);
 }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b181b08fb761..ec4be188c341 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -367,8 +367,8 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
 	if (!x)
 		return;
-	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n",
-			ntohl(esph->spi), NIP6(iph->daddr));
+	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n",
+			ntohl(esph->spi), &iph->daddr);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6bfffec2371c..a89051468359 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
 
 	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
 		LIMIT_NETDEBUG(
-			KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
+			KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr);
 		goto discard;
 	}
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 508a713ac045..b3fa38e40dc4 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -681,8 +681,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
 					     IPPROTO_ICMPV6, 0));
 		if (__skb_checksum_complete(skb)) {
-			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
-				       NIP6(*saddr), NIP6(*daddr));
+			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n",
+				       saddr, daddr);
 			goto discard_it;
 		}
 	}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 182f8a177e7f..798e6a29dd83 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -297,9 +297,8 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 		const struct mfc6_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
 
-		seq_printf(seq,
-			   NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld",
-			   NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin),
+		seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld",
+			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 			   mfc->mf6c_parent,
 			   mfc->mfc_un.res.pkt,
 			   mfc->mfc_un.res.bytes,
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 4545e4306862..9566d8f73140 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -67,8 +67,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!x)
 		return;
 
-	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n",
-			spi, NIP6(iph->daddr));
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n",
+			spi, &iph->daddr);
 	xfrm_state_put(x);
 }
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 172438320eec..191bb0722a7c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -647,11 +647,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 	if ((probes -= neigh->parms->ucast_probes) < 0) {
 		if (!(neigh->nud_state & NUD_VALID)) {
-			ND_PRINTK1(KERN_DEBUG
-				   "%s(): trying to ucast probe in NUD_INVALID: "
-				   NIP6_FMT "\n",
-				   __func__,
-				   NIP6(*target));
+			ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n",
+				   __func__, target);
 		}
 		ndisc_send_ns(dev, neigh, target, target, saddr);
 	} else if ((probes -= neigh->parms->app_probes) < 0) {
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index caa441d09567..a61ce3010007 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info,
 	}
 
 	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
-	printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr));
+	printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr);
 
 	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
 	printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index e91db16611d9..b165a273c6c0 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -56,9 +56,8 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 static int ipv6_print_tuple(struct seq_file *s,
 			    const struct nf_conntrack_tuple *tuple)
 {
-	return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ",
-			  NIP6(*((struct in6_addr *)tuple->src.u3.ip6)),
-			  NIP6(*((struct in6_addr *)tuple->dst.u3.ip6)));
+	return seq_printf(s, "src=%p6 dst=%p6 ",
+			  tuple->src.u3.ip6, tuple->dst.u3.ip6);
 }
 
 /*
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b6b356b7912a..483550cfdf33 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -872,12 +872,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash %s for "
-			       "(" NIP6_FMT ", %u)->"
-			       "(" NIP6_FMT ", %u)\n",
+			printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n",
 			       genhash ? "failed" : "mismatch",
-			       NIP6(ip6h->saddr), ntohs(th->source),
-			       NIP6(ip6h->daddr), ntohs(th->dest));
+			       &ip6h->saddr, ntohs(th->source),
+			       &ip6h->daddr, ntohs(th->dest));
 		}
 		return 1;
 	}
-- 
cgit v1.2.3


From b189db5d299c6824780af5590564ff608adb3dea Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 28 Oct 2008 22:38:52 -0700
Subject: net: remove NIP6(), NIP6_FMT, NIP6_SEQFMT and final users

Open code NIP6_FMT in the one call inside sscanf and one user
of NIP6() that could use %p6 in the netfilter code.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/kernel.h         | 12 ------------
 net/bridge/netfilter/ebt_log.c |  6 ++----
 net/sunrpc/svcauth_unix.c      |  2 +-
 3 files changed, 3 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 396a350b87a6..77777c460099 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -357,18 +357,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 	((unsigned char *)&addr)[3]
 #define NIPQUAD_FMT "%u.%u.%u.%u"
 
-#define NIP6(addr) \
-	ntohs((addr).s6_addr16[0]), \
-	ntohs((addr).s6_addr16[1]), \
-	ntohs((addr).s6_addr16[2]), \
-	ntohs((addr).s6_addr16[3]), \
-	ntohs((addr).s6_addr16[4]), \
-	ntohs((addr).s6_addr16[5]), \
-	ntohs((addr).s6_addr16[6]), \
-	ntohs((addr).s6_addr16[7])
-#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x"
-#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x"
-
 #if defined(__LITTLE_ENDIAN)
 #define HIPQUAD(addr) \
 	((unsigned char *)&addr)[3], \
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 3d33c608906a..3654f3ebdb8f 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -133,10 +133,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 			printk(" INCOMPLETE IPv6 header");
 			goto out;
 		}
-		printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x "
-		       "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 "
-		       "priority=0x%01X, Next Header=%d", NIP6(ih->saddr),
-		       NIP6(ih->daddr), ih->priority, ih->nexthdr);
+		printk(" IPv6 SRC=%p6 IPv6 DST=%p6, IPv6 priority=0x%01X, Next Header=%d",
+		       &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
 		nexthdr = ih->nexthdr;
 		offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr);
 		if (offset_ph == -1)
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 9a6d0cfd4ce3..eb640c1a1bc9 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -214,7 +214,7 @@ static int ip_map_parse(struct cache_detail *cd,
 		addr.s6_addr32[2] = htonl(0xffff);
 		addr.s6_addr32[3] =
 			htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
-       } else if (sscanf(buf, NIP6_FMT "%c",
+       } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c",
 			&b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) {
 		addr.s6_addr16[0] = htons(b1);
 		addr.s6_addr16[1] = htons(b2);
-- 
cgit v1.2.3


From 645ca708f936b2fbeb79e52d7823e3eb2c0905f8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 29 Oct 2008 01:41:45 -0700
Subject: udp: introduce struct udp_table and multiple spinlocks

UDP sockets are hashed in a 128 slots hash table.

This hash table is protected by *one* rwlock.

This rwlock is readlocked each time an incoming UDP message is handled.

This rwlock is writelocked each time a socket must be inserted in
hash table (bind time), or deleted from this table (close time)

This is not scalable on SMP machines :

1) Even in read mode, lock() and unlock() are atomic operations and
 must dirty a contended cache line, shared by all cpus.

2) A writer might be starved if many readers are 'in flight'. This can
 happen on a machine with some NIC receiving many UDP messages. User
 process can be delayed a long time at socket creation/dismantle time.

This patch prepares RCU migration, by introducing 'struct udp_table
and struct udp_hslot', and using one spinlock per chain, to reduce
contention on central rwlock.

Introducing one spinlock per chain reduces latencies, for port
randomization on heavily loaded UDP servers. This also speedup
bindings to specific ports.

udp_lib_unhash() was uninlined, becoming to big.

Some cleanups were done to ease review of following patch
(RCUification of UDP Unicast lookups)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h    |   2 +-
 include/net/udp.h     |  25 +++---
 include/net/udplite.h |   2 +-
 net/ipv4/udp.c        | 207 ++++++++++++++++++++++++++++++--------------------
 net/ipv4/udp_impl.h   |   4 +-
 net/ipv4/udplite.c    |  13 ++--
 net/ipv6/udp.c        | 112 +++++++++++++++------------
 net/ipv6/udp_impl.h   |   4 +-
 net/ipv6/udplite.c    |   8 +-
 9 files changed, 214 insertions(+), 163 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index d6b750a25078..d200dfbe1ef6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -599,7 +599,7 @@ struct proto {
 
 	union {
 		struct inet_hashinfo	*hashinfo;
-		struct hlist_head	*udp_hash;
+		struct udp_table	*udp_table;
 		struct raw_hashinfo	*raw_hash;
 	} h;
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 1e205095ea68..df2bfe545374 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -50,8 +50,15 @@ struct udp_skb_cb {
 };
 #define UDP_SKB_CB(__skb)	((struct udp_skb_cb *)((__skb)->cb))
 
-extern struct hlist_head udp_hash[UDP_HTABLE_SIZE];
-extern rwlock_t udp_hash_lock;
+struct udp_hslot {
+	struct hlist_head	head;
+	spinlock_t		lock;
+} __attribute__((aligned(2 * sizeof(long))));
+struct udp_table {
+	struct udp_hslot	hash[UDP_HTABLE_SIZE];
+};
+extern struct udp_table udp_table;
+extern void udp_table_init(struct udp_table *);
 
 
 /* Note: this must match 'valbool' in sock_setsockopt */
@@ -110,15 +117,7 @@ static inline void udp_lib_hash(struct sock *sk)
 	BUG();
 }
 
-static inline void udp_lib_unhash(struct sock *sk)
-{
-	write_lock_bh(&udp_hash_lock);
-	if (sk_del_node_init(sk)) {
-		inet_sk(sk)->num = 0;
-		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-	}
-	write_unlock_bh(&udp_hash_lock);
-}
+extern void udp_lib_unhash(struct sock *sk);
 
 static inline void udp_lib_close(struct sock *sk, long timeout)
 {
@@ -187,7 +186,7 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 struct udp_seq_afinfo {
 	char			*name;
 	sa_family_t		family;
-	struct hlist_head	*hashtable;
+	struct udp_table	*udp_table;
 	struct file_operations	seq_fops;
 	struct seq_operations	seq_ops;
 };
@@ -196,7 +195,7 @@ struct udp_iter_state {
 	struct seq_net_private  p;
 	sa_family_t		family;
 	int			bucket;
-	struct hlist_head	*hashtable;
+	struct udp_table	*udp_table;
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/net/udplite.h b/include/net/udplite.h
index b76b2e377af4..afdffe607b24 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -11,7 +11,7 @@
 #define UDPLITE_RECV_CSCOV   11 /* receiver partial coverage (threshold ) */
 
 extern struct proto 		udplite_prot;
-extern struct hlist_head 	udplite_hash[UDP_HTABLE_SIZE];
+extern struct udp_table		udplite_table;
 
 /*
  *	Checksum computation is all in software, hence simpler getfrag.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2095abc3caba..2a6c491f97d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -104,12 +104,8 @@
 #include <net/xfrm.h>
 #include "udp_impl.h"
 
-/*
- *	Snmp MIB for the UDP layer
- */
-
-struct hlist_head udp_hash[UDP_HTABLE_SIZE];
-DEFINE_RWLOCK(udp_hash_lock);
+struct udp_table udp_table;
+EXPORT_SYMBOL(udp_table);
 
 int sysctl_udp_mem[3] __read_mostly;
 int sysctl_udp_rmem_min __read_mostly;
@@ -123,7 +119,7 @@ atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
 static int udp_lib_lport_inuse(struct net *net, __u16 num,
-			       const struct hlist_head udptable[],
+			       const struct udp_hslot *hslot,
 			       struct sock *sk,
 			       int (*saddr_comp)(const struct sock *sk1,
 						 const struct sock *sk2))
@@ -131,7 +127,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 	struct sock *sk2;
 	struct hlist_node *node;
 
-	sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)])
+	sk_for_each(sk2, node, &hslot->head)
 		if (net_eq(sock_net(sk2), net)			&&
 		    sk2 != sk					&&
 		    sk2->sk_hash == num				&&
@@ -154,12 +150,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		       int (*saddr_comp)(const struct sock *sk1,
 					 const struct sock *sk2 )    )
 {
-	struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
+	struct udp_hslot *hslot;
+	struct udp_table *udptable = sk->sk_prot->h.udp_table;
 	int    error = 1;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&udp_hash_lock);
-
 	if (!snum) {
 		int low, high, remaining;
 		unsigned rand;
@@ -171,26 +166,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		rand = net_random();
 		snum = first = rand % remaining + low;
 		rand |= 1;
-		while (udp_lib_lport_inuse(net, snum, udptable, sk,
-					   saddr_comp)) {
+		for (;;) {
+			hslot = &udptable->hash[udp_hashfn(net, snum)];
+			spin_lock_bh(&hslot->lock);
+			if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
+				break;
+			spin_unlock_bh(&hslot->lock);
 			do {
 				snum = snum + rand;
 			} while (snum < low || snum > high);
 			if (snum == first)
 				goto fail;
 		}
-	} else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp))
-		goto fail;
-
+	} else {
+		hslot = &udptable->hash[udp_hashfn(net, snum)];
+		spin_lock_bh(&hslot->lock);
+		if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
+			goto fail_unlock;
+	}
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udptable[udp_hashfn(net, snum)]);
+		sk_add_node(sk, &hslot->head);
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	}
 	error = 0;
+fail_unlock:
+	spin_unlock_bh(&hslot->lock);
 fail:
-	write_unlock_bh(&udp_hash_lock);
 	return error;
 }
 
@@ -208,63 +211,73 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
 	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
+static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
+			 unsigned short hnum,
+			 __be16 sport, __be32 daddr, __be16 dport, int dif)
+{
+	int score = -1;
+
+	if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+			!ipv6_only_sock(sk)) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		score = (sk->sk_family == PF_INET ? 1 : 0);
+		if (inet->rcv_saddr) {
+			if (inet->rcv_saddr != daddr)
+				return -1;
+			score += 2;
+		}
+		if (inet->daddr) {
+			if (inet->daddr != saddr)
+				return -1;
+			score += 2;
+		}
+		if (inet->dport) {
+			if (inet->dport != sport)
+				return -1;
+			score += 2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score += 2;
+		}
+	}
+	return score;
+}
+
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
 static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 		__be16 sport, __be32 daddr, __be16 dport,
-		int dif, struct hlist_head udptable[])
+		int dif, struct udp_table *udptable)
 {
 	struct sock *sk, *result = NULL;
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
-	int badness = -1;
-
-	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
-		struct inet_sock *inet = inet_sk(sk);
-
-		if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
-				!ipv6_only_sock(sk)) {
-			int score = (sk->sk_family == PF_INET ? 1 : 0);
-			if (inet->rcv_saddr) {
-				if (inet->rcv_saddr != daddr)
-					continue;
-				score+=2;
-			}
-			if (inet->daddr) {
-				if (inet->daddr != saddr)
-					continue;
-				score+=2;
-			}
-			if (inet->dport) {
-				if (inet->dport != sport)
-					continue;
-				score+=2;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score+=2;
-			}
-			if (score == 9) {
-				result = sk;
-				break;
-			} else if (score > badness) {
-				result = sk;
-				badness = score;
-			}
+	unsigned int hash = udp_hashfn(net, hnum);
+	struct udp_hslot *hslot = &udptable->hash[hash];
+	int score, badness = -1;
+
+	spin_lock(&hslot->lock);
+	sk_for_each(sk, node, &hslot->head) {
+		score = compute_score(sk, net, saddr, hnum, sport,
+				      daddr, dport, dif);
+		if (score > badness) {
+			result = sk;
+			badness = score;
 		}
 	}
 	if (result)
 		sock_hold(result);
-	read_unlock(&udp_hash_lock);
+	spin_unlock(&hslot->lock);
 	return result;
 }
 
 static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 						 __be16 sport, __be16 dport,
-						 struct hlist_head udptable[])
+						 struct udp_table *udptable)
 {
 	struct sock *sk;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -280,7 +293,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 			     __be32 daddr, __be16 dport, int dif)
 {
-	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash);
+	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
 }
 EXPORT_SYMBOL_GPL(udp4_lib_lookup);
 
@@ -323,7 +336,7 @@ found:
  * to find the appropriate port.
  */
 
-void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
+void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 {
 	struct inet_sock *inet;
 	struct iphdr *iph = (struct iphdr*)skb->data;
@@ -392,7 +405,7 @@ out:
 
 void udp_err(struct sk_buff *skb, u32 info)
 {
-	__udp4_lib_err(skb, info, udp_hash);
+	__udp4_lib_err(skb, info, &udp_table);
 }
 
 /*
@@ -933,6 +946,21 @@ int udp_disconnect(struct sock *sk, int flags)
 	return 0;
 }
 
+void udp_lib_unhash(struct sock *sk)
+{
+	struct udp_table *udptable = sk->sk_prot->h.udp_table;
+	unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
+	struct udp_hslot *hslot = &udptable->hash[hash];
+
+	spin_lock(&hslot->lock);
+	if (sk_del_node_init(sk)) {
+		inet_sk(sk)->num = 0;
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	}
+	spin_unlock(&hslot->lock);
+}
+EXPORT_SYMBOL(udp_lib_unhash);
+
 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int is_udplite = IS_UDPLITE(sk);
@@ -1071,13 +1099,14 @@ drop:
 static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    struct udphdr  *uh,
 				    __be32 saddr, __be32 daddr,
-				    struct hlist_head udptable[])
+				    struct udp_table *udptable)
 {
 	struct sock *sk;
+	struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
 	int dif;
 
-	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
+	spin_lock(&hslot->lock);
+	sk = sk_head(&hslot->head);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
@@ -1102,7 +1131,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		} while (sknext);
 	} else
 		kfree_skb(skb);
-	read_unlock(&udp_hash_lock);
+	spin_unlock(&hslot->lock);
 	return 0;
 }
 
@@ -1148,7 +1177,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
  *	All we need to do is get the socket, and then do a checksum.
  */
 
-int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
+int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
 	struct sock *sk;
@@ -1246,7 +1275,7 @@ drop:
 
 int udp_rcv(struct sk_buff *skb)
 {
-	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
+	return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
 }
 
 void udp_destroy_sock(struct sock *sk)
@@ -1488,7 +1517,7 @@ struct proto udp_prot = {
 	.sysctl_wmem	   = &sysctl_udp_wmem_min,
 	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp_sock),
-	.h.udp_hash	   = udp_hash,
+	.h.udp_table	   = &udp_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
@@ -1498,20 +1527,23 @@ struct proto udp_prot = {
 /* ------------------------------------------------------------------------ */
 #ifdef CONFIG_PROC_FS
 
-static struct sock *udp_get_first(struct seq_file *seq)
+static struct sock *udp_get_first(struct seq_file *seq, int start)
 {
 	struct sock *sk;
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
+	for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
 		struct hlist_node *node;
-		sk_for_each(sk, node, state->hashtable + state->bucket) {
+		struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+		spin_lock_bh(&hslot->lock);
+		sk_for_each(sk, node, &hslot->head) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
 			if (sk->sk_family == state->family)
 				goto found;
 		}
+		spin_unlock_bh(&hslot->lock);
 	}
 	sk = NULL;
 found:
@@ -1525,20 +1557,18 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 
 	do {
 		sk = sk_next(sk);
-try_again:
-		;
 	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
 
-	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
-		sk = sk_head(state->hashtable + state->bucket);
-		goto try_again;
+	if (!sk) {
+		spin_unlock(&state->udp_table->hash[state->bucket].lock);
+		return udp_get_first(seq, state->bucket + 1);
 	}
 	return sk;
 }
 
 static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct sock *sk = udp_get_first(seq);
+	struct sock *sk = udp_get_first(seq, 0);
 
 	if (sk)
 		while (pos && (sk = udp_get_next(seq, sk)) != NULL)
@@ -1547,9 +1577,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(udp_hash_lock)
 {
-	read_lock(&udp_hash_lock);
 	return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
 
@@ -1567,9 +1595,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void udp_seq_stop(struct seq_file *seq, void *v)
-	__releases(udp_hash_lock)
 {
-	read_unlock(&udp_hash_lock);
+	struct udp_iter_state *state = seq->private;
+
+	if (state->bucket < UDP_HTABLE_SIZE)
+		spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
 }
 
 static int udp_seq_open(struct inode *inode, struct file *file)
@@ -1585,7 +1615,7 @@ static int udp_seq_open(struct inode *inode, struct file *file)
 
 	s = ((struct seq_file *)file->private_data)->private;
 	s->family		= afinfo->family;
-	s->hashtable		= afinfo->hashtable;
+	s->udp_table		= afinfo->udp_table;
 	return err;
 }
 
@@ -1657,7 +1687,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
 static struct udp_seq_afinfo udp4_seq_afinfo = {
 	.name		= "udp",
 	.family		= AF_INET,
-	.hashtable	= udp_hash,
+	.udp_table	= &udp_table,
 	.seq_fops	= {
 		.owner	=	THIS_MODULE,
 	},
@@ -1692,10 +1722,21 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+void __init udp_table_init(struct udp_table *table)
+{
+	int i;
+
+	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+		INIT_HLIST_HEAD(&table->hash[i].head);
+		spin_lock_init(&table->hash[i].lock);
+	}
+}
+
 void __init udp_init(void)
 {
 	unsigned long limit;
 
+	udp_table_init(&udp_table);
 	/* Set the pressure threshold up by the same strategy of TCP. It is a
 	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
 	 * toward zero with the amount of memory, with a floor of 128 pages.
@@ -1712,8 +1753,6 @@ void __init udp_init(void)
 }
 
 EXPORT_SYMBOL(udp_disconnect);
-EXPORT_SYMBOL(udp_hash);
-EXPORT_SYMBOL(udp_hash_lock);
 EXPORT_SYMBOL(udp_ioctl);
 EXPORT_SYMBOL(udp_prot);
 EXPORT_SYMBOL(udp_sendmsg);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 2e9bad2fa1bc..9f4a6165f722 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -5,8 +5,8 @@
 #include <net/protocol.h>
 #include <net/inet_common.h>
 
-extern int  	__udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
-extern void 	__udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
+extern int  	__udp4_lib_rcv(struct sk_buff *, struct udp_table *, int );
+extern void 	__udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
 
 extern int	udp_v4_get_port(struct sock *sk, unsigned short snum);
 
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 3c807964da96..d8ea8e5f5ea3 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -12,16 +12,17 @@
  */
 #include "udp_impl.h"
 
-struct hlist_head 	udplite_hash[UDP_HTABLE_SIZE];
+struct udp_table 	udplite_table;
+EXPORT_SYMBOL(udplite_table);
 
 static int udplite_rcv(struct sk_buff *skb)
 {
-	return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
+	return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
 }
 
 static void udplite_err(struct sk_buff *skb, u32 info)
 {
-	__udp4_lib_err(skb, info, udplite_hash);
+	__udp4_lib_err(skb, info, &udplite_table);
 }
 
 static	struct net_protocol udplite_protocol = {
@@ -50,7 +51,7 @@ struct proto 	udplite_prot = {
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
 	.obj_size	   = sizeof(struct udp_sock),
-	.h.udp_hash	   = udplite_hash,
+	.h.udp_table	   = &udplite_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
@@ -71,7 +72,7 @@ static struct inet_protosw udplite4_protosw = {
 static struct udp_seq_afinfo udplite4_seq_afinfo = {
 	.name		= "udplite",
 	.family		= AF_INET,
-	.hashtable	= udplite_hash,
+	.udp_table 	= &udplite_table,
 	.seq_fops	= {
 		.owner	=	THIS_MODULE,
 	},
@@ -108,6 +109,7 @@ static inline int udplite4_proc_init(void)
 
 void __init udplite4_register(void)
 {
+	udp_table_init(&udplite_table);
 	if (proto_register(&udplite_prot, 1))
 		goto out_register_err;
 
@@ -126,5 +128,4 @@ out_register_err:
 	printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
 }
 
-EXPORT_SYMBOL(udplite_hash);
 EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e51da8c092fa..ccee7244ca0f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -54,62 +54,73 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
 	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
+static inline int compute_score(struct sock *sk, struct net *net,
+				unsigned short hnum,
+				struct in6_addr *saddr, __be16 sport,
+				struct in6_addr *daddr, __be16 dport,
+				int dif)
+{
+	int score = -1;
+
+	if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+			sk->sk_family == PF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		struct inet_sock *inet = inet_sk(sk);
+
+		score = 0;
+		if (inet->dport) {
+			if (inet->dport != sport)
+				return -1;
+			score++;
+		}
+		if (!ipv6_addr_any(&np->rcv_saddr)) {
+			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+				return -1;
+			score++;
+		}
+		if (!ipv6_addr_any(&np->daddr)) {
+			if (!ipv6_addr_equal(&np->daddr, saddr))
+				return -1;
+			score++;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score++;
+		}
+	}
+	return score;
+}
+
 static struct sock *__udp6_lib_lookup(struct net *net,
 				      struct in6_addr *saddr, __be16 sport,
 				      struct in6_addr *daddr, __be16 dport,
-				      int dif, struct hlist_head udptable[])
+				      int dif, struct udp_table *udptable)
 {
 	struct sock *sk, *result = NULL;
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
-	int badness = -1;
-
-	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
-		struct inet_sock *inet = inet_sk(sk);
-
-		if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
-				sk->sk_family == PF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
-			int score = 0;
-			if (inet->dport) {
-				if (inet->dport != sport)
-					continue;
-				score++;
-			}
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
-					continue;
-				score++;
-			}
-			if (!ipv6_addr_any(&np->daddr)) {
-				if (!ipv6_addr_equal(&np->daddr, saddr))
-					continue;
-				score++;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score++;
-			}
-			if (score == 4) {
-				result = sk;
-				break;
-			} else if (score > badness) {
-				result = sk;
-				badness = score;
-			}
+	unsigned int hash = udp_hashfn(net, hnum);
+	struct udp_hslot *hslot = &udptable->hash[hash];
+	int score, badness = -1;
+
+	spin_lock(&hslot->lock);
+	sk_for_each(sk, node, &hslot->head) {
+		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+		if (score > badness) {
+			result = sk;
+			badness = score;
 		}
 	}
 	if (result)
 		sock_hold(result);
-	read_unlock(&udp_hash_lock);
+	spin_unlock(&hslot->lock);
 	return result;
 }
 
 static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 					  __be16 sport, __be16 dport,
-					  struct hlist_head udptable[])
+					  struct udp_table *udptable)
 {
 	struct sock *sk;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -239,7 +250,7 @@ csum_copy_err:
 
 void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		    int type, int code, int offset, __be32 info,
-		    struct hlist_head udptable[]                    )
+		    struct udp_table *udptable)
 {
 	struct ipv6_pinfo *np;
 	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
@@ -275,7 +286,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
 				 struct inet6_skb_parm *opt, int type,
 				 int code, int offset, __be32 info     )
 {
-	__udp6_lib_err(skb, opt, type, code, offset, info, udp_hash);
+	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
 
 int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
@@ -374,14 +385,15 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
  */
 static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		struct in6_addr *saddr, struct in6_addr *daddr,
-		struct hlist_head udptable[])
+		struct udp_table *udptable)
 {
 	struct sock *sk, *sk2;
 	const struct udphdr *uh = udp_hdr(skb);
+	struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
 	int dif;
 
-	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
+	spin_lock(&hslot->lock);
+	sk = sk_head(&hslot->head);
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
@@ -409,7 +421,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		sk_add_backlog(sk, skb);
 	bh_unlock_sock(sk);
 out:
-	read_unlock(&udp_hash_lock);
+	spin_unlock(&hslot->lock);
 	return 0;
 }
 
@@ -447,7 +459,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
 	return 0;
 }
 
-int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
+int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
 	struct sock *sk;
@@ -544,7 +556,7 @@ discard:
 
 static __inline__ int udpv6_rcv(struct sk_buff *skb)
 {
-	return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP);
+	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
 }
 
 /*
@@ -1008,7 +1020,7 @@ int udp6_seq_show(struct seq_file *seq, void *v)
 static struct udp_seq_afinfo udp6_seq_afinfo = {
 	.name		= "udp6",
 	.family		= AF_INET6,
-	.hashtable	= udp_hash,
+	.udp_table	= &udp_table,
 	.seq_fops	= {
 		.owner	=	THIS_MODULE,
 	},
@@ -1050,7 +1062,7 @@ struct proto udpv6_prot = {
 	.sysctl_wmem	   = &sysctl_udp_wmem_min,
 	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp6_sock),
-	.h.udp_hash	   = udp_hash,
+	.h.udp_table	   = &udp_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 92dd7da766d8..23779208c334 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -7,9 +7,9 @@
 #include <net/inet_common.h>
 #include <net/transp_v6.h>
 
-extern int  	__udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
+extern int  	__udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
 extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
-			       int , int , int , __be32 , struct hlist_head []);
+			       int , int , int , __be32 , struct udp_table *);
 
 extern int	udp_v6_get_port(struct sock *sk, unsigned short snum);
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 3cd1a1ac3d6c..f1e892a99e05 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -15,14 +15,14 @@
 
 static int udplitev6_rcv(struct sk_buff *skb)
 {
-	return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
+	return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
 }
 
 static void udplitev6_err(struct sk_buff *skb,
 			  struct inet6_skb_parm *opt,
 			  int type, int code, int offset, __be32 info)
 {
-	__udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash);
+	__udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
 }
 
 static struct inet6_protocol udplitev6_protocol = {
@@ -49,7 +49,7 @@ struct proto udplitev6_prot = {
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v6_get_port,
 	.obj_size	   = sizeof(struct udp6_sock),
-	.h.udp_hash	   = udplite_hash,
+	.h.udp_table	   = &udplite_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
@@ -95,7 +95,7 @@ void udplitev6_exit(void)
 static struct udp_seq_afinfo udplite6_seq_afinfo = {
 	.name		= "udplite6",
 	.family		= AF_INET6,
-	.hashtable	= udplite_hash,
+	.udp_table	= &udplite_table,
 	.seq_fops	= {
 		.owner	=	THIS_MODULE,
 	},
-- 
cgit v1.2.3


From 271b72c7fa82c2c7a795bc16896149933110672d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 29 Oct 2008 02:11:14 -0700
Subject: udp: RCU handling for Unicast packets.

Goals are :

1) Optimizing handling of incoming Unicast UDP frames, so that no memory
 writes should happen in the fast path.

 Note: Multicasts and broadcasts still will need to take a lock,
 because doing a full lockless lookup in this case is difficult.

2) No expensive operations in the socket bind/unhash phases :
  - No expensive synchronize_rcu() calls.

  - No added rcu_head in socket structure, increasing memory needs,
  but more important, forcing us to use call_rcu() calls,
  that have the bad property of making sockets structure cold.
  (rcu grace period between socket freeing and its potential reuse
   make this socket being cold in CPU cache).
  David did a previous patch using call_rcu() and noticed a 20%
  impact on TCP connection rates.
  Quoting Cristopher Lameter :
   "Right. That results in cacheline cooldown. You'd want to recycle
    the object as they are cache hot on a per cpu basis. That is screwed
    up by the delayed regular rcu processing. We have seen multiple
    regressions due to cacheline cooldown.
    The only choice in cacheline hot sensitive areas is to deal with the
    complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU."

  - Because udp sockets are allocated from dedicated kmem_cache,
  use of SLAB_DESTROY_BY_RCU can help here.

Theory of operation :
---------------------

As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()),
special attention must be taken by readers and writers.

Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed,
reused, inserted in a different chain or in worst case in the same chain
while readers could do lookups in the same time.

In order to avoid loops, a reader must check each socket found in a chain
really belongs to the chain the reader was traversing. If it finds a
mismatch, lookup must start again at the begining. This *restart* loop
is the reason we had to use rdlock for the multicast case, because
we dont want to send same message several times to the same socket.

We use RCU only for fast path.
Thus, /proc/net/udp still takes spinlocks.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 37 ++++++++++++++++++++++++++++++++++++-
 net/core/sock.c    |  3 ++-
 net/ipv4/udp.c     | 35 ++++++++++++++++++++++++++---------
 net/ipv4/udplite.c |  1 +
 net/ipv6/udp.c     | 31 ++++++++++++++++++++++++-------
 net/ipv6/udplite.c |  1 +
 6 files changed, 90 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index d200dfbe1ef6..0bea25db5471 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -363,6 +363,27 @@ static __inline__ int sk_del_node_init(struct sock *sk)
 	return rc;
 }
 
+static __inline__ int __sk_del_node_init_rcu(struct sock *sk)
+{
+	if (sk_hashed(sk)) {
+		hlist_del_init_rcu(&sk->sk_node);
+		return 1;
+	}
+	return 0;
+}
+
+static __inline__ int sk_del_node_init_rcu(struct sock *sk)
+{
+	int rc = __sk_del_node_init_rcu(sk);
+
+	if (rc) {
+		/* paranoid for a while -acme */
+		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		__sock_put(sk);
+	}
+	return rc;
+}
+
 static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list)
 {
 	hlist_add_head(&sk->sk_node, list);
@@ -374,6 +395,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list)
 	__sk_add_node(sk, list);
 }
 
+static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+{
+	hlist_add_head_rcu(&sk->sk_node, list);
+}
+
+static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+{
+	sock_hold(sk);
+	__sk_add_node_rcu(sk, list);
+}
+
 static __inline__ void __sk_del_bind_node(struct sock *sk)
 {
 	__hlist_del(&sk->sk_bind_node);
@@ -387,6 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
 
 #define sk_for_each(__sk, node, list) \
 	hlist_for_each_entry(__sk, node, list, sk_node)
+#define sk_for_each_rcu(__sk, node, list) \
+	hlist_for_each_entry_rcu(__sk, node, list, sk_node)
 #define sk_for_each_from(__sk, node) \
 	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
 		hlist_for_each_entry_from(__sk, node, sk_node)
@@ -589,8 +623,9 @@ struct proto {
 	int			*sysctl_rmem;
 	int			max_header;
 
-	struct kmem_cache		*slab;
+	struct kmem_cache	*slab;
 	unsigned int		obj_size;
+	int			slab_flags;
 
 	atomic_t		*orphan_count;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 5e2a3132a8c9..ded1eb5d2fd4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2042,7 +2042,8 @@ int proto_register(struct proto *prot, int alloc_slab)
 
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
-					       SLAB_HWCACHE_ALIGN, NULL);
+					SLAB_HWCACHE_ALIGN | prot->slab_flags,
+					NULL);
 
 		if (prot->slab == NULL) {
 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2a6c491f97d7..0ea974bf7962 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -187,7 +187,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &hslot->head);
+		sk_add_node_rcu(sk, &hslot->head);
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	}
 	error = 0;
@@ -253,15 +253,24 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 		__be16 sport, __be32 daddr, __be16 dport,
 		int dif, struct udp_table *udptable)
 {
-	struct sock *sk, *result = NULL;
+	struct sock *sk, *result;
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash = udp_hashfn(net, hnum);
 	struct udp_hslot *hslot = &udptable->hash[hash];
-	int score, badness = -1;
+	int score, badness;
 
-	spin_lock(&hslot->lock);
-	sk_for_each(sk, node, &hslot->head) {
+	rcu_read_lock();
+begin:
+	result = NULL;
+	badness = -1;
+	sk_for_each_rcu(sk, node, &hslot->head) {
+		/*
+		 * lockless reader, and SLAB_DESTROY_BY_RCU items:
+		 * We must check this item was not moved to another chain
+		 */
+		if (udp_hashfn(net, sk->sk_hash) != hash)
+			goto begin;
 		score = compute_score(sk, net, saddr, hnum, sport,
 				      daddr, dport, dif);
 		if (score > badness) {
@@ -269,9 +278,16 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 			badness = score;
 		}
 	}
-	if (result)
-		sock_hold(result);
-	spin_unlock(&hslot->lock);
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
+				  daddr, dport, dif) < badness)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	rcu_read_unlock();
 	return result;
 }
 
@@ -953,7 +969,7 @@ void udp_lib_unhash(struct sock *sk)
 	struct udp_hslot *hslot = &udptable->hash[hash];
 
 	spin_lock(&hslot->lock);
-	if (sk_del_node_init(sk)) {
+	if (sk_del_node_init_rcu(sk)) {
 		inet_sk(sk)->num = 0;
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	}
@@ -1517,6 +1533,7 @@ struct proto udp_prot = {
 	.sysctl_wmem	   = &sysctl_udp_wmem_min,
 	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udp_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index d8ea8e5f5ea3..c784891cb7e5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -51,6 +51,7 @@ struct proto 	udplite_prot = {
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
 	.obj_size	   = sizeof(struct udp_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udplite_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ccee7244ca0f..1d9790e43dfc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -97,24 +97,40 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 				      struct in6_addr *daddr, __be16 dport,
 				      int dif, struct udp_table *udptable)
 {
-	struct sock *sk, *result = NULL;
+	struct sock *sk, *result;
 	struct hlist_node *node;
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash = udp_hashfn(net, hnum);
 	struct udp_hslot *hslot = &udptable->hash[hash];
-	int score, badness = -1;
+	int score, badness;
 
-	spin_lock(&hslot->lock);
-	sk_for_each(sk, node, &hslot->head) {
+	rcu_read_lock();
+begin:
+	result = NULL;
+	badness = -1;
+	sk_for_each_rcu(sk, node, &hslot->head) {
+		/*
+		 * lockless reader, and SLAB_DESTROY_BY_RCU items:
+		 * We must check this item was not moved to another chain
+		 */
+		if (udp_hashfn(net, sk->sk_hash) != hash)
+			goto begin;
 		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
 		if (score > badness) {
 			result = sk;
 			badness = score;
 		}
 	}
-	if (result)
-		sock_hold(result);
-	spin_unlock(&hslot->lock);
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, hnum, saddr, sport,
+					daddr, dport, dif) < badness)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	rcu_read_unlock();
 	return result;
 }
 
@@ -1062,6 +1078,7 @@ struct proto udpv6_prot = {
 	.sysctl_wmem	   = &sysctl_udp_wmem_min,
 	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp6_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udp_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f1e892a99e05..ba162a824585 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -49,6 +49,7 @@ struct proto udplitev6_prot = {
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v6_get_port,
 	.obj_size	   = sizeof(struct udp6_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udplite_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
-- 
cgit v1.2.3


From 9cbbb3ac628227ec5b65fc043539949db606cd17 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Wed, 29 Oct 2008 13:41:35 +0100
Subject: ALSA: Release v1.0.18

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/version.h b/include/sound/version.h
index 4aafeda88634..eae315573563 100644
--- a/include/sound/version.h
+++ b/include/sound/version.h
@@ -1,3 +1,3 @@
 /* include/version.h */
-#define CONFIG_SND_VERSION "1.0.18rc3"
+#define CONFIG_SND_VERSION "1.0.18"
 #define CONFIG_SND_DATE ""
-- 
cgit v1.2.3


From 96631ed16c514cf8b28fab991a076985ce378c26 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 29 Oct 2008 11:19:58 -0700
Subject: udp: introduce sk_for_each_rcu_safenext()

Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d
(udp: RCU handling for Unicast packets.)

 "If the socket is moved from one list to another list in-between the
 time the hash is calculated and the next field is accessed, and the
 socket has moved to the end of the new list, the traversal will not
 complete properly on the list it should have, since the socket will
 be on the end of the new list and there's not a way to tell it's on a
 new list and restart the list traversal.  I think that this can be
 solved by pre-fetching the "next" field (with proper barriers) before
 checking the hash."

This patch corrects this problem, introducing a new
sk_for_each_rcu_safenext() macro.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist.h | 17 +++++++++++++++++
 include/net/sock.h      |  4 ++--
 net/ipv4/udp.c          |  4 ++--
 net/ipv6/udp.c          |  4 ++--
 4 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3f2c97..3ba2998b22ba 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -383,5 +383,22 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference(pos->next))
 
+/**
+ * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ * @next:       the &struct hlist_node to use as a next cursor
+ *
+ * Special version of hlist_for_each_entry_rcu that make sure
+ * each next pointer is fetched before each iteration.
+ */
+#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \
+	for (pos = rcu_dereference((head)->first);			 \
+		pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&	\
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
+		pos = rcu_dereference(next))
+
 #endif	/* __KERNEL__ */
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 0bea25db5471..a4f6d3fc0470 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -419,8 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
 
 #define sk_for_each(__sk, node, list) \
 	hlist_for_each_entry(__sk, node, list, sk_node)
-#define sk_for_each_rcu(__sk, node, list) \
-	hlist_for_each_entry_rcu(__sk, node, list, sk_node)
+#define sk_for_each_rcu_safenext(__sk, node, list, next) \
+	hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next)
 #define sk_for_each_from(__sk, node) \
 	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
 		hlist_for_each_entry_from(__sk, node, sk_node)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ced820318f94..c3ecec8a9e1c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -256,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 		int dif, struct udp_table *udptable)
 {
 	struct sock *sk, *result;
-	struct hlist_node *node;
+	struct hlist_node *node, *next;
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash = udp_hashfn(net, hnum);
 	struct udp_hslot *hslot = &udptable->hash[hash];
@@ -266,7 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 begin:
 	result = NULL;
 	badness = -1;
-	sk_for_each_rcu(sk, node, &hslot->head) {
+	sk_for_each_rcu_safenext(sk, node, &hslot->head, next) {
 		/*
 		 * lockless reader, and SLAB_DESTROY_BY_RCU items:
 		 * We must check this item was not moved to another chain
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1d9790e43dfc..32d914db6c4f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 				      int dif, struct udp_table *udptable)
 {
 	struct sock *sk, *result;
-	struct hlist_node *node;
+	struct hlist_node *node, *next;
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash = udp_hashfn(net, hnum);
 	struct udp_hslot *hslot = &udptable->hash[hash];
@@ -108,7 +108,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 begin:
 	result = NULL;
 	badness = -1;
-	sk_for_each_rcu(sk, node, &hslot->head) {
+	sk_for_each_rcu_safenext(sk, node, &hslot->head, next) {
 		/*
 		 * lockless reader, and SLAB_DESTROY_BY_RCU items:
 		 * We must check this item was not moved to another chain
-- 
cgit v1.2.3


From 5b095d98928fdb9e3b75be20a54b7a6cbf6ca9ad Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 29 Oct 2008 12:52:50 -0700
Subject: net: replace %p6 with %pI6

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/firmware/iscsi_ibft.c                  |  2 +-
 drivers/infiniband/core/sysfs.c                |  2 +-
 drivers/infiniband/hw/mthca/mthca_mcg.c        |  4 ++--
 drivers/infiniband/ulp/ipoib/ipoib_cm.c        |  4 ++--
 drivers/infiniband/ulp/ipoib/ipoib_main.c      | 12 +++++------
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 30 +++++++++++++-------------
 drivers/infiniband/ulp/srp/ib_srp.c            |  6 +++---
 drivers/net/mlx4/mcg.c                         |  4 ++--
 drivers/scsi/iscsi_tcp.c                       |  2 +-
 fs/lockd/host.c                                |  2 +-
 fs/nfs/super.c                                 |  2 +-
 include/linux/sunrpc/svc_xprt.h                |  2 +-
 include/net/ip_vs.h                            |  2 +-
 include/net/netfilter/nf_conntrack_tuple.h     |  2 +-
 include/net/sctp/sctp.h                        |  2 +-
 net/bridge/netfilter/ebt_log.c                 |  2 +-
 net/ipv4/tcp_input.c                           |  2 +-
 net/ipv4/tcp_timer.c                           |  2 +-
 net/ipv6/addrlabel.c                           | 10 ++++-----
 net/ipv6/ah6.c                                 |  2 +-
 net/ipv6/esp6.c                                |  2 +-
 net/ipv6/exthdrs.c                             |  2 +-
 net/ipv6/icmp.c                                |  2 +-
 net/ipv6/ip6mr.c                               |  2 +-
 net/ipv6/ipcomp6.c                             |  2 +-
 net/ipv6/ndisc.c                               |  2 +-
 net/ipv6/netfilter/ip6t_LOG.c                  |  2 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  2 +-
 net/ipv6/tcp_ipv6.c                            |  2 +-
 net/netfilter/ipvs/ip_vs_conn.c                |  4 ++--
 net/netfilter/ipvs/ip_vs_core.c                |  4 ++--
 net/netfilter/ipvs/ip_vs_ctl.c                 |  4 ++--
 net/netfilter/ipvs/ip_vs_proto.c               |  6 +++---
 net/netfilter/ipvs/ip_vs_proto_ah_esp.c        |  2 +-
 net/netfilter/ipvs/ip_vs_xmit.c                |  8 +++----
 net/netfilter/nf_conntrack_ftp.c               |  2 +-
 net/netfilter/nf_conntrack_h323_main.c         |  4 ++--
 net/netfilter/xt_hashlimit.c                   |  2 +-
 net/netfilter/xt_recent.c                      |  2 +-
 net/netlabel/netlabel_addrlist.c               |  2 +-
 net/sctp/ipv6.c                                | 18 ++++++++--------
 net/sctp/sm_statefuns.c                        |  2 +-
 net/sunrpc/clnt.c                              |  2 +-
 net/sunrpc/rpcb_clnt.c                         |  2 +-
 net/sunrpc/svcauth_unix.c                      |  4 ++--
 net/sunrpc/xprtsock.c                          |  8 +++----
 net/xfrm/xfrm_policy.c                         |  4 ++--
 net/xfrm/xfrm_state.c                          |  4 ++--
 security/selinux/avc.c                         |  2 +-
 49 files changed, 100 insertions(+), 100 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 0a6472097a81..acb82aff8808 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -290,7 +290,7 @@ static ssize_t sprintf_ipaddr(char *buf, u8 *ip)
 		/*
 		 * IPv6
 		 */
-		str += sprintf(str, "%p6", ip);
+		str += sprintf(str, "%pI6", ip);
 	}
 	str += sprintf(str, "\n");
 	return str - buf;
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index e985193d631c..4f4d1bb9f069 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -262,7 +262,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
 	if (ret)
 		return ret;
 
-	return sprintf(buf, "%p6\n", gid.raw);
+	return sprintf(buf, "%pI6\n", gid.raw);
 }
 
 static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 693bed0b2d1c..d4c81053e439 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -87,7 +87,7 @@ static int find_mgm(struct mthca_dev *dev,
 	}
 
 	if (0)
-		mthca_dbg(dev, "Hash for %p6 is %04x\n", gid, *hash);
+		mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
 
 	*index = *hash;
 	*prev  = -1;
@@ -254,7 +254,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		goto out;
 
 	if (index == -1) {
-		mthca_err(dev, "MGID %p6 not found\n", gid->raw);
+		mthca_err(dev, "MGID %pI6 not found\n", gid->raw);
 		err = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index d98d87bfe365..47d588ba2a7f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1128,7 +1128,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 		goto err_send_cm;
 	}
 
-	ipoib_dbg(priv, "Request connection 0x%x for gid %p6 qpn 0x%x\n",
+	ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
 		  p->qp->qp_num, pathrec->dgid.raw, qpn);
 
 	return 0;
@@ -1276,7 +1276,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		list_move(&tx->list, &priv->cm.reap_list);
 		queue_work(ipoib_workqueue, &priv->cm.reap_task);
-		ipoib_dbg(priv, "Reap connection for gid %p6\n",
+		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
 			  tx->neigh->dgid.raw);
 		tx->neigh = NULL;
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e7f4f94c3e92..b3a671895bdc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -359,7 +359,7 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
 	spin_lock_irq(&priv->lock);
 
 	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
-		ipoib_dbg(priv, "mark path LID 0x%04x GID %p6 invalid\n",
+		ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
 			be16_to_cpu(path->pathrec.dlid),
 			path->pathrec.dgid.raw);
 		path->valid =  0;
@@ -413,10 +413,10 @@ static void path_rec_completion(int status,
 	unsigned long flags;
 
 	if (!status)
-		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %p6\n",
+		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n",
 			  be16_to_cpu(pathrec->dlid), pathrec->dgid.raw);
 	else
-		ipoib_dbg(priv, "PathRec status %d for GID %p6\n",
+		ipoib_dbg(priv, "PathRec status %d for GID %pI6\n",
 			  status, path->pathrec.dgid.raw);
 
 	skb_queue_head_init(&skqueue);
@@ -527,7 +527,7 @@ static int path_rec_start(struct net_device *dev,
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	ipoib_dbg(priv, "Start path record lookup for %p6\n",
+	ipoib_dbg(priv, "Start path record lookup for %pI6\n",
 		  path->pathrec.dgid.raw);
 
 	init_completion(&path->done);
@@ -764,7 +764,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 			if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
 			    (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
-				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %p6\n",
+				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
 					   skb->dst ? "neigh" : "dst",
 					   be16_to_cpup((__be16 *) skb->data),
 					   IPOIB_QPN(phdr->hwaddr),
@@ -844,7 +844,7 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
 	else
 		return;
 	ipoib_dbg(priv,
-		  "neigh_cleanup for %06x %p6\n",
+		  "neigh_cleanup for %06x %pI6\n",
 		  IPOIB_QPN(n->ha),
 		  n->ha + 4);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 0de79cf4c07c..a2eb3b9789eb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -71,7 +71,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 	struct ipoib_neigh *neigh, *tmp;
 	int tx_dropped = 0;
 
-	ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %p6\n",
+	ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
 			mcast->mcmember.mgid.raw);
 
 	spin_lock_irq(&priv->lock);
@@ -204,7 +204,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
 	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
 		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
-			ipoib_warn(priv, "multicast group %p6 already attached\n",
+			ipoib_warn(priv, "multicast group %pI6 already attached\n",
 				   mcast->mcmember.mgid.raw);
 
 			return 0;
@@ -213,7 +213,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
 					 &mcast->mcmember.mgid, set_qkey);
 		if (ret < 0) {
-			ipoib_warn(priv, "couldn't attach QP to multicast group %p6\n",
+			ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
 				   mcast->mcmember.mgid.raw);
 
 			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
@@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 			mcast->ah = ah;
 			spin_unlock_irq(&priv->lock);
 
-			ipoib_dbg_mcast(priv, "MGID %p6 AV %p, LID 0x%04x, SL %d\n",
+			ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
 					mcast->mcmember.mgid.raw,
 					mcast->ah->ah,
 					be16_to_cpu(mcast->mcmember.mlid),
@@ -291,7 +291,7 @@ ipoib_mcast_sendonly_join_complete(int status,
 
 	if (status) {
 		if (mcast->logcount++ < 20)
-			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %p6, status %d\n",
+			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
 					mcast->mcmember.mgid.raw, status);
 
 		/* Flush out any queued packets */
@@ -351,7 +351,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
 			   ret);
 	} else {
-		ipoib_dbg_mcast(priv, "no multicast record for %p6, starting join\n",
+		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
 				mcast->mcmember.mgid.raw);
 	}
 
@@ -380,7 +380,7 @@ static int ipoib_mcast_join_complete(int status,
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	ipoib_dbg_mcast(priv, "join completion for %p6 (status %d)\n",
+	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
 			mcast->mcmember.mgid.raw, status);
 
 	/* We trap for port events ourselves. */
@@ -410,10 +410,10 @@ static int ipoib_mcast_join_complete(int status,
 
 	if (mcast->logcount++ < 20) {
 		if (status == -ETIMEDOUT) {
-			ipoib_dbg_mcast(priv, "multicast join failed for %p6, status %d\n",
+			ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
 					mcast->mcmember.mgid.raw, status);
 		} else {
-			ipoib_warn(priv, "multicast join failed for %p6, status %d\n",
+			ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
 				   mcast->mcmember.mgid.raw, status);
 		}
 	}
@@ -446,7 +446,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 	ib_sa_comp_mask comp_mask;
 	int ret = 0;
 
-	ipoib_dbg_mcast(priv, "joining MGID %p6\n", mcast->mcmember.mgid.raw);
+	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);
 
 	rec.mgid     = mcast->mcmember.mgid;
 	rec.port_gid = priv->local_gid;
@@ -631,7 +631,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 		ib_sa_free_multicast(mcast->mc);
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
-		ipoib_dbg_mcast(priv, "leaving MGID %p6\n",
+		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
 				mcast->mcmember.mgid.raw);
 
 		/* Remove ourselves from the multicast group */
@@ -663,7 +663,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 	mcast = __ipoib_mcast_find(dev, mgid);
 	if (!mcast) {
 		/* Let's create a new send only group now */
-		ipoib_dbg_mcast(priv, "setting up send only multicast group for %p6\n",
+		ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
 				mgid);
 
 		mcast = ipoib_mcast_alloc(dev, 0);
@@ -797,13 +797,13 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 			/* ignore group which is directly joined by userspace */
 			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
 			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
-				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %p6\n",
+				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",
 						mgid.raw);
 				continue;
 			}
 
 			/* Not found or send-only group, let's add a new entry */
-			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %p6\n",
+			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",
 					mgid.raw);
 
 			nmcast = ipoib_mcast_alloc(dev, 0);
@@ -837,7 +837,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
 		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
 		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
-			ipoib_dbg_mcast(priv, "deleting multicast group %p6\n",
+			ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
 					mcast->mcmember.mgid.raw);
 
 			rb_erase(&mcast->rb_node, &priv->multicast_tree);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index bc825310c6db..7c13db885bf6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1514,7 +1514,7 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
 	    target->state == SRP_TARGET_REMOVED)
 		return -ENODEV;
 
-	return sprintf(buf, "%p6\n", target->path.dgid.raw);
+	return sprintf(buf, "%pI6\n", target->path.dgid.raw);
 }
 
 static ssize_t show_orig_dgid(struct device *dev,
@@ -1526,7 +1526,7 @@ static ssize_t show_orig_dgid(struct device *dev,
 	    target->state == SRP_TARGET_REMOVED)
 		return -ENODEV;
 
-	return sprintf(buf, "%p6\n", target->orig_dgid);
+	return sprintf(buf, "%pI6\n", target->orig_dgid);
 }
 
 static ssize_t show_zero_req_lim(struct device *dev,
@@ -1867,7 +1867,7 @@ static ssize_t srp_create_target(struct device *dev,
 
 	shost_printk(KERN_DEBUG, target->scsi_host, PFX
 		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
-		     "service_id %016llx dgid %p6\n",
+		     "service_id %016llx dgid %pI6\n",
 	       (unsigned long long) be64_to_cpu(target->id_ext),
 	       (unsigned long long) be64_to_cpu(target->ioc_guid),
 	       be16_to_cpu(target->path.pkey),
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index 6f79e84a5c9a..b1622062b12d 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -118,7 +118,7 @@ static int find_mgm(struct mlx4_dev *dev,
 		return err;
 
 	if (0)
-		mlx4_dbg(dev, "Hash for %p6 is %04x\n", gid, *hash);
+		mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
 
 	*index = *hash;
 	*prev  = -1;
@@ -267,7 +267,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
 		goto out;
 
 	if (index == -1) {
-		mlx4_err(dev, "MGID %p6 not found\n", gid);
+		mlx4_err(dev, "MGID %pI6 not found\n", gid);
 		err = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index ef929aef7c12..24d09028a27f 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -1608,7 +1608,7 @@ static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)addr;
 		spin_lock_bh(&conn->session->lock);
-		sprintf(buf, "%p6", &sin6->sin6_addr);
+		sprintf(buf, "%pI6", &sin6->sin6_addr);
 		*port = be16_to_cpu(sin6->sin6_port);
 		spin_unlock_bh(&conn->session->lock);
 		break;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 344e6b475e05..c8ab7d70390d 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -122,7 +122,7 @@ static void nlm_display_address(const struct sockaddr *sap,
 			snprintf(buf, len, NIPQUAD_FMT,
 				 NIPQUAD(sin6->sin6_addr.s6_addr32[3]));
 		else
-			snprintf(buf, len, "%p6", &sin6->sin6_addr);
+			snprintf(buf, len, "%pI6", &sin6->sin6_addr);
 		break;
 	default:
 		snprintf(buf, len, "unsupported address family");
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5fe77219df78..eb391d8d70ba 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -468,7 +468,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
 	}
 	case AF_INET6: {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-		seq_printf(m, ",mountaddr=%p6", &sin6->sin6_addr);
+		seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr);
 		break;
 	}
 	default:
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 42e01c93c7ea..51cb75ea42d5 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -145,7 +145,7 @@ static inline char *__svc_print_addr(struct sockaddr *addr,
 		break;
 
 	case AF_INET6:
-		snprintf(buf, len, "%p6, port=%u",
+		snprintf(buf, len, "%pI6, port=%u",
 			 &((struct sockaddr_in6 *)addr)->sin6_addr,
 			ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
 		break;
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 6a6692067092..af48cada561e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -87,7 +87,7 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
 	int len;
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]",
+		len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]",
 			       &addr->in6) + 1;
 	else
 #endif
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 303efaf68d08..42f1fc96f3ec 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -122,7 +122,7 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t)
 static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t)
 {
 #ifdef DEBUG
-	printk("tuple %p: %u %p6 %hu -> %p6 %hu\n",
+	printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n",
 	       t, t->dst.protonum,
 	       t->src.u3.all, ntohs(t->src.u.all),
 	       t->dst.u3.all, ntohs(t->dst.u.all));
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index a84c3976e1b0..e71b0f7ce88e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -285,7 +285,7 @@ extern int sctp_debug_flag;
 	if (sctp_debug_flag) { \
 		if (saddr->sa.sa_family == AF_INET6) { \
 			printk(KERN_DEBUG \
-			       lead "%p6" trail, \
+			       lead "%pI6" trail, \
 			       leadparm, \
 			       &saddr->v6.sin6_addr, \
 			       otherparms); \
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 3654f3ebdb8f..5e7cff3542f2 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -133,7 +133,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 			printk(" INCOMPLETE IPv6 header");
 			goto out;
 		}
-		printk(" IPv6 SRC=%p6 IPv6 DST=%p6, IPv6 priority=0x%01X, Next Header=%d",
+		printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d",
 		       &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr);
 		nexthdr = ih->nexthdr;
 		offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 191c06bb0f67..04909e4b3c4c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2346,7 +2346,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (sk->sk_family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
-		printk(KERN_DEBUG "Undo %s %p6/%u c%u l%u ss%u/%u p%u\n",
+		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
 		       msg,
 		       &np->daddr, ntohs(inet->dport),
 		       tp->snd_cwnd, tcp_left_out(tp),
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4e6ee5205237..979c9d604eb0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %p6:%u/%u shrinks window %u:%u. Repaired.\n",
+			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %pI6:%u/%u shrinks window %u:%u. Repaired.\n",
 			       &np->daddr, ntohs(inet->dport),
 			       inet->num, tp->snd_una, tp->snd_nxt);
 		}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index d28036659d27..6ff73c4c126a 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -186,7 +186,7 @@ u32 ipv6_addr_label(struct net *net,
 	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
 	rcu_read_unlock();
 
-	ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n",
+	ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n",
 		  __func__, addr, type, ifindex, label);
 
 	return label;
@@ -201,7 +201,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
 	struct ip6addrlbl_entry *newp;
 	int addrtype;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n",
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n",
 		  __func__, prefix, prefixlen, ifindex, (unsigned int)label);
 
 	addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
@@ -289,7 +289,7 @@ static int ip6addrlbl_add(struct net *net,
 	struct ip6addrlbl_entry *newp;
 	int ret = 0;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
 		  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
 		  replace);
 
@@ -313,7 +313,7 @@ static int __ip6addrlbl_del(struct net *net,
 	struct hlist_node *pos, *n;
 	int ret = -ESRCH;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n",
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
 		  __func__, prefix, prefixlen, ifindex);
 
 	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
@@ -337,7 +337,7 @@ static int ip6addrlbl_del(struct net *net,
 	struct in6_addr prefix_buf;
 	int ret;
 
-	ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n",
+	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
 		  __func__, prefix, prefixlen, ifindex);
 
 	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 9bc43f2527ce..7a8a01369e5c 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -419,7 +419,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!x)
 		return;
 
-	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n",
+	NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
 		 ntohl(ah->spi), &iph->daddr);
 
 	xfrm_state_put(x);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ec4be188c341..c02a6308defe 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -367,7 +367,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
 	if (!x)
 		return;
-	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n",
+	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n",
 			ntohl(esph->spi), &iph->daddr);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a89051468359..1c7f400a3cfe 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
 
 	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
 		LIMIT_NETDEBUG(
-			KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr);
+			KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr);
 		goto discard;
 	}
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b3fa38e40dc4..3c2821f9b529 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -681,7 +681,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
 					     IPPROTO_ICMPV6, 0));
 		if (__skb_checksum_complete(skb)) {
-			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n",
+			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
 				       saddr, daddr);
 			goto discard_it;
 		}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 798e6a29dd83..c491fb98a5e3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -297,7 +297,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 		const struct mfc6_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
 
-		seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld",
+		seq_printf(seq, "%pI6 %pI6 %-3d %8ld %8ld %8ld",
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 			   mfc->mf6c_parent,
 			   mfc->mfc_un.res.pkt,
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 9566d8f73140..d4576a9c154f 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -67,7 +67,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!x)
 		return;
 
-	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n",
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n",
 			spi, &iph->daddr);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 191bb0722a7c..2a6752dae09d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -647,7 +647,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 	if ((probes -= neigh->parms->ucast_probes) < 0) {
 		if (!(neigh->nud_state & NUD_VALID)) {
-			ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n",
+			ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n",
 				   __func__, target);
 		}
 		ndisc_send_ns(dev, neigh, target, target, saddr);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index a61ce3010007..02885e8bb69b 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info,
 	}
 
 	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
-	printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr);
+	printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
 
 	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
 	printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index b165a273c6c0..727b9530448a 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -56,7 +56,7 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 static int ipv6_print_tuple(struct seq_file *s,
 			    const struct nf_conntrack_tuple *tuple)
 {
-	return seq_printf(s, "src=%p6 dst=%p6 ",
+	return seq_printf(s, "src=%pI6 dst=%pI6 ",
 			  tuple->src.u3.ip6, tuple->dst.u3.ip6);
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 483550cfdf33..984276463a8d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -872,7 +872,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n",
+			printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n",
 			       genhash ? "failed" : "mismatch",
 			       &ip6h->saddr, ntohs(th->source),
 			       &ip6h->daddr, ntohs(th->dest));
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 89bf65be6f2c..60aba45023ff 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -820,7 +820,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 
 #ifdef CONFIG_IP_VS_IPV6
 		if (cp->af == AF_INET6)
-			seq_printf(seq, "%-3s %p6 %04X %p6 %04X %p6 %04X %-11s %7lu\n",
+			seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n",
 				ip_vs_proto_name(cp->protocol),
 				&cp->caddr.in6, ntohs(cp->cport),
 				&cp->vaddr.in6, ntohs(cp->vport),
@@ -881,7 +881,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 
 #ifdef CONFIG_IP_VS_IPV6
 		if (cp->af == AF_INET6)
-			seq_printf(seq, "%-3s %p6 %04X %p6 %04X %p6 %04X %-11s %-6s %7lu\n",
+			seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
 				ip_vs_proto_name(cp->protocol),
 				&cp->caddr.in6, ntohs(cp->cport),
 				&cp->vaddr.in6, ntohs(cp->vport),
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 9400587a01e7..c3c68443b5b1 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -805,7 +805,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
 	if (ic == NULL)
 		return NF_DROP;
 
-	IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %p6->%p6\n",
+	IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
 		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
 		  &iph->saddr, &iph->daddr);
 
@@ -1175,7 +1175,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 	if (ic == NULL)
 		return NF_DROP;
 
-	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %p6->%p6\n",
+	IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
 		  ic->icmp6_type, ntohs(icmpv6_id(ic)),
 		  &iph->saddr, &iph->daddr);
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 28c47c0d5142..76db27ec9633 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1867,7 +1867,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		if (iter->table == ip_vs_svc_table) {
 #ifdef CONFIG_IP_VS_IPV6
 			if (svc->af == AF_INET6)
-				seq_printf(seq, "%s  [%p6]:%04X %s ",
+				seq_printf(seq, "%s  [%pI6]:%04X %s ",
 					   ip_vs_proto_name(svc->protocol),
 					   &svc->addr.in6,
 					   ntohs(svc->port),
@@ -1895,7 +1895,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 #ifdef CONFIG_IP_VS_IPV6
 			if (dest->af == AF_INET6)
 				seq_printf(seq,
-					   "  -> [%p6]:%04X"
+					   "  -> [%pI6]:%04X"
 					   "      %-7s %-6d %-10d %-10d\n",
 					   &dest->addr.in6,
 					   ntohs(dest->port),
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index d7ce4f1839c9..54cd67fbfe74 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -203,7 +203,7 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
 	if (ih == NULL)
 		sprintf(buf, "%s TRUNCATED", pp->name);
 	else if (ih->nexthdr == IPPROTO_FRAGMENT)
-		sprintf(buf, "%s %p6->%p6 frag",
+		sprintf(buf, "%s %pI6->%pI6 frag",
 			pp->name, &ih->saddr, &ih->daddr);
 	else {
 		__be16 _ports[2], *pptr;
@@ -211,10 +211,10 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
 		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED %p6->%p6",
+			sprintf(buf, "%s TRUNCATED %pI6->%pI6",
 				pp->name, &ih->saddr, &ih->daddr);
 		else
-			sprintf(buf, "%s %p6:%u->%p6:%u",
+			sprintf(buf, "%s %pI6:%u->%pI6:%u",
 				pp->name,
 				&ih->saddr, ntohs(pptr[0]),
 				&ih->daddr, ntohs(pptr[1]));
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 59f2d11b683e..6ede88812044 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -154,7 +154,7 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 	if (ih == NULL)
 		sprintf(buf, "%s TRUNCATED", pp->name);
 	else
-		sprintf(buf, "%s %p6->%p6",
+		sprintf(buf, "%s %pI6->%pI6",
 			pp->name, &ih->saddr, &ih->daddr);
 
 	printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index be34c335cabe..fc342dda950a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -141,12 +141,12 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
 								 NULL, &fl);
 			if (!rt) {
 				spin_unlock(&dest->dst_lock);
-				IP_VS_DBG_RL("ip6_route_output error, dest: %p6\n",
+				IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
 					     &dest->addr.in6);
 				return NULL;
 			}
 			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
-			IP_VS_DBG(10, "new dst %p6, refcnt=%d\n",
+			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
 				  &dest->addr.in6,
 				  atomic_read(&rt->u.dst.__refcnt));
 		}
@@ -166,7 +166,7 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
 
 		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
 		if (!rt) {
-			IP_VS_DBG_RL("ip6_route_output error, dest: %p6\n",
+			IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
 				     &cp->daddr.in6);
 			return NULL;
 		}
@@ -300,7 +300,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
 	if (!rt) {
-		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %p6\n",
+		IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n",
 			     &iph->daddr);
 		goto tx_error_icmp;
 	}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 05bf82d345ce..8cab6d595909 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -467,7 +467,7 @@ static int help(struct sk_buff *skb,
 				 NIPQUAD(cmd.u3.ip),
 				 NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip));
 		} else {
-			pr_debug("conntrack_ftp: NOT RECORDING: %p6 != %p6\n",
+			pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n",
 				 cmd.u3.ip6,
 				 ct->tuplehash[dir].tuple.src.u3.ip6);
 		}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 29e49b1c80b3..99bc803d1dd1 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -850,7 +850,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 	    get_h225_addr(ct, *data, &setup->destCallSignalAddress,
 			  &addr, &port) &&
 	    memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) {
-		pr_debug("nf_ct_q931: set destCallSignalAddress %p6:%hu->%p6:%hu\n",
+		pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n",
 			 &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3,
 			 ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
 		ret = set_h225_addr(skb, data, dataoff,
@@ -866,7 +866,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 	    get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
 			  &addr, &port) &&
 	    memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) {
-		pr_debug("nf_ct_q931: set sourceCallSignalAddress %p6:%hu->%p6:%hu\n",
+		pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n",
 			 &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3,
 			 ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
 		ret = set_h225_addr(skb, data, dataoff,
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index f04c6ed43674..6379717f9044 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -904,7 +904,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				 ent->rateinfo.cost);
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	case NFPROTO_IPV6:
-		return seq_printf(s, "%ld %p6:%u->%p6:%u %u %u %u\n",
+		return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
 				 &ent->dst.ip6.src,
 				 ntohs(ent->dst.src_port),
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index a377ea333e16..b785727a5bf7 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -426,7 +426,7 @@ static int recent_seq_show(struct seq_file *seq, void *v)
 			   "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl,
 			   e->stamps[i], e->index);
 	else
-		seq_printf(seq, "src=%p6 ttl: %u last_seen: %lu oldest_pkt: %u",
+		seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u",
 			   &e->addr.in6, e->ttl, e->stamps[i], e->index);
 	for (i = 0; i < e->nstamps; i++)
 		seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]);
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index 614c95ec39df..8b1c58b0820c 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -370,7 +370,7 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
 
 	if (dev != NULL)
 		audit_log_format(audit_buf, " netif=%s", dev);
-	audit_log_format(audit_buf, " %s=%p6", dir, addr);
+	audit_log_format(audit_buf, " %s=%pI6", dir, addr);
 	if (ntohl(mask->s6_addr32[3]) != 0xffffffff) {
 		u32 mask_len = 0;
 		u32 mask_val;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e82668bd2b50..ceaa4aa066ea 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -223,7 +223,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 	}
 
-	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%p6 dst:%p6\n",
+	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
 			  __func__, skb, skb->len,
 			  &fl.fl6_src, &fl.fl6_dst);
 
@@ -251,18 +251,18 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 		fl.oif = daddr->v6.sin6_scope_id;
 
 
-	SCTP_DEBUG_PRINTK("%s: DST=%p6 ", __func__, &fl.fl6_dst);
+	SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst);
 
 	if (saddr) {
 		ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr);
-		SCTP_DEBUG_PRINTK("SRC=%p6 - ", &fl.fl6_src);
+		SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src);
 	}
 
 	dst = ip6_route_output(&init_net, NULL, &fl);
 	if (!dst->error) {
 		struct rt6_info *rt;
 		rt = (struct rt6_info *)dst;
-		SCTP_DEBUG_PRINTK("rt6_dst:%p6 rt6_src:%p6\n",
+		SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n",
 			&rt->rt6i_dst.addr, &rt->rt6i_src.addr);
 		return dst;
 	}
@@ -309,7 +309,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
 	__u8 matchlen = 0;
 	__u8 bmatchlen;
 
-	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%p6 ",
+	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%pI6 ",
 			  __func__, asoc, dst, &daddr->v6.sin6_addr);
 
 	if (!asoc) {
@@ -318,7 +318,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
 				   &daddr->v6.sin6_addr,
 				   inet6_sk(&sk->inet.sk)->srcprefs,
 				   &saddr->v6.sin6_addr);
-		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %p6\n",
+		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n",
 				  &saddr->v6.sin6_addr);
 		return;
 	}
@@ -347,10 +347,10 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
 
 	if (baddr) {
 		memcpy(saddr, baddr, sizeof(union sctp_addr));
-		SCTP_DEBUG_PRINTK("saddr: %p6\n", &saddr->v6.sin6_addr);
+		SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
 	} else {
 		printk(KERN_ERR "%s: asoc:%p Could not find a valid source "
-		       "address for the dest:%p6\n",
+		       "address for the dest:%pI6\n",
 		       __func__, asoc, &daddr->v6.sin6_addr);
 	}
 
@@ -720,7 +720,7 @@ static int sctp_v6_is_ce(const struct sk_buff *skb)
 /* Dump the v6 addr to the seq file. */
 static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 {
-	seq_printf(seq, "%p6 ", &addr->v6.sin6_addr);
+	seq_printf(seq, "%pI6 ", &addr->v6.sin6_addr);
 }
 
 static void sctp_v6_ecn_capable(struct sock *sk)
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 9f370964e733..d07b484b873a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1123,7 +1123,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
 		if (from_addr.sa.sa_family == AF_INET6) {
 			if (net_ratelimit())
 				printk(KERN_WARNING
-				    "%s association %p could not find address %p6\n",
+				    "%s association %p could not find address %pI6\n",
 				    __func__,
 				    asoc,
 				    &from_addr.v6.sin6_addr);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 26f61fd11eab..8f067497c212 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -278,7 +278,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		case AF_INET6: {
 			struct sockaddr_in6 *sin =
 					(struct sockaddr_in6 *)args->address;
-			snprintf(servername, sizeof(servername), "%p6",
+			snprintf(servername, sizeof(servername), "%pI6",
 				 &sin->sin6_addr);
 			break;
 		}
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 968ec1f66bc3..4c8adadc214d 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -305,7 +305,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
 		snprintf(buf, sizeof(buf), "::.%u.%u",
 				port >> 8, port & 0xff);
 	else
-		snprintf(buf, sizeof(buf), "%p6.%u.%u",
+		snprintf(buf, sizeof(buf), "%pI6.%u.%u",
 			 &address_to_register->sin6_addr,
 			 port >> 8, port & 0xff);
 	map->r_addr = buf;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index eb640c1a1bc9..16f714a247bc 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -168,7 +168,7 @@ static void ip_map_request(struct cache_detail *cd,
 				ntohl(im->m_addr.s6_addr32[3]) >>  8 & 0xff,
 				ntohl(im->m_addr.s6_addr32[3]) >>  0 & 0xff);
 	} else {
-		snprintf(text_addr, 40, "%p6", &im->m_addr);
+		snprintf(text_addr, 40, "%pI6", &im->m_addr);
 	}
 	qword_add(bpp, blen, im->m_class);
 	qword_add(bpp, blen, text_addr);
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
 			ntohl(addr.s6_addr32[3]) >>  0 & 0xff,
 			dom);
 	} else {
-		seq_printf(m, "%s %p6 %s\n", im->m_class, &addr, dom);
+		seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom);
 	}
 	return 0;
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3c9aff584579..f9ce3c9949d5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -341,7 +341,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
 
 	buf = kzalloc(40, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 40, "%p6",&addr->sin6_addr);
+		snprintf(buf, 40, "%pI6",&addr->sin6_addr);
 	}
 	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
 
@@ -356,7 +356,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
 
 	buf = kzalloc(64, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 64, "addr=%p6 port=%u proto=%s",
+		snprintf(buf, 64, "addr=%pI6 port=%u proto=%s",
 				&addr->sin6_addr,
 				ntohs(addr->sin6_port),
 				protocol);
@@ -378,7 +378,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
 
 	buf = kzalloc(50, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 50, "%p6.%u.%u",
+		snprintf(buf, 50, "%pI6.%u.%u",
 			 &addr->sin6_addr,
 			 ntohs(addr->sin6_port) >> 8,
 			 ntohs(addr->sin6_port) & 0xff);
@@ -1407,7 +1407,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
 		if (port > last)
 			nloop++;
 	} while (err == -EADDRINUSE && nloop != 2);
-	dprintk("RPC:       xs_bind6 %p6:%u: %s (%d)\n",
+	dprintk("RPC:       xs_bind6 %pI6:%u: %s (%d)\n",
 		&myaddr.sin6_addr, port, err ? "failed" : "ok", err);
 	return err;
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f052b069f983..80b13eea30e7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2467,11 +2467,11 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
 					 sel->prefixlen_d);
 		break;
 	case AF_INET6:
-		audit_log_format(audit_buf, " src=%p6", sel->saddr.a6);
+		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
 		if (sel->prefixlen_s != 128)
 			audit_log_format(audit_buf, " src_prefixlen=%d",
 					 sel->prefixlen_s);
-		audit_log_format(audit_buf, " dst=%p6", sel->daddr.a6);
+		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
 		if (sel->prefixlen_d != 128)
 			audit_log_format(audit_buf, " dst_prefixlen=%d",
 					 sel->prefixlen_d);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7944861fb9bf..304eca4ac970 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2115,7 +2115,7 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
 				 NIPQUAD(x->id.daddr.a4));
 		break;
 	case AF_INET6:
-		audit_log_format(audit_buf, " src=%p6 dst=%p6",
+		audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
 				 x->props.saddr.a6, x->id.daddr.a6);
 		break;
 	}
@@ -2140,7 +2140,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
 	case AF_INET6:
 		iph6 = ipv6_hdr(skb);
 		audit_log_format(audit_buf,
-				 " src=%p6 dst=%p6 flowlbl=0x%x%02x%02x",
+				 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
 				 &iph6->saddr,&iph6->daddr,
 				 iph6->flow_lbl[0] & 0x0f,
 				 iph6->flow_lbl[1],
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index c91008f438a2..ed6af12cdf43 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -495,7 +495,7 @@ static inline void avc_print_ipv6_addr(struct audit_buffer *ab,
 				       char *name1, char *name2)
 {
 	if (!ipv6_addr_any(addr))
-		audit_log_format(ab, " %s=%p6", name1, addr);
+		audit_log_format(ab, " %s=%pI6", name1, addr);
 	if (port)
 		audit_log_format(ab, " %s=%d", name2, ntohs(port));
 }
-- 
cgit v1.2.3


From 2cb1599f9b2ecdd7a9e59feeee647eb258966839 Mon Sep 17 00:00:00 2001
From: David Chinner <david@fromorbit.com>
Date: Thu, 30 Oct 2008 17:32:23 +1100
Subject: Inode: Allow external initialisers

To allow XFS to combine the XFS and linux inodes into a single
structure, we need to drive inode lookup from the XFS inode cache,
not the generic inode cache. This means that we need initialise a
struct inode from a context outside alloc_inode() as it is no longer
used by XFS.

Factor and export the struct inode initialisation code from
alloc_inode() to inode_init_always() as a counterpart to
inode_init_once().  i.e. we have to call this init function for each
inode instantiation (always), as opposed inode_init_once() which is
only called on slab object instantiation (once).

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/inode.c         | 140 +++++++++++++++++++++++++++++------------------------
 include/linux/fs.h |   1 +
 2 files changed, 79 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 0487ddba1397..e7ee99907d60 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode)
 	wake_up_bit(&inode->i_state, __I_LOCK);
 }
 
-static struct inode *alloc_inode(struct super_block *sb)
+/**
+ * inode_init_always - perform inode structure intialisation
+ * @sb		- superblock inode belongs to.
+ * @inode	- inode to initialise
+ *
+ * These are initializations that need to be done on every inode
+ * allocation as the fields are not initialised by slab allocation.
+ */
+struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 {
 	static const struct address_space_operations empty_aops;
 	static struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
-	struct inode *inode;
-
-	if (sb->s_op->alloc_inode)
-		inode = sb->s_op->alloc_inode(sb);
-	else
-		inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
-	if (inode) {
-		struct address_space * const mapping = &inode->i_data;
-
-		inode->i_sb = sb;
-		inode->i_blkbits = sb->s_blocksize_bits;
-		inode->i_flags = 0;
-		atomic_set(&inode->i_count, 1);
-		inode->i_op = &empty_iops;
-		inode->i_fop = &empty_fops;
-		inode->i_nlink = 1;
-		atomic_set(&inode->i_writecount, 0);
-		inode->i_size = 0;
-		inode->i_blocks = 0;
-		inode->i_bytes = 0;
-		inode->i_generation = 0;
+	struct address_space * const mapping = &inode->i_data;
+
+	inode->i_sb = sb;
+	inode->i_blkbits = sb->s_blocksize_bits;
+	inode->i_flags = 0;
+	atomic_set(&inode->i_count, 1);
+	inode->i_op = &empty_iops;
+	inode->i_fop = &empty_fops;
+	inode->i_nlink = 1;
+	atomic_set(&inode->i_writecount, 0);
+	inode->i_size = 0;
+	inode->i_blocks = 0;
+	inode->i_bytes = 0;
+	inode->i_generation = 0;
 #ifdef CONFIG_QUOTA
-		memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
+	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
 #endif
-		inode->i_pipe = NULL;
-		inode->i_bdev = NULL;
-		inode->i_cdev = NULL;
-		inode->i_rdev = 0;
-		inode->dirtied_when = 0;
-		if (security_inode_alloc(inode)) {
-			if (inode->i_sb->s_op->destroy_inode)
-				inode->i_sb->s_op->destroy_inode(inode);
-			else
-				kmem_cache_free(inode_cachep, (inode));
-			return NULL;
-		}
+	inode->i_pipe = NULL;
+	inode->i_bdev = NULL;
+	inode->i_cdev = NULL;
+	inode->i_rdev = 0;
+	inode->dirtied_when = 0;
+	if (security_inode_alloc(inode)) {
+		if (inode->i_sb->s_op->destroy_inode)
+			inode->i_sb->s_op->destroy_inode(inode);
+		else
+			kmem_cache_free(inode_cachep, (inode));
+		return NULL;
+	}
 
-		spin_lock_init(&inode->i_lock);
-		lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+	spin_lock_init(&inode->i_lock);
+	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
 
-		mutex_init(&inode->i_mutex);
-		lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+	mutex_init(&inode->i_mutex);
+	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 
-		init_rwsem(&inode->i_alloc_sem);
-		lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+	init_rwsem(&inode->i_alloc_sem);
+	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
 
-		mapping->a_ops = &empty_aops;
- 		mapping->host = inode;
-		mapping->flags = 0;
-		mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
-		mapping->assoc_mapping = NULL;
-		mapping->backing_dev_info = &default_backing_dev_info;
-		mapping->writeback_index = 0;
+	mapping->a_ops = &empty_aops;
+	mapping->host = inode;
+	mapping->flags = 0;
+	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+	mapping->assoc_mapping = NULL;
+	mapping->backing_dev_info = &default_backing_dev_info;
+	mapping->writeback_index = 0;
 
-		/*
-		 * If the block_device provides a backing_dev_info for client
-		 * inodes then use that.  Otherwise the inode share the bdev's
-		 * backing_dev_info.
-		 */
-		if (sb->s_bdev) {
-			struct backing_dev_info *bdi;
+	/*
+	 * If the block_device provides a backing_dev_info for client
+	 * inodes then use that.  Otherwise the inode share the bdev's
+	 * backing_dev_info.
+	 */
+	if (sb->s_bdev) {
+		struct backing_dev_info *bdi;
 
-			bdi = sb->s_bdev->bd_inode_backing_dev_info;
-			if (!bdi)
-				bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
-			mapping->backing_dev_info = bdi;
-		}
-		inode->i_private = NULL;
-		inode->i_mapping = mapping;
+		bdi = sb->s_bdev->bd_inode_backing_dev_info;
+		if (!bdi)
+			bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+		mapping->backing_dev_info = bdi;
 	}
+	inode->i_private = NULL;
+	inode->i_mapping = mapping;
+
 	return inode;
 }
+EXPORT_SYMBOL(inode_init_always);
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+	struct inode *inode;
+
+	if (sb->s_op->alloc_inode)
+		inode = sb->s_op->alloc_inode(sb);
+	else
+		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
+
+	if (inode)
+		return inode_init_always(sb, inode);
+	return NULL;
+}
 
 void destroy_inode(struct inode *inode) 
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5b248d61430c..04abead4b021 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1881,6 +1881,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
 
 extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
 
+extern struct inode * inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
 extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
-- 
cgit v1.2.3


From 8290c35f87304a6b73d4fd17b03580b4f7425de8 Mon Sep 17 00:00:00 2001
From: David Chinner <david@fromorbit.com>
Date: Thu, 30 Oct 2008 17:35:24 +1100
Subject: Inode: Allow external list initialisation

To allow XFS to combine the XFS and linux inodes into a single
structure, we need to drive inode lookup from the XFS inode cache,
not the generic inode cache. This means that we need initialise a
struct inode from a context outside alloc_inode() as it is no longer
used by XFS.

After inode allocation and initialisation, we need to add the inode
to the superblock list, the in-use list, hash it and do some
accounting. This all needs to be done with the inode_lock held and
there are already several places in fs/inode.c that do this list
manipulation.  Factor out the common code, add a locking wrapper and
export the function so ti can be called from XFS.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/inode.c         | 67 +++++++++++++++++++++++++++++++++++++-----------------
 include/linux/fs.h |  1 +
 2 files changed, 47 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index e7ee99907d60..fbcf6c5e7605 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -550,6 +550,49 @@ repeat:
 	return node ? inode : NULL;
 }
 
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+	unsigned long tmp;
+
+	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+			L1_CACHE_BYTES;
+	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+	return tmp & I_HASHMASK;
+}
+
+static inline void
+__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
+			struct inode *inode)
+{
+	inodes_stat.nr_inodes++;
+	list_add(&inode->i_list, &inode_in_use);
+	list_add(&inode->i_sb_list, &sb->s_inodes);
+	if (head)
+		hlist_add_head(&inode->i_hash, head);
+}
+
+/**
+ * inode_add_to_lists - add a new inode to relevant lists
+ * @sb		- superblock inode belongs to.
+ * @inode	- inode to mark in use
+ *
+ * When an inode is allocated it needs to be accounted for, added to the in use
+ * list, the owning superblock and the inode hash. This needs to be done under
+ * the inode_lock, so export a function to do this rather than the inode lock
+ * itself. We calculate the hash list to add to here so it is all internal
+ * which requires the caller to have already set up the inode number in the
+ * inode to add.
+ */
+void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+{
+	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+
+	spin_lock(&inode_lock);
+	__inode_add_to_lists(sb, head, inode);
+	spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_add_to_lists);
+
 /**
  *	new_inode 	- obtain an inode
  *	@sb: superblock
@@ -577,9 +620,7 @@ struct inode *new_inode(struct super_block *sb)
 	inode = alloc_inode(sb);
 	if (inode) {
 		spin_lock(&inode_lock);
-		inodes_stat.nr_inodes++;
-		list_add(&inode->i_list, &inode_in_use);
-		list_add(&inode->i_sb_list, &sb->s_inodes);
+		__inode_add_to_lists(sb, NULL, inode);
 		inode->i_ino = ++last_ino;
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
@@ -638,10 +679,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h
 			if (set(inode, data))
 				goto set_failed;
 
-			inodes_stat.nr_inodes++;
-			list_add(&inode->i_list, &inode_in_use);
-			list_add(&inode->i_sb_list, &sb->s_inodes);
-			hlist_add_head(&inode->i_hash, head);
+			__inode_add_to_lists(sb, head, inode);
 			inode->i_state = I_LOCK|I_NEW;
 			spin_unlock(&inode_lock);
 
@@ -687,10 +725,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
 			inode->i_ino = ino;
-			inodes_stat.nr_inodes++;
-			list_add(&inode->i_list, &inode_in_use);
-			list_add(&inode->i_sb_list, &sb->s_inodes);
-			hlist_add_head(&inode->i_hash, head);
+			__inode_add_to_lists(sb, head, inode);
 			inode->i_state = I_LOCK|I_NEW;
 			spin_unlock(&inode_lock);
 
@@ -714,16 +749,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
 	return inode;
 }
 
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
-	unsigned long tmp;
-
-	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
-			L1_CACHE_BYTES;
-	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
-	return tmp & I_HASHMASK;
-}
-
 /**
  *	iunique - get a unique inode number
  *	@sb: superblock
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04abead4b021..1deedf235d55 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1883,6 +1883,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
 
 extern struct inode * inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
+extern void inode_add_to_lists(struct super_block *, struct inode *);
 extern void iput(struct inode *);
 extern struct inode * igrab(struct inode *);
 extern ino_t iunique(struct super_block *, ino_t);
-- 
cgit v1.2.3


From 180ee700ddfcc882d90410d979a4b3a804380ed2 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:32 +0000
Subject: [ARM] S3C: Move regs-watchdog.h to arch/arm/plat-s3c/include/plat

Move  regs-watchdog.h to arch/arm/plat-s3c/include/plat ready
to clean out the old include directories

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/include/mach/system-reset.h |  2 +-
 arch/arm/plat-s3c/include/plat/regs-watchdog.h    | 41 +++++++++++++++++++++++
 arch/arm/plat-s3c/include/plat/uncompress.h       |  2 +-
 drivers/watchdog/s3c2410_wdt.c                    |  2 +-
 include/asm-arm/plat-s3c/regs-watchdog.h          | 41 -----------------------
 5 files changed, 44 insertions(+), 44 deletions(-)
 create mode 100644 arch/arm/plat-s3c/include/plat/regs-watchdog.h
 delete mode 100644 include/asm-arm/plat-s3c/regs-watchdog.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/include/mach/system-reset.h b/arch/arm/mach-s3c2410/include/mach/system-reset.h
index 43535a0e7186..7613d0a384ba 100644
--- a/arch/arm/mach-s3c2410/include/mach/system-reset.h
+++ b/arch/arm/mach-s3c2410/include/mach/system-reset.h
@@ -13,7 +13,7 @@
 #include <mach/hardware.h>
 #include <linux/io.h>
 
-#include <asm/plat-s3c/regs-watchdog.h>
+#include <plat/regs-watchdog.h>
 #include <mach/regs-clock.h>
 
 #include <linux/clk.h>
diff --git a/arch/arm/plat-s3c/include/plat/regs-watchdog.h b/arch/arm/plat-s3c/include/plat/regs-watchdog.h
new file mode 100644
index 000000000000..4938492470f7
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/regs-watchdog.h
@@ -0,0 +1,41 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-watchdog.h
+ *
+ * Copyright (c) 2003 Simtec Electronics <linux@simtec.co.uk>
+ *		      http://www.simtec.co.uk/products/SWLINUX/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * S3C2410 Watchdog timer control
+*/
+
+
+#ifndef __ASM_ARCH_REGS_WATCHDOG_H
+#define __ASM_ARCH_REGS_WATCHDOG_H
+
+#define S3C_WDOGREG(x) ((x) + S3C_VA_WATCHDOG)
+
+#define S3C2410_WTCON	   S3C_WDOGREG(0x00)
+#define S3C2410_WTDAT	   S3C_WDOGREG(0x04)
+#define S3C2410_WTCNT	   S3C_WDOGREG(0x08)
+
+/* the watchdog can either generate a reset pulse, or an
+ * interrupt.
+ */
+
+#define S3C2410_WTCON_RSTEN   (0x01)
+#define S3C2410_WTCON_INTEN   (1<<2)
+#define S3C2410_WTCON_ENABLE  (1<<5)
+
+#define S3C2410_WTCON_DIV16   (0<<3)
+#define S3C2410_WTCON_DIV32   (1<<3)
+#define S3C2410_WTCON_DIV64   (2<<3)
+#define S3C2410_WTCON_DIV128  (3<<3)
+
+#define S3C2410_WTCON_PRESCALE(x) ((x) << 8)
+#define S3C2410_WTCON_PRESCALE_MASK (0xff00)
+
+#endif /* __ASM_ARCH_REGS_WATCHDOG_H */
+
+
diff --git a/arch/arm/plat-s3c/include/plat/uncompress.h b/arch/arm/plat-s3c/include/plat/uncompress.h
index 4df006b9cc10..8a8a927292e0 100644
--- a/arch/arm/plat-s3c/include/plat/uncompress.h
+++ b/arch/arm/plat-s3c/include/plat/uncompress.h
@@ -28,7 +28,7 @@ static void arch_detect_cpu(void);
 /* defines for UART registers */
 
 #include <plat/regs-serial.h>
-#include <asm/plat-s3c/regs-watchdog.h>
+#include <plat/regs-watchdog.h>
 
 /* working in physical space... */
 #undef S3C2410_WDOGREG
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 86d42801de45..13a4b178b8cc 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -42,7 +42,7 @@
 #undef S3C_VA_WATCHDOG
 #define S3C_VA_WATCHDOG (0)
 
-#include <asm/plat-s3c/regs-watchdog.h>
+#include <plat/regs-watchdog.h>
 
 #define PFX "s3c2410-wdt: "
 
diff --git a/include/asm-arm/plat-s3c/regs-watchdog.h b/include/asm-arm/plat-s3c/regs-watchdog.h
deleted file mode 100644
index 4938492470f7..000000000000
--- a/include/asm-arm/plat-s3c/regs-watchdog.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-watchdog.h
- *
- * Copyright (c) 2003 Simtec Electronics <linux@simtec.co.uk>
- *		      http://www.simtec.co.uk/products/SWLINUX/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * S3C2410 Watchdog timer control
-*/
-
-
-#ifndef __ASM_ARCH_REGS_WATCHDOG_H
-#define __ASM_ARCH_REGS_WATCHDOG_H
-
-#define S3C_WDOGREG(x) ((x) + S3C_VA_WATCHDOG)
-
-#define S3C2410_WTCON	   S3C_WDOGREG(0x00)
-#define S3C2410_WTDAT	   S3C_WDOGREG(0x04)
-#define S3C2410_WTCNT	   S3C_WDOGREG(0x08)
-
-/* the watchdog can either generate a reset pulse, or an
- * interrupt.
- */
-
-#define S3C2410_WTCON_RSTEN   (0x01)
-#define S3C2410_WTCON_INTEN   (1<<2)
-#define S3C2410_WTCON_ENABLE  (1<<5)
-
-#define S3C2410_WTCON_DIV16   (0<<3)
-#define S3C2410_WTCON_DIV32   (1<<3)
-#define S3C2410_WTCON_DIV64   (2<<3)
-#define S3C2410_WTCON_DIV128  (3<<3)
-
-#define S3C2410_WTCON_PRESCALE(x) ((x) << 8)
-#define S3C2410_WTCON_PRESCALE_MASK (0xff00)
-
-#endif /* __ASM_ARCH_REGS_WATCHDOG_H */
-
-
-- 
cgit v1.2.3


From 9498cb79463c9b2abb243a4b0c2ce3ac1853d5b0 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:33 +0000
Subject: [ARM] S3C: Move i2c headers to arch/arm/plat-s3c/include/plat.

Move the i2c headers to arch/arm/plat-s3c/include/plat
ready to clean out the old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/mach-bast.c         |  2 +-
 arch/arm/mach-s3c2410/mach-n30.c          |  2 +-
 arch/arm/mach-s3c2412/mach-jive.c         |  2 +-
 arch/arm/plat-s3c/include/plat/iic.h      | 33 ++++++++++++++++++
 arch/arm/plat-s3c/include/plat/regs-iic.h | 56 +++++++++++++++++++++++++++++++
 drivers/i2c/busses/i2c-s3c2410.c          |  4 +--
 include/asm-arm/plat-s3c/iic.h            | 33 ------------------
 include/asm-arm/plat-s3c/regs-iic.h       | 56 -------------------------------
 8 files changed, 94 insertions(+), 94 deletions(-)
 create mode 100644 arch/arm/plat-s3c/include/plat/iic.h
 create mode 100644 arch/arm/plat-s3c/include/plat/regs-iic.h
 delete mode 100644 include/asm-arm/plat-s3c/iic.h
 delete mode 100644 include/asm-arm/plat-s3c/regs-iic.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 8db9c700e3c2..8da05f9e7bb4 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -45,7 +45,7 @@
 #include <mach/regs-lcd.h>
 
 #include <asm/plat-s3c/nand.h>
-#include <asm/plat-s3c/iic.h>
+#include <plat/iic.h>
 #include <mach/fb.h>
 
 #include <linux/mtd/mtd.h>
diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c
index 82505517846c..836c9f639215 100644
--- a/arch/arm/mach-s3c2410/mach-n30.c
+++ b/arch/arm/mach-s3c2410/mach-n30.c
@@ -40,7 +40,7 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/map.h>
 
-#include <asm/plat-s3c/iic.h>
+#include <plat/iic.h>
 #include <plat/regs-serial.h>
 
 #include <plat/clock.h>
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index b08f18c8c47a..80208d37756d 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -32,7 +32,7 @@
 
 #include <plat/regs-serial.h>
 #include <asm/plat-s3c/nand.h>
-#include <asm/plat-s3c/iic.h>
+#include <plat/iic.h>
 
 #include <mach/regs-power.h>
 #include <mach/regs-gpio.h>
diff --git a/arch/arm/plat-s3c/include/plat/iic.h b/arch/arm/plat-s3c/include/plat/iic.h
new file mode 100644
index 000000000000..5106acaa1d0e
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/iic.h
@@ -0,0 +1,33 @@
+/* arch/arm/mach-s3c2410/include/mach/iic.h
+ *
+ * Copyright (c) 2004 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * S3C2410 - I2C Controller platfrom_device info
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#ifndef __ASM_ARCH_IIC_H
+#define __ASM_ARCH_IIC_H __FILE__
+
+#define S3C_IICFLG_FILTER	(1<<0)	/* enable s3c2440 filter */
+
+/* Notes:
+ *	1) All frequencies are expressed in Hz
+ *	2) A value of zero is `do not care`
+*/
+
+struct s3c2410_platform_i2c {
+	int		bus_num;	/* bus number to use */
+	unsigned int	flags;
+	unsigned int	slave_addr;	/* slave address for controller */
+	unsigned long	bus_freq;	/* standard bus frequency */
+	unsigned long	max_freq;	/* max frequency for the bus */
+	unsigned long	min_freq;	/* min frequency for the bus */
+	unsigned int	sda_delay;	/* pclks (s3c2440 only) */
+};
+
+#endif /* __ASM_ARCH_IIC_H */
diff --git a/arch/arm/plat-s3c/include/plat/regs-iic.h b/arch/arm/plat-s3c/include/plat/regs-iic.h
new file mode 100644
index 000000000000..2f7c17de8ac8
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/regs-iic.h
@@ -0,0 +1,56 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-iic.h
+ *
+ * Copyright (c) 2004 Simtec Electronics <linux@simtec.co.uk>
+ *		http://www.simtec.co.uk/products/SWLINUX/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * S3C2410 I2C Controller
+*/
+
+#ifndef __ASM_ARCH_REGS_IIC_H
+#define __ASM_ARCH_REGS_IIC_H __FILE__
+
+/* see s3c2410x user guide, v1.1, section 9 (p447) for more info */
+
+#define S3C2410_IICREG(x) (x)
+
+#define S3C2410_IICCON    S3C2410_IICREG(0x00)
+#define S3C2410_IICSTAT   S3C2410_IICREG(0x04)
+#define S3C2410_IICADD    S3C2410_IICREG(0x08)
+#define S3C2410_IICDS     S3C2410_IICREG(0x0C)
+#define S3C2440_IICLC	  S3C2410_IICREG(0x10)
+
+#define S3C2410_IICCON_ACKEN		(1<<7)
+#define S3C2410_IICCON_TXDIV_16		(0<<6)
+#define S3C2410_IICCON_TXDIV_512	(1<<6)
+#define S3C2410_IICCON_IRQEN		(1<<5)
+#define S3C2410_IICCON_IRQPEND		(1<<4)
+#define S3C2410_IICCON_SCALE(x)		((x)&15)
+#define S3C2410_IICCON_SCALEMASK	(0xf)
+
+#define S3C2410_IICSTAT_MASTER_RX	(2<<6)
+#define S3C2410_IICSTAT_MASTER_TX	(3<<6)
+#define S3C2410_IICSTAT_SLAVE_RX	(0<<6)
+#define S3C2410_IICSTAT_SLAVE_TX	(1<<6)
+#define S3C2410_IICSTAT_MODEMASK	(3<<6)
+
+#define S3C2410_IICSTAT_START		(1<<5)
+#define S3C2410_IICSTAT_BUSBUSY		(1<<5)
+#define S3C2410_IICSTAT_TXRXEN		(1<<4)
+#define S3C2410_IICSTAT_ARBITR		(1<<3)
+#define S3C2410_IICSTAT_ASSLAVE		(1<<2)
+#define S3C2410_IICSTAT_ADDR0		(1<<1)
+#define S3C2410_IICSTAT_LASTBIT		(1<<0)
+
+#define S3C2410_IICLC_SDA_DELAY0	(0 << 0)
+#define S3C2410_IICLC_SDA_DELAY5	(1 << 0)
+#define S3C2410_IICLC_SDA_DELAY10	(2 << 0)
+#define S3C2410_IICLC_SDA_DELAY15	(3 << 0)
+#define S3C2410_IICLC_SDA_DELAY_MASK	(3 << 0)
+
+#define S3C2410_IICLC_FILTER_ON		(1<<2)
+
+#endif /* __ASM_ARCH_REGS_IIC_H */
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index c772e02c2803..4ad9c47ee4fd 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -40,8 +40,8 @@
 #include <asm/io.h>
 
 #include <mach/regs-gpio.h>
-#include <asm/plat-s3c/regs-iic.h>
-#include <asm/plat-s3c/iic.h>
+#include <plat/regs-iic.h>
+#include <plat/iic.h>
 
 /* i2c controller state */
 
diff --git a/include/asm-arm/plat-s3c/iic.h b/include/asm-arm/plat-s3c/iic.h
deleted file mode 100644
index 5106acaa1d0e..000000000000
--- a/include/asm-arm/plat-s3c/iic.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/iic.h
- *
- * Copyright (c) 2004 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
- * S3C2410 - I2C Controller platfrom_device info
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __ASM_ARCH_IIC_H
-#define __ASM_ARCH_IIC_H __FILE__
-
-#define S3C_IICFLG_FILTER	(1<<0)	/* enable s3c2440 filter */
-
-/* Notes:
- *	1) All frequencies are expressed in Hz
- *	2) A value of zero is `do not care`
-*/
-
-struct s3c2410_platform_i2c {
-	int		bus_num;	/* bus number to use */
-	unsigned int	flags;
-	unsigned int	slave_addr;	/* slave address for controller */
-	unsigned long	bus_freq;	/* standard bus frequency */
-	unsigned long	max_freq;	/* max frequency for the bus */
-	unsigned long	min_freq;	/* min frequency for the bus */
-	unsigned int	sda_delay;	/* pclks (s3c2440 only) */
-};
-
-#endif /* __ASM_ARCH_IIC_H */
diff --git a/include/asm-arm/plat-s3c/regs-iic.h b/include/asm-arm/plat-s3c/regs-iic.h
deleted file mode 100644
index 2f7c17de8ac8..000000000000
--- a/include/asm-arm/plat-s3c/regs-iic.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-iic.h
- *
- * Copyright (c) 2004 Simtec Electronics <linux@simtec.co.uk>
- *		http://www.simtec.co.uk/products/SWLINUX/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * S3C2410 I2C Controller
-*/
-
-#ifndef __ASM_ARCH_REGS_IIC_H
-#define __ASM_ARCH_REGS_IIC_H __FILE__
-
-/* see s3c2410x user guide, v1.1, section 9 (p447) for more info */
-
-#define S3C2410_IICREG(x) (x)
-
-#define S3C2410_IICCON    S3C2410_IICREG(0x00)
-#define S3C2410_IICSTAT   S3C2410_IICREG(0x04)
-#define S3C2410_IICADD    S3C2410_IICREG(0x08)
-#define S3C2410_IICDS     S3C2410_IICREG(0x0C)
-#define S3C2440_IICLC	  S3C2410_IICREG(0x10)
-
-#define S3C2410_IICCON_ACKEN		(1<<7)
-#define S3C2410_IICCON_TXDIV_16		(0<<6)
-#define S3C2410_IICCON_TXDIV_512	(1<<6)
-#define S3C2410_IICCON_IRQEN		(1<<5)
-#define S3C2410_IICCON_IRQPEND		(1<<4)
-#define S3C2410_IICCON_SCALE(x)		((x)&15)
-#define S3C2410_IICCON_SCALEMASK	(0xf)
-
-#define S3C2410_IICSTAT_MASTER_RX	(2<<6)
-#define S3C2410_IICSTAT_MASTER_TX	(3<<6)
-#define S3C2410_IICSTAT_SLAVE_RX	(0<<6)
-#define S3C2410_IICSTAT_SLAVE_TX	(1<<6)
-#define S3C2410_IICSTAT_MODEMASK	(3<<6)
-
-#define S3C2410_IICSTAT_START		(1<<5)
-#define S3C2410_IICSTAT_BUSBUSY		(1<<5)
-#define S3C2410_IICSTAT_TXRXEN		(1<<4)
-#define S3C2410_IICSTAT_ARBITR		(1<<3)
-#define S3C2410_IICSTAT_ASSLAVE		(1<<2)
-#define S3C2410_IICSTAT_ADDR0		(1<<1)
-#define S3C2410_IICSTAT_LASTBIT		(1<<0)
-
-#define S3C2410_IICLC_SDA_DELAY0	(0 << 0)
-#define S3C2410_IICLC_SDA_DELAY5	(1 << 0)
-#define S3C2410_IICLC_SDA_DELAY10	(2 << 0)
-#define S3C2410_IICLC_SDA_DELAY15	(3 << 0)
-#define S3C2410_IICLC_SDA_DELAY_MASK	(3 << 0)
-
-#define S3C2410_IICLC_FILTER_ON		(1<<2)
-
-#endif /* __ASM_ARCH_REGS_IIC_H */
-- 
cgit v1.2.3


From e2cd00cfebd9a25e0e09712b0116ef18edc2cd98 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:34 +0000
Subject: [ARM] S3C: Move regs-rtc.h to arch/arm/plat-s3c/include/plat

Move regs-rtc.h to arch/arm/plat-s3c/include/plat ready
to clean out old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/plat-s3c/include/plat/regs-rtc.h | 61 +++++++++++++++++++++++++++++++
 drivers/rtc/rtc-s3c.c                     |  2 +-
 include/asm-arm/plat-s3c/regs-rtc.h       | 61 -------------------------------
 3 files changed, 62 insertions(+), 62 deletions(-)
 create mode 100644 arch/arm/plat-s3c/include/plat/regs-rtc.h
 delete mode 100644 include/asm-arm/plat-s3c/regs-rtc.h

(limited to 'include')

diff --git a/arch/arm/plat-s3c/include/plat/regs-rtc.h b/arch/arm/plat-s3c/include/plat/regs-rtc.h
new file mode 100644
index 000000000000..d5837cf8e402
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/regs-rtc.h
@@ -0,0 +1,61 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-rtc.h
+ *
+ * Copyright (c) 2003 Simtec Electronics <linux@simtec.co.uk>
+ *		      http://www.simtec.co.uk/products/SWLINUX/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * S3C2410 Internal RTC register definition
+*/
+
+#ifndef __ASM_ARCH_REGS_RTC_H
+#define __ASM_ARCH_REGS_RTC_H __FILE__
+
+#define S3C2410_RTCREG(x) (x)
+
+#define S3C2410_RTCCON	      S3C2410_RTCREG(0x40)
+#define S3C2410_RTCCON_RTCEN  (1<<0)
+#define S3C2410_RTCCON_CLKSEL (1<<1)
+#define S3C2410_RTCCON_CNTSEL (1<<2)
+#define S3C2410_RTCCON_CLKRST (1<<3)
+
+#define S3C2410_TICNT	      S3C2410_RTCREG(0x44)
+#define S3C2410_TICNT_ENABLE  (1<<7)
+
+#define S3C2410_RTCALM	      S3C2410_RTCREG(0x50)
+#define S3C2410_RTCALM_ALMEN  (1<<6)
+#define S3C2410_RTCALM_YEAREN (1<<5)
+#define S3C2410_RTCALM_MONEN  (1<<4)
+#define S3C2410_RTCALM_DAYEN  (1<<3)
+#define S3C2410_RTCALM_HOUREN (1<<2)
+#define S3C2410_RTCALM_MINEN  (1<<1)
+#define S3C2410_RTCALM_SECEN  (1<<0)
+
+#define S3C2410_RTCALM_ALL \
+  S3C2410_RTCALM_ALMEN | S3C2410_RTCALM_YEAREN | S3C2410_RTCALM_MONEN |\
+  S3C2410_RTCALM_DAYEN | S3C2410_RTCALM_HOUREN | S3C2410_RTCALM_MINEN |\
+  S3C2410_RTCALM_SECEN
+
+
+#define S3C2410_ALMSEC	      S3C2410_RTCREG(0x54)
+#define S3C2410_ALMMIN	      S3C2410_RTCREG(0x58)
+#define S3C2410_ALMHOUR	      S3C2410_RTCREG(0x5c)
+
+#define S3C2410_ALMDATE	      S3C2410_RTCREG(0x60)
+#define S3C2410_ALMMON	      S3C2410_RTCREG(0x64)
+#define S3C2410_ALMYEAR	      S3C2410_RTCREG(0x68)
+
+#define S3C2410_RTCRST	      S3C2410_RTCREG(0x6c)
+
+#define S3C2410_RTCSEC	      S3C2410_RTCREG(0x70)
+#define S3C2410_RTCMIN	      S3C2410_RTCREG(0x74)
+#define S3C2410_RTCHOUR	      S3C2410_RTCREG(0x78)
+#define S3C2410_RTCDATE	      S3C2410_RTCREG(0x7c)
+#define S3C2410_RTCDAY	      S3C2410_RTCREG(0x80)
+#define S3C2410_RTCMON	      S3C2410_RTCREG(0x84)
+#define S3C2410_RTCYEAR	      S3C2410_RTCREG(0x88)
+
+
+#endif /* __ASM_ARCH_REGS_RTC_H */
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 910bc704939c..0273ebc4cf36 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -26,7 +26,7 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/plat-s3c/regs-rtc.h>
+#include <plat/regs-rtc.h>
 
 /* I have yet to find an S3C implementation with more than one
  * of these rtc blocks in */
diff --git a/include/asm-arm/plat-s3c/regs-rtc.h b/include/asm-arm/plat-s3c/regs-rtc.h
deleted file mode 100644
index d5837cf8e402..000000000000
--- a/include/asm-arm/plat-s3c/regs-rtc.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-rtc.h
- *
- * Copyright (c) 2003 Simtec Electronics <linux@simtec.co.uk>
- *		      http://www.simtec.co.uk/products/SWLINUX/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * S3C2410 Internal RTC register definition
-*/
-
-#ifndef __ASM_ARCH_REGS_RTC_H
-#define __ASM_ARCH_REGS_RTC_H __FILE__
-
-#define S3C2410_RTCREG(x) (x)
-
-#define S3C2410_RTCCON	      S3C2410_RTCREG(0x40)
-#define S3C2410_RTCCON_RTCEN  (1<<0)
-#define S3C2410_RTCCON_CLKSEL (1<<1)
-#define S3C2410_RTCCON_CNTSEL (1<<2)
-#define S3C2410_RTCCON_CLKRST (1<<3)
-
-#define S3C2410_TICNT	      S3C2410_RTCREG(0x44)
-#define S3C2410_TICNT_ENABLE  (1<<7)
-
-#define S3C2410_RTCALM	      S3C2410_RTCREG(0x50)
-#define S3C2410_RTCALM_ALMEN  (1<<6)
-#define S3C2410_RTCALM_YEAREN (1<<5)
-#define S3C2410_RTCALM_MONEN  (1<<4)
-#define S3C2410_RTCALM_DAYEN  (1<<3)
-#define S3C2410_RTCALM_HOUREN (1<<2)
-#define S3C2410_RTCALM_MINEN  (1<<1)
-#define S3C2410_RTCALM_SECEN  (1<<0)
-
-#define S3C2410_RTCALM_ALL \
-  S3C2410_RTCALM_ALMEN | S3C2410_RTCALM_YEAREN | S3C2410_RTCALM_MONEN |\
-  S3C2410_RTCALM_DAYEN | S3C2410_RTCALM_HOUREN | S3C2410_RTCALM_MINEN |\
-  S3C2410_RTCALM_SECEN
-
-
-#define S3C2410_ALMSEC	      S3C2410_RTCREG(0x54)
-#define S3C2410_ALMMIN	      S3C2410_RTCREG(0x58)
-#define S3C2410_ALMHOUR	      S3C2410_RTCREG(0x5c)
-
-#define S3C2410_ALMDATE	      S3C2410_RTCREG(0x60)
-#define S3C2410_ALMMON	      S3C2410_RTCREG(0x64)
-#define S3C2410_ALMYEAR	      S3C2410_RTCREG(0x68)
-
-#define S3C2410_RTCRST	      S3C2410_RTCREG(0x6c)
-
-#define S3C2410_RTCSEC	      S3C2410_RTCREG(0x70)
-#define S3C2410_RTCMIN	      S3C2410_RTCREG(0x74)
-#define S3C2410_RTCHOUR	      S3C2410_RTCREG(0x78)
-#define S3C2410_RTCDATE	      S3C2410_RTCREG(0x7c)
-#define S3C2410_RTCDAY	      S3C2410_RTCREG(0x80)
-#define S3C2410_RTCMON	      S3C2410_RTCREG(0x84)
-#define S3C2410_RTCYEAR	      S3C2410_RTCREG(0x88)
-
-
-#endif /* __ASM_ARCH_REGS_RTC_H */
-- 
cgit v1.2.3


From 7926b5a325f06745a1bed75bfb4ef814d0ae9d99 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:35 +0000
Subject: [ARM] S3C: Move nand headers to arch/arm/plat-s3c/include/plat

Move nand headers to arch/arm/plat-s3c/include/plat
ready to clean out the old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/mach-bast.c          |   2 +-
 arch/arm/mach-s3c2410/mach-qt2410.c        |   2 +-
 arch/arm/mach-s3c2412/mach-jive.c          |   2 +-
 arch/arm/mach-s3c2412/mach-vstms.c         |   2 +-
 arch/arm/mach-s3c2440/mach-anubis.c        |   2 +-
 arch/arm/mach-s3c2440/mach-at2440evb.c     |   2 +-
 arch/arm/mach-s3c2440/mach-osiris.c        |   2 +-
 arch/arm/mach-s3c2440/mach-rx3715.c        |   2 +-
 arch/arm/plat-s3c/include/plat/nand.h      |  50 ++++++++++++
 arch/arm/plat-s3c/include/plat/regs-nand.h | 123 +++++++++++++++++++++++++++++
 arch/arm/plat-s3c24xx/common-smdk.c        |   2 +-
 drivers/mtd/nand/s3c2410.c                 |   4 +-
 include/asm-arm/plat-s3c/nand.h            |  50 ------------
 include/asm-arm/plat-s3c/regs-nand.h       | 123 -----------------------------
 14 files changed, 184 insertions(+), 184 deletions(-)
 create mode 100644 arch/arm/plat-s3c/include/plat/nand.h
 create mode 100644 arch/arm/plat-s3c/include/plat/regs-nand.h
 delete mode 100644 include/asm-arm/plat-s3c/nand.h
 delete mode 100644 include/asm-arm/plat-s3c/regs-nand.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 8da05f9e7bb4..c04c24444e0d 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -44,7 +44,7 @@
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 #include <plat/iic.h>
 #include <mach/fb.h>
 
diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c
index 661807e14e8a..315c27271f18 100644
--- a/arch/arm/mach-s3c2410/mach-qt2410.c
+++ b/arch/arm/mach-s3c2410/mach-qt2410.c
@@ -50,7 +50,7 @@
 #include <mach/leds-gpio.h>
 #include <plat/regs-serial.h>
 #include <mach/fb.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 #include <asm/plat-s3c24xx/udc.h>
 #include <mach/spi.h>
 #include <mach/spi-gpio.h>
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index 80208d37756d..c8d9a346b3bf 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -31,7 +31,7 @@
 #include <asm/mach/irq.h>
 
 #include <plat/regs-serial.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 #include <plat/iic.h>
 
 #include <mach/regs-power.h>
diff --git a/arch/arm/mach-s3c2412/mach-vstms.c b/arch/arm/mach-s3c2412/mach-vstms.c
index 4cfa19ad9be0..da32a6cb17ae 100644
--- a/arch/arm/mach-s3c2412/mach-vstms.c
+++ b/arch/arm/mach-s3c2412/mach-vstms.c
@@ -39,7 +39,7 @@
 #include <mach/idle.h>
 #include <mach/fb.h>
 
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 
 #include <plat/s3c2410.h>
 #include <plat/s3c2412.h>
diff --git a/arch/arm/mach-s3c2440/mach-anubis.c b/arch/arm/mach-s3c2440/mach-anubis.c
index e2beca470484..334379bdfc6e 100644
--- a/arch/arm/mach-s3c2440/mach-anubis.c
+++ b/arch/arm/mach-s3c2440/mach-anubis.c
@@ -39,7 +39,7 @@
 #include <mach/regs-gpio.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
diff --git a/arch/arm/mach-s3c2440/mach-at2440evb.c b/arch/arm/mach-s3c2440/mach-at2440evb.c
index 66876c6f2f1c..07b42a0207d1 100644
--- a/arch/arm/mach-s3c2440/mach-at2440evb.c
+++ b/arch/arm/mach-s3c2440/mach-at2440evb.c
@@ -35,7 +35,7 @@
 #include <mach/regs-gpio.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index 2361d606abc5..884a3c7ae75f 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -37,7 +37,7 @@
 #include <mach/regs-gpio.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
diff --git a/arch/arm/mach-s3c2440/mach-rx3715.c b/arch/arm/mach-s3c2440/mach-rx3715.c
index 4d14c7cff892..fbd081de592f 100644
--- a/arch/arm/mach-s3c2440/mach-rx3715.c
+++ b/arch/arm/mach-s3c2440/mach-rx3715.c
@@ -42,7 +42,7 @@
 #include <mach/regs-lcd.h>
 
 #include <mach/h1940.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 #include <mach/fb.h>
 
 #include <plat/clock.h>
diff --git a/arch/arm/plat-s3c/include/plat/nand.h b/arch/arm/plat-s3c/include/plat/nand.h
new file mode 100644
index 000000000000..f4dcd14af059
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/nand.h
@@ -0,0 +1,50 @@
+/* arch/arm/mach-s3c2410/include/mach/nand.h
+ *
+ * Copyright (c) 2004 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * S3C2410 - NAND device controller platfrom_device info
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/* struct s3c2410_nand_set
+ *
+ * define an set of one or more nand chips registered with an unique mtd
+ *
+ * nr_chips	 = number of chips in this set
+ * nr_partitions = number of partitions pointed to be partitoons (or zero)
+ * name		 = name of set (optional)
+ * nr_map	 = map for low-layer logical to physical chip numbers (option)
+ * partitions	 = mtd partition list
+*/
+
+struct s3c2410_nand_set {
+	unsigned int		disable_ecc : 1;
+
+	int			nr_chips;
+	int			nr_partitions;
+	char			*name;
+	int			*nr_map;
+	struct mtd_partition	*partitions;
+	struct nand_ecclayout	*ecc_layout;
+};
+
+struct s3c2410_platform_nand {
+	/* timing information for controller, all times in nanoseconds */
+
+	int	tacls;	/* time for active CLE/ALE to nWE/nOE */
+	int	twrph0;	/* active time for nWE/nOE */
+	int	twrph1;	/* time for release CLE/ALE from nWE/nOE inactive */
+
+	unsigned int	ignore_unset_ecc : 1;
+
+	int			nr_sets;
+	struct s3c2410_nand_set *sets;
+
+	void			(*select_chip)(struct s3c2410_nand_set *,
+					       int chip);
+};
+
diff --git a/arch/arm/plat-s3c/include/plat/regs-nand.h b/arch/arm/plat-s3c/include/plat/regs-nand.h
new file mode 100644
index 000000000000..b2caa4bca270
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/regs-nand.h
@@ -0,0 +1,123 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-nand.h
+ *
+ * Copyright (c) 2004,2005 Simtec Electronics <linux@simtec.co.uk>
+ *		      http://www.simtec.co.uk/products/SWLINUX/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * S3C2410 NAND register definitions
+*/
+
+#ifndef __ASM_ARM_REGS_NAND
+#define __ASM_ARM_REGS_NAND
+
+
+#define S3C2410_NFREG(x) (x)
+
+#define S3C2410_NFCONF  S3C2410_NFREG(0x00)
+#define S3C2410_NFCMD   S3C2410_NFREG(0x04)
+#define S3C2410_NFADDR  S3C2410_NFREG(0x08)
+#define S3C2410_NFDATA  S3C2410_NFREG(0x0C)
+#define S3C2410_NFSTAT  S3C2410_NFREG(0x10)
+#define S3C2410_NFECC   S3C2410_NFREG(0x14)
+
+#define S3C2440_NFCONT   S3C2410_NFREG(0x04)
+#define S3C2440_NFCMD    S3C2410_NFREG(0x08)
+#define S3C2440_NFADDR   S3C2410_NFREG(0x0C)
+#define S3C2440_NFDATA   S3C2410_NFREG(0x10)
+#define S3C2440_NFECCD0  S3C2410_NFREG(0x14)
+#define S3C2440_NFECCD1  S3C2410_NFREG(0x18)
+#define S3C2440_NFECCD   S3C2410_NFREG(0x1C)
+#define S3C2440_NFSTAT   S3C2410_NFREG(0x20)
+#define S3C2440_NFESTAT0 S3C2410_NFREG(0x24)
+#define S3C2440_NFESTAT1 S3C2410_NFREG(0x28)
+#define S3C2440_NFMECC0  S3C2410_NFREG(0x2C)
+#define S3C2440_NFMECC1  S3C2410_NFREG(0x30)
+#define S3C2440_NFSECC   S3C2410_NFREG(0x34)
+#define S3C2440_NFSBLK   S3C2410_NFREG(0x38)
+#define S3C2440_NFEBLK   S3C2410_NFREG(0x3C)
+
+#define S3C2412_NFSBLK		S3C2410_NFREG(0x20)
+#define S3C2412_NFEBLK		S3C2410_NFREG(0x24)
+#define S3C2412_NFSTAT		S3C2410_NFREG(0x28)
+#define S3C2412_NFMECC_ERR0	S3C2410_NFREG(0x2C)
+#define S3C2412_NFMECC_ERR1	S3C2410_NFREG(0x30)
+#define S3C2412_NFMECC0		S3C2410_NFREG(0x34)
+#define S3C2412_NFMECC1		S3C2410_NFREG(0x38)
+#define S3C2412_NFSECC		S3C2410_NFREG(0x3C)
+
+#define S3C2410_NFCONF_EN          (1<<15)
+#define S3C2410_NFCONF_512BYTE     (1<<14)
+#define S3C2410_NFCONF_4STEP       (1<<13)
+#define S3C2410_NFCONF_INITECC     (1<<12)
+#define S3C2410_NFCONF_nFCE        (1<<11)
+#define S3C2410_NFCONF_TACLS(x)    ((x)<<8)
+#define S3C2410_NFCONF_TWRPH0(x)   ((x)<<4)
+#define S3C2410_NFCONF_TWRPH1(x)   ((x)<<0)
+
+#define S3C2410_NFSTAT_BUSY        (1<<0)
+
+#define S3C2440_NFCONF_BUSWIDTH_8	(0<<0)
+#define S3C2440_NFCONF_BUSWIDTH_16	(1<<0)
+#define S3C2440_NFCONF_ADVFLASH		(1<<3)
+#define S3C2440_NFCONF_TACLS(x)		((x)<<12)
+#define S3C2440_NFCONF_TWRPH0(x)	((x)<<8)
+#define S3C2440_NFCONF_TWRPH1(x)	((x)<<4)
+
+#define S3C2440_NFCONT_LOCKTIGHT	(1<<13)
+#define S3C2440_NFCONT_SOFTLOCK		(1<<12)
+#define S3C2440_NFCONT_ILLEGALACC_EN	(1<<10)
+#define S3C2440_NFCONT_RNBINT_EN	(1<<9)
+#define S3C2440_NFCONT_RN_FALLING	(1<<8)
+#define S3C2440_NFCONT_SPARE_ECCLOCK	(1<<6)
+#define S3C2440_NFCONT_MAIN_ECCLOCK	(1<<5)
+#define S3C2440_NFCONT_INITECC		(1<<4)
+#define S3C2440_NFCONT_nFCE		(1<<1)
+#define S3C2440_NFCONT_ENABLE		(1<<0)
+
+#define S3C2440_NFSTAT_READY		(1<<0)
+#define S3C2440_NFSTAT_nCE		(1<<1)
+#define S3C2440_NFSTAT_RnB_CHANGE	(1<<2)
+#define S3C2440_NFSTAT_ILLEGAL_ACCESS	(1<<3)
+
+#define S3C2412_NFCONF_NANDBOOT		(1<<31)
+#define S3C2412_NFCONF_ECCCLKCON	(1<<30)
+#define S3C2412_NFCONF_ECC_MLC		(1<<24)
+#define S3C2412_NFCONF_TACLS_MASK	(7<<12)	/* 1 extra bit of Tacls */
+
+#define S3C2412_NFCONT_ECC4_DIRWR	(1<<18)
+#define S3C2412_NFCONT_LOCKTIGHT	(1<<17)
+#define S3C2412_NFCONT_SOFTLOCK		(1<<16)
+#define S3C2412_NFCONT_ECC4_ENCINT	(1<<13)
+#define S3C2412_NFCONT_ECC4_DECINT	(1<<12)
+#define S3C2412_NFCONT_MAIN_ECC_LOCK	(1<<7)
+#define S3C2412_NFCONT_INIT_MAIN_ECC	(1<<5)
+#define S3C2412_NFCONT_nFCE1		(1<<2)
+#define S3C2412_NFCONT_nFCE0		(1<<1)
+
+#define S3C2412_NFSTAT_ECC_ENCDONE	(1<<7)
+#define S3C2412_NFSTAT_ECC_DECDONE	(1<<6)
+#define S3C2412_NFSTAT_ILLEGAL_ACCESS	(1<<5)
+#define S3C2412_NFSTAT_RnB_CHANGE	(1<<4)
+#define S3C2412_NFSTAT_nFCE1		(1<<3)
+#define S3C2412_NFSTAT_nFCE0		(1<<2)
+#define S3C2412_NFSTAT_Res1		(1<<1)
+#define S3C2412_NFSTAT_READY		(1<<0)
+
+#define S3C2412_NFECCERR_SERRDATA(x)	(((x) >> 21) & 0xf)
+#define S3C2412_NFECCERR_SERRBIT(x)	(((x) >> 18) & 0x7)
+#define S3C2412_NFECCERR_MERRDATA(x)	(((x) >> 7) & 0x3ff)
+#define S3C2412_NFECCERR_MERRBIT(x)	(((x) >> 4) & 0x7)
+#define S3C2412_NFECCERR_SPARE_ERR(x)	(((x) >> 2) & 0x3)
+#define S3C2412_NFECCERR_MAIN_ERR(x)	(((x) >> 2) & 0x3)
+#define S3C2412_NFECCERR_NONE		(0)
+#define S3C2412_NFECCERR_1BIT		(1)
+#define S3C2412_NFECCERR_MULTIBIT	(2)
+#define S3C2412_NFECCERR_ECCAREA	(3)
+
+
+
+#endif /* __ASM_ARM_REGS_NAND */
+
diff --git a/arch/arm/plat-s3c24xx/common-smdk.c b/arch/arm/plat-s3c24xx/common-smdk.c
index 3098736c65d9..3d4837021ac7 100644
--- a/arch/arm/plat-s3c24xx/common-smdk.c
+++ b/arch/arm/plat-s3c24xx/common-smdk.c
@@ -38,7 +38,7 @@
 #include <mach/regs-gpio.h>
 #include <mach/leds-gpio.h>
 
-#include <asm/plat-s3c/nand.h>
+#include <plat/nand.h>
 
 #include <plat/common-smdk.h>
 #include <plat/devs.h>
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 556139ed1fdf..098f5162388b 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -45,8 +45,8 @@
 
 #include <asm/io.h>
 
-#include <asm/plat-s3c/regs-nand.h>
-#include <asm/plat-s3c/nand.h>
+#include <plat/regs-nand.h>
+#include <plat/nand.h>
 
 #ifdef CONFIG_MTD_NAND_S3C2410_HWECC
 static int hardware_ecc = 1;
diff --git a/include/asm-arm/plat-s3c/nand.h b/include/asm-arm/plat-s3c/nand.h
deleted file mode 100644
index f4dcd14af059..000000000000
--- a/include/asm-arm/plat-s3c/nand.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/nand.h
- *
- * Copyright (c) 2004 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
- * S3C2410 - NAND device controller platfrom_device info
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-/* struct s3c2410_nand_set
- *
- * define an set of one or more nand chips registered with an unique mtd
- *
- * nr_chips	 = number of chips in this set
- * nr_partitions = number of partitions pointed to be partitoons (or zero)
- * name		 = name of set (optional)
- * nr_map	 = map for low-layer logical to physical chip numbers (option)
- * partitions	 = mtd partition list
-*/
-
-struct s3c2410_nand_set {
-	unsigned int		disable_ecc : 1;
-
-	int			nr_chips;
-	int			nr_partitions;
-	char			*name;
-	int			*nr_map;
-	struct mtd_partition	*partitions;
-	struct nand_ecclayout	*ecc_layout;
-};
-
-struct s3c2410_platform_nand {
-	/* timing information for controller, all times in nanoseconds */
-
-	int	tacls;	/* time for active CLE/ALE to nWE/nOE */
-	int	twrph0;	/* active time for nWE/nOE */
-	int	twrph1;	/* time for release CLE/ALE from nWE/nOE inactive */
-
-	unsigned int	ignore_unset_ecc : 1;
-
-	int			nr_sets;
-	struct s3c2410_nand_set *sets;
-
-	void			(*select_chip)(struct s3c2410_nand_set *,
-					       int chip);
-};
-
diff --git a/include/asm-arm/plat-s3c/regs-nand.h b/include/asm-arm/plat-s3c/regs-nand.h
deleted file mode 100644
index b2caa4bca270..000000000000
--- a/include/asm-arm/plat-s3c/regs-nand.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-nand.h
- *
- * Copyright (c) 2004,2005 Simtec Electronics <linux@simtec.co.uk>
- *		      http://www.simtec.co.uk/products/SWLINUX/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * S3C2410 NAND register definitions
-*/
-
-#ifndef __ASM_ARM_REGS_NAND
-#define __ASM_ARM_REGS_NAND
-
-
-#define S3C2410_NFREG(x) (x)
-
-#define S3C2410_NFCONF  S3C2410_NFREG(0x00)
-#define S3C2410_NFCMD   S3C2410_NFREG(0x04)
-#define S3C2410_NFADDR  S3C2410_NFREG(0x08)
-#define S3C2410_NFDATA  S3C2410_NFREG(0x0C)
-#define S3C2410_NFSTAT  S3C2410_NFREG(0x10)
-#define S3C2410_NFECC   S3C2410_NFREG(0x14)
-
-#define S3C2440_NFCONT   S3C2410_NFREG(0x04)
-#define S3C2440_NFCMD    S3C2410_NFREG(0x08)
-#define S3C2440_NFADDR   S3C2410_NFREG(0x0C)
-#define S3C2440_NFDATA   S3C2410_NFREG(0x10)
-#define S3C2440_NFECCD0  S3C2410_NFREG(0x14)
-#define S3C2440_NFECCD1  S3C2410_NFREG(0x18)
-#define S3C2440_NFECCD   S3C2410_NFREG(0x1C)
-#define S3C2440_NFSTAT   S3C2410_NFREG(0x20)
-#define S3C2440_NFESTAT0 S3C2410_NFREG(0x24)
-#define S3C2440_NFESTAT1 S3C2410_NFREG(0x28)
-#define S3C2440_NFMECC0  S3C2410_NFREG(0x2C)
-#define S3C2440_NFMECC1  S3C2410_NFREG(0x30)
-#define S3C2440_NFSECC   S3C2410_NFREG(0x34)
-#define S3C2440_NFSBLK   S3C2410_NFREG(0x38)
-#define S3C2440_NFEBLK   S3C2410_NFREG(0x3C)
-
-#define S3C2412_NFSBLK		S3C2410_NFREG(0x20)
-#define S3C2412_NFEBLK		S3C2410_NFREG(0x24)
-#define S3C2412_NFSTAT		S3C2410_NFREG(0x28)
-#define S3C2412_NFMECC_ERR0	S3C2410_NFREG(0x2C)
-#define S3C2412_NFMECC_ERR1	S3C2410_NFREG(0x30)
-#define S3C2412_NFMECC0		S3C2410_NFREG(0x34)
-#define S3C2412_NFMECC1		S3C2410_NFREG(0x38)
-#define S3C2412_NFSECC		S3C2410_NFREG(0x3C)
-
-#define S3C2410_NFCONF_EN          (1<<15)
-#define S3C2410_NFCONF_512BYTE     (1<<14)
-#define S3C2410_NFCONF_4STEP       (1<<13)
-#define S3C2410_NFCONF_INITECC     (1<<12)
-#define S3C2410_NFCONF_nFCE        (1<<11)
-#define S3C2410_NFCONF_TACLS(x)    ((x)<<8)
-#define S3C2410_NFCONF_TWRPH0(x)   ((x)<<4)
-#define S3C2410_NFCONF_TWRPH1(x)   ((x)<<0)
-
-#define S3C2410_NFSTAT_BUSY        (1<<0)
-
-#define S3C2440_NFCONF_BUSWIDTH_8	(0<<0)
-#define S3C2440_NFCONF_BUSWIDTH_16	(1<<0)
-#define S3C2440_NFCONF_ADVFLASH		(1<<3)
-#define S3C2440_NFCONF_TACLS(x)		((x)<<12)
-#define S3C2440_NFCONF_TWRPH0(x)	((x)<<8)
-#define S3C2440_NFCONF_TWRPH1(x)	((x)<<4)
-
-#define S3C2440_NFCONT_LOCKTIGHT	(1<<13)
-#define S3C2440_NFCONT_SOFTLOCK		(1<<12)
-#define S3C2440_NFCONT_ILLEGALACC_EN	(1<<10)
-#define S3C2440_NFCONT_RNBINT_EN	(1<<9)
-#define S3C2440_NFCONT_RN_FALLING	(1<<8)
-#define S3C2440_NFCONT_SPARE_ECCLOCK	(1<<6)
-#define S3C2440_NFCONT_MAIN_ECCLOCK	(1<<5)
-#define S3C2440_NFCONT_INITECC		(1<<4)
-#define S3C2440_NFCONT_nFCE		(1<<1)
-#define S3C2440_NFCONT_ENABLE		(1<<0)
-
-#define S3C2440_NFSTAT_READY		(1<<0)
-#define S3C2440_NFSTAT_nCE		(1<<1)
-#define S3C2440_NFSTAT_RnB_CHANGE	(1<<2)
-#define S3C2440_NFSTAT_ILLEGAL_ACCESS	(1<<3)
-
-#define S3C2412_NFCONF_NANDBOOT		(1<<31)
-#define S3C2412_NFCONF_ECCCLKCON	(1<<30)
-#define S3C2412_NFCONF_ECC_MLC		(1<<24)
-#define S3C2412_NFCONF_TACLS_MASK	(7<<12)	/* 1 extra bit of Tacls */
-
-#define S3C2412_NFCONT_ECC4_DIRWR	(1<<18)
-#define S3C2412_NFCONT_LOCKTIGHT	(1<<17)
-#define S3C2412_NFCONT_SOFTLOCK		(1<<16)
-#define S3C2412_NFCONT_ECC4_ENCINT	(1<<13)
-#define S3C2412_NFCONT_ECC4_DECINT	(1<<12)
-#define S3C2412_NFCONT_MAIN_ECC_LOCK	(1<<7)
-#define S3C2412_NFCONT_INIT_MAIN_ECC	(1<<5)
-#define S3C2412_NFCONT_nFCE1		(1<<2)
-#define S3C2412_NFCONT_nFCE0		(1<<1)
-
-#define S3C2412_NFSTAT_ECC_ENCDONE	(1<<7)
-#define S3C2412_NFSTAT_ECC_DECDONE	(1<<6)
-#define S3C2412_NFSTAT_ILLEGAL_ACCESS	(1<<5)
-#define S3C2412_NFSTAT_RnB_CHANGE	(1<<4)
-#define S3C2412_NFSTAT_nFCE1		(1<<3)
-#define S3C2412_NFSTAT_nFCE0		(1<<2)
-#define S3C2412_NFSTAT_Res1		(1<<1)
-#define S3C2412_NFSTAT_READY		(1<<0)
-
-#define S3C2412_NFECCERR_SERRDATA(x)	(((x) >> 21) & 0xf)
-#define S3C2412_NFECCERR_SERRBIT(x)	(((x) >> 18) & 0x7)
-#define S3C2412_NFECCERR_MERRDATA(x)	(((x) >> 7) & 0x3ff)
-#define S3C2412_NFECCERR_MERRBIT(x)	(((x) >> 4) & 0x7)
-#define S3C2412_NFECCERR_SPARE_ERR(x)	(((x) >> 2) & 0x3)
-#define S3C2412_NFECCERR_MAIN_ERR(x)	(((x) >> 2) & 0x3)
-#define S3C2412_NFECCERR_NONE		(0)
-#define S3C2412_NFECCERR_1BIT		(1)
-#define S3C2412_NFECCERR_MULTIBIT	(2)
-#define S3C2412_NFECCERR_ECCAREA	(3)
-
-
-
-#endif /* __ASM_ARM_REGS_NAND */
-
-- 
cgit v1.2.3


From f74c95c20bad8e183e41283475f68a3e7b247af4 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:36 +0000
Subject: [ARM] S3C: Move regs-ac97.h to arch/arm/plat-s3c/include/plat.

Move regs-ac97.h to arch/arm/plat-s3c/include/plat ready
to clean out old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/dma.c                |  2 +-
 arch/arm/mach-s3c2412/dma.c                |  2 +-
 arch/arm/mach-s3c2440/dma.c                |  2 +-
 arch/arm/mach-s3c2443/dma.c                |  2 +-
 arch/arm/plat-s3c/include/plat/regs-ac97.h | 67 ++++++++++++++++++++++++++++++
 include/asm-arm/plat-s3c/regs-ac97.h       | 67 ------------------------------
 sound/soc/s3c24xx/s3c2443-ac97.c           |  2 +-
 7 files changed, 72 insertions(+), 72 deletions(-)
 create mode 100644 arch/arm/plat-s3c/include/plat/regs-ac97.h
 delete mode 100644 include/asm-arm/plat-s3c/regs-ac97.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 7d914a470b6c..7fa77effccf8 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -25,7 +25,7 @@
 
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
-#include <asm/plat-s3c/regs-ac97.h>
+#include <plat/regs-ac97.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/mach-s3c2412/dma.c b/arch/arm/mach-s3c2412/dma.c
index ba0591e71f32..7db581826676 100644
--- a/arch/arm/mach-s3c2412/dma.c
+++ b/arch/arm/mach-s3c2412/dma.c
@@ -26,7 +26,7 @@
 
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
-#include <asm/plat-s3c/regs-ac97.h>
+#include <plat/regs-ac97.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/mach-s3c2440/dma.c b/arch/arm/mach-s3c2440/dma.c
index 32303f6a8321..00d88782b23b 100644
--- a/arch/arm/mach-s3c2440/dma.c
+++ b/arch/arm/mach-s3c2440/dma.c
@@ -25,7 +25,7 @@
 
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
-#include <asm/plat-s3c/regs-ac97.h>
+#include <plat/regs-ac97.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/mach-s3c2443/dma.c b/arch/arm/mach-s3c2443/dma.c
index f73ccb25ff94..4185c57b5dd0 100644
--- a/arch/arm/mach-s3c2443/dma.c
+++ b/arch/arm/mach-s3c2443/dma.c
@@ -26,7 +26,7 @@
 
 #include <plat/regs-serial.h>
 #include <mach/regs-gpio.h>
-#include <asm/plat-s3c/regs-ac97.h>
+#include <plat/regs-ac97.h>
 #include <mach/regs-mem.h>
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
diff --git a/arch/arm/plat-s3c/include/plat/regs-ac97.h b/arch/arm/plat-s3c/include/plat/regs-ac97.h
new file mode 100644
index 000000000000..c3878f7acb83
--- /dev/null
+++ b/arch/arm/plat-s3c/include/plat/regs-ac97.h
@@ -0,0 +1,67 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-ac97.h
+ *
+ * Copyright (c) 2006 Simtec Electronics <linux@simtec.co.uk>
+ *		http://www.simtec.co.uk/products/SWLINUX/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * S3C2440 AC97 Controller
+*/
+
+#ifndef __ASM_ARCH_REGS_AC97_H
+#define __ASM_ARCH_REGS_AC97_H __FILE__
+
+#define S3C_AC97_GLBCTRL				(0x00)
+
+#define S3C_AC97_GLBCTRL_CODECREADYIE			(1<<22)
+#define S3C_AC97_GLBCTRL_PCMOUTURIE			(1<<21)
+#define S3C_AC97_GLBCTRL_PCMINORIE			(1<<20)
+#define S3C_AC97_GLBCTRL_MICINORIE			(1<<19)
+#define S3C_AC97_GLBCTRL_PCMOUTTIE			(1<<18)
+#define S3C_AC97_GLBCTRL_PCMINTIE			(1<<17)
+#define S3C_AC97_GLBCTRL_MICINTIE			(1<<16)
+#define S3C_AC97_GLBCTRL_PCMOUTTM_OFF			(0<<12)
+#define S3C_AC97_GLBCTRL_PCMOUTTM_PIO			(1<<12)
+#define S3C_AC97_GLBCTRL_PCMOUTTM_DMA			(2<<12)
+#define S3C_AC97_GLBCTRL_PCMOUTTM_MASK			(3<<12)
+#define S3C_AC97_GLBCTRL_PCMINTM_OFF			(0<<10)
+#define S3C_AC97_GLBCTRL_PCMINTM_PIO			(1<<10)
+#define S3C_AC97_GLBCTRL_PCMINTM_DMA			(2<<10)
+#define S3C_AC97_GLBCTRL_PCMINTM_MASK			(3<<10)
+#define S3C_AC97_GLBCTRL_MICINTM_OFF			(0<<8)
+#define S3C_AC97_GLBCTRL_MICINTM_PIO			(1<<8)
+#define S3C_AC97_GLBCTRL_MICINTM_DMA			(2<<8)
+#define S3C_AC97_GLBCTRL_MICINTM_MASK			(3<<8)
+#define S3C_AC97_GLBCTRL_TRANSFERDATAENABLE		(1<<3)
+#define S3C_AC97_GLBCTRL_ACLINKON			(1<<2)
+#define S3C_AC97_GLBCTRL_WARMRESET			(1<<1)
+#define S3C_AC97_GLBCTRL_COLDRESET			(1<<0)
+
+#define S3C_AC97_GLBSTAT				(0x04)
+
+#define S3C_AC97_GLBSTAT_CODECREADY			(1<<22)
+#define S3C_AC97_GLBSTAT_PCMOUTUR			(1<<21)
+#define S3C_AC97_GLBSTAT_PCMINORI			(1<<20)
+#define S3C_AC97_GLBSTAT_MICINORI			(1<<19)
+#define S3C_AC97_GLBSTAT_PCMOUTTI			(1<<18)
+#define S3C_AC97_GLBSTAT_PCMINTI			(1<<17)
+#define S3C_AC97_GLBSTAT_MICINTI			(1<<16)
+#define S3C_AC97_GLBSTAT_MAINSTATE_IDLE			(0<<0)
+#define S3C_AC97_GLBSTAT_MAINSTATE_INIT			(1<<0)
+#define S3C_AC97_GLBSTAT_MAINSTATE_READY		(2<<0)
+#define S3C_AC97_GLBSTAT_MAINSTATE_ACTIVE		(3<<0)
+#define S3C_AC97_GLBSTAT_MAINSTATE_LP			(4<<0)
+#define S3C_AC97_GLBSTAT_MAINSTATE_WARM			(5<<0)
+
+#define S3C_AC97_CODEC_CMD				(0x08)
+
+#define S3C_AC97_CODEC_CMD_READ				(1<<23)
+
+#define S3C_AC97_STAT					(0x0c)
+#define S3C_AC97_PCM_ADDR				(0x10)
+#define S3C_AC97_PCM_DATA				(0x18)
+#define S3C_AC97_MIC_DATA				(0x1C)
+
+#endif /* __ASM_ARCH_REGS_AC97_H */
diff --git a/include/asm-arm/plat-s3c/regs-ac97.h b/include/asm-arm/plat-s3c/regs-ac97.h
deleted file mode 100644
index c3878f7acb83..000000000000
--- a/include/asm-arm/plat-s3c/regs-ac97.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-ac97.h
- *
- * Copyright (c) 2006 Simtec Electronics <linux@simtec.co.uk>
- *		http://www.simtec.co.uk/products/SWLINUX/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * S3C2440 AC97 Controller
-*/
-
-#ifndef __ASM_ARCH_REGS_AC97_H
-#define __ASM_ARCH_REGS_AC97_H __FILE__
-
-#define S3C_AC97_GLBCTRL				(0x00)
-
-#define S3C_AC97_GLBCTRL_CODECREADYIE			(1<<22)
-#define S3C_AC97_GLBCTRL_PCMOUTURIE			(1<<21)
-#define S3C_AC97_GLBCTRL_PCMINORIE			(1<<20)
-#define S3C_AC97_GLBCTRL_MICINORIE			(1<<19)
-#define S3C_AC97_GLBCTRL_PCMOUTTIE			(1<<18)
-#define S3C_AC97_GLBCTRL_PCMINTIE			(1<<17)
-#define S3C_AC97_GLBCTRL_MICINTIE			(1<<16)
-#define S3C_AC97_GLBCTRL_PCMOUTTM_OFF			(0<<12)
-#define S3C_AC97_GLBCTRL_PCMOUTTM_PIO			(1<<12)
-#define S3C_AC97_GLBCTRL_PCMOUTTM_DMA			(2<<12)
-#define S3C_AC97_GLBCTRL_PCMOUTTM_MASK			(3<<12)
-#define S3C_AC97_GLBCTRL_PCMINTM_OFF			(0<<10)
-#define S3C_AC97_GLBCTRL_PCMINTM_PIO			(1<<10)
-#define S3C_AC97_GLBCTRL_PCMINTM_DMA			(2<<10)
-#define S3C_AC97_GLBCTRL_PCMINTM_MASK			(3<<10)
-#define S3C_AC97_GLBCTRL_MICINTM_OFF			(0<<8)
-#define S3C_AC97_GLBCTRL_MICINTM_PIO			(1<<8)
-#define S3C_AC97_GLBCTRL_MICINTM_DMA			(2<<8)
-#define S3C_AC97_GLBCTRL_MICINTM_MASK			(3<<8)
-#define S3C_AC97_GLBCTRL_TRANSFERDATAENABLE		(1<<3)
-#define S3C_AC97_GLBCTRL_ACLINKON			(1<<2)
-#define S3C_AC97_GLBCTRL_WARMRESET			(1<<1)
-#define S3C_AC97_GLBCTRL_COLDRESET			(1<<0)
-
-#define S3C_AC97_GLBSTAT				(0x04)
-
-#define S3C_AC97_GLBSTAT_CODECREADY			(1<<22)
-#define S3C_AC97_GLBSTAT_PCMOUTUR			(1<<21)
-#define S3C_AC97_GLBSTAT_PCMINORI			(1<<20)
-#define S3C_AC97_GLBSTAT_MICINORI			(1<<19)
-#define S3C_AC97_GLBSTAT_PCMOUTTI			(1<<18)
-#define S3C_AC97_GLBSTAT_PCMINTI			(1<<17)
-#define S3C_AC97_GLBSTAT_MICINTI			(1<<16)
-#define S3C_AC97_GLBSTAT_MAINSTATE_IDLE			(0<<0)
-#define S3C_AC97_GLBSTAT_MAINSTATE_INIT			(1<<0)
-#define S3C_AC97_GLBSTAT_MAINSTATE_READY		(2<<0)
-#define S3C_AC97_GLBSTAT_MAINSTATE_ACTIVE		(3<<0)
-#define S3C_AC97_GLBSTAT_MAINSTATE_LP			(4<<0)
-#define S3C_AC97_GLBSTAT_MAINSTATE_WARM			(5<<0)
-
-#define S3C_AC97_CODEC_CMD				(0x08)
-
-#define S3C_AC97_CODEC_CMD_READ				(1<<23)
-
-#define S3C_AC97_STAT					(0x0c)
-#define S3C_AC97_PCM_ADDR				(0x10)
-#define S3C_AC97_PCM_DATA				(0x18)
-#define S3C_AC97_MIC_DATA				(0x1C)
-
-#endif /* __ASM_ARCH_REGS_AC97_H */
diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c
index 19c5c3cf5d8c..c473a3b97b55 100644
--- a/sound/soc/s3c24xx/s3c2443-ac97.c
+++ b/sound/soc/s3c24xx/s3c2443-ac97.c
@@ -28,7 +28,7 @@
 #include <sound/soc.h>
 
 #include <mach/hardware.h>
-#include <asm/plat-s3c/regs-ac97.h>
+#include <plat/regs-ac97.h>
 #include <mach/regs-gpio.h>
 #include <mach/regs-clock.h>
 #include <mach/audio.h>
-- 
cgit v1.2.3


From 57bd4b91a6cfc5bad4c5d829ef85293ea63643ea Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:37 +0000
Subject: [ARM] S3C24XX: Movev udc headers to
 arch/arm/plat-s3c24xx/include/plat

Move the udc headers to the proper home in
arch/arm/plat-s3c24xx/include/plat ready to clean out
the old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/mach-h1940.c            |   2 +-
 arch/arm/mach-s3c2410/mach-n30.c              |   2 +-
 arch/arm/mach-s3c2410/mach-qt2410.c           |   2 +-
 arch/arm/mach-s3c2412/mach-jive.c             |   2 +-
 arch/arm/mach-s3c2412/mach-smdk2413.c         |   2 +-
 arch/arm/plat-s3c24xx/devs.c                  |   2 +-
 arch/arm/plat-s3c24xx/include/plat/regs-udc.h | 153 ++++++++++++++++++++++++++
 arch/arm/plat-s3c24xx/include/plat/udc.h      |  36 ++++++
 drivers/usb/gadget/s3c2410_udc.c              |   4 +-
 include/asm-arm/plat-s3c24xx/regs-udc.h       | 153 --------------------------
 include/asm-arm/plat-s3c24xx/udc.h            |  36 ------
 11 files changed, 197 insertions(+), 197 deletions(-)
 create mode 100644 arch/arm/plat-s3c24xx/include/plat/regs-udc.h
 create mode 100644 arch/arm/plat-s3c24xx/include/plat/udc.h
 delete mode 100644 include/asm-arm/plat-s3c24xx/regs-udc.h
 delete mode 100644 include/asm-arm/plat-s3c24xx/udc.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c
index 98716d0108e9..32d550fcff4d 100644
--- a/arch/arm/mach-s3c2410/mach-h1940.c
+++ b/arch/arm/mach-s3c2410/mach-h1940.c
@@ -38,7 +38,7 @@
 #include <mach/h1940.h>
 #include <mach/h1940-latch.h>
 #include <mach/fb.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/udc.h>
 
 #include <plat/clock.h>
 #include <plat/devs.h>
diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c
index 836c9f639215..7a7c45d28fe7 100644
--- a/arch/arm/mach-s3c2410/mach-n30.c
+++ b/arch/arm/mach-s3c2410/mach-n30.c
@@ -47,7 +47,7 @@
 #include <plat/cpu.h>
 #include <plat/devs.h>
 #include <plat/s3c2410.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/udc.h>
 
 static struct map_desc n30_iodesc[] __initdata = {
 	/* nothing here yet */
diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c
index 315c27271f18..ef868472f6a4 100644
--- a/arch/arm/mach-s3c2410/mach-qt2410.c
+++ b/arch/arm/mach-s3c2410/mach-qt2410.c
@@ -51,7 +51,7 @@
 #include <plat/regs-serial.h>
 #include <mach/fb.h>
 #include <plat/nand.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/udc.h>
 #include <mach/spi.h>
 #include <mach/spi-gpio.h>
 
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index c8d9a346b3bf..25ff1ec9f8ad 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -52,7 +52,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/pm.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/udc.h>
 
 static struct map_desc jive_iodesc[] __initdata = {
 };
diff --git a/arch/arm/mach-s3c2412/mach-smdk2413.c b/arch/arm/mach-s3c2412/mach-smdk2413.c
index c719b5a740a9..8fd17b8d5679 100644
--- a/arch/arm/mach-s3c2412/mach-smdk2413.c
+++ b/arch/arm/mach-s3c2412/mach-smdk2413.c
@@ -37,7 +37,7 @@
 #include <mach/regs-lcd.h>
 
 #include <mach/idle.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/udc.h>
 #include <mach/fb.h>
 
 #include <plat/s3c2410.h>
diff --git a/arch/arm/plat-s3c24xx/devs.c b/arch/arm/plat-s3c24xx/devs.c
index e93f8bf6d338..07491bcd13ba 100644
--- a/arch/arm/plat-s3c24xx/devs.c
+++ b/arch/arm/plat-s3c24xx/devs.c
@@ -29,7 +29,7 @@
 #include <asm/irq.h>
 
 #include <plat/regs-serial.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/udc.h>
 
 #include <plat/devs.h>
 #include <plat/cpu.h>
diff --git a/arch/arm/plat-s3c24xx/include/plat/regs-udc.h b/arch/arm/plat-s3c24xx/include/plat/regs-udc.h
new file mode 100644
index 000000000000..f0dd4a41b37b
--- /dev/null
+++ b/arch/arm/plat-s3c24xx/include/plat/regs-udc.h
@@ -0,0 +1,153 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-udc.h
+ *
+ * Copyright (C) 2004 Herbert Poetzl <herbert@13thfloor.at>
+ *
+ * This include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+*/
+
+#ifndef __ASM_ARCH_REGS_UDC_H
+#define __ASM_ARCH_REGS_UDC_H
+
+#define S3C2410_USBDREG(x) (x)
+
+#define S3C2410_UDC_FUNC_ADDR_REG	S3C2410_USBDREG(0x0140)
+#define S3C2410_UDC_PWR_REG		S3C2410_USBDREG(0x0144)
+#define S3C2410_UDC_EP_INT_REG		S3C2410_USBDREG(0x0148)
+
+#define S3C2410_UDC_USB_INT_REG		S3C2410_USBDREG(0x0158)
+#define S3C2410_UDC_EP_INT_EN_REG	S3C2410_USBDREG(0x015c)
+
+#define S3C2410_UDC_USB_INT_EN_REG	S3C2410_USBDREG(0x016c)
+
+#define S3C2410_UDC_FRAME_NUM1_REG	S3C2410_USBDREG(0x0170)
+#define S3C2410_UDC_FRAME_NUM2_REG	S3C2410_USBDREG(0x0174)
+
+#define S3C2410_UDC_EP0_FIFO_REG	S3C2410_USBDREG(0x01c0)
+#define S3C2410_UDC_EP1_FIFO_REG	S3C2410_USBDREG(0x01c4)
+#define S3C2410_UDC_EP2_FIFO_REG	S3C2410_USBDREG(0x01c8)
+#define S3C2410_UDC_EP3_FIFO_REG	S3C2410_USBDREG(0x01cc)
+#define S3C2410_UDC_EP4_FIFO_REG	S3C2410_USBDREG(0x01d0)
+
+#define S3C2410_UDC_EP1_DMA_CON		S3C2410_USBDREG(0x0200)
+#define S3C2410_UDC_EP1_DMA_UNIT	S3C2410_USBDREG(0x0204)
+#define S3C2410_UDC_EP1_DMA_FIFO	S3C2410_USBDREG(0x0208)
+#define S3C2410_UDC_EP1_DMA_TTC_L	S3C2410_USBDREG(0x020c)
+#define S3C2410_UDC_EP1_DMA_TTC_M	S3C2410_USBDREG(0x0210)
+#define S3C2410_UDC_EP1_DMA_TTC_H	S3C2410_USBDREG(0x0214)
+
+#define S3C2410_UDC_EP2_DMA_CON		S3C2410_USBDREG(0x0218)
+#define S3C2410_UDC_EP2_DMA_UNIT	S3C2410_USBDREG(0x021c)
+#define S3C2410_UDC_EP2_DMA_FIFO	S3C2410_USBDREG(0x0220)
+#define S3C2410_UDC_EP2_DMA_TTC_L	S3C2410_USBDREG(0x0224)
+#define S3C2410_UDC_EP2_DMA_TTC_M	S3C2410_USBDREG(0x0228)
+#define S3C2410_UDC_EP2_DMA_TTC_H	S3C2410_USBDREG(0x022c)
+
+#define S3C2410_UDC_EP3_DMA_CON		S3C2410_USBDREG(0x0240)
+#define S3C2410_UDC_EP3_DMA_UNIT	S3C2410_USBDREG(0x0244)
+#define S3C2410_UDC_EP3_DMA_FIFO	S3C2410_USBDREG(0x0248)
+#define S3C2410_UDC_EP3_DMA_TTC_L	S3C2410_USBDREG(0x024c)
+#define S3C2410_UDC_EP3_DMA_TTC_M	S3C2410_USBDREG(0x0250)
+#define S3C2410_UDC_EP3_DMA_TTC_H	S3C2410_USBDREG(0x0254)
+
+#define S3C2410_UDC_EP4_DMA_CON		S3C2410_USBDREG(0x0258)
+#define S3C2410_UDC_EP4_DMA_UNIT	S3C2410_USBDREG(0x025c)
+#define S3C2410_UDC_EP4_DMA_FIFO	S3C2410_USBDREG(0x0260)
+#define S3C2410_UDC_EP4_DMA_TTC_L	S3C2410_USBDREG(0x0264)
+#define S3C2410_UDC_EP4_DMA_TTC_M	S3C2410_USBDREG(0x0268)
+#define S3C2410_UDC_EP4_DMA_TTC_H	S3C2410_USBDREG(0x026c)
+
+#define S3C2410_UDC_INDEX_REG		S3C2410_USBDREG(0x0178)
+
+/* indexed registers */
+
+#define S3C2410_UDC_MAXP_REG		S3C2410_USBDREG(0x0180)
+
+#define S3C2410_UDC_EP0_CSR_REG		S3C2410_USBDREG(0x0184)
+
+#define S3C2410_UDC_IN_CSR1_REG		S3C2410_USBDREG(0x0184)
+#define S3C2410_UDC_IN_CSR2_REG		S3C2410_USBDREG(0x0188)
+
+#define S3C2410_UDC_OUT_CSR1_REG	S3C2410_USBDREG(0x0190)
+#define S3C2410_UDC_OUT_CSR2_REG	S3C2410_USBDREG(0x0194)
+#define S3C2410_UDC_OUT_FIFO_CNT1_REG	S3C2410_USBDREG(0x0198)
+#define S3C2410_UDC_OUT_FIFO_CNT2_REG	S3C2410_USBDREG(0x019c)
+
+#define S3C2410_UDC_FUNCADDR_UPDATE	(1<<7)
+
+#define S3C2410_UDC_PWR_ISOUP		(1<<7) // R/W
+#define S3C2410_UDC_PWR_RESET		(1<<3) // R
+#define S3C2410_UDC_PWR_RESUME		(1<<2) // R/W
+#define S3C2410_UDC_PWR_SUSPEND		(1<<1) // R
+#define S3C2410_UDC_PWR_ENSUSPEND	(1<<0) // R/W
+
+#define S3C2410_UDC_PWR_DEFAULT		0x00
+
+#define S3C2410_UDC_INT_EP4		(1<<4) // R/W (clear only)
+#define S3C2410_UDC_INT_EP3		(1<<3) // R/W (clear only)
+#define S3C2410_UDC_INT_EP2		(1<<2) // R/W (clear only)
+#define S3C2410_UDC_INT_EP1		(1<<1) // R/W (clear only)
+#define S3C2410_UDC_INT_EP0		(1<<0) // R/W (clear only)
+
+#define S3C2410_UDC_USBINT_RESET	(1<<2) // R/W (clear only)
+#define S3C2410_UDC_USBINT_RESUME	(1<<1) // R/W (clear only)
+#define S3C2410_UDC_USBINT_SUSPEND	(1<<0) // R/W (clear only)
+
+#define S3C2410_UDC_INTE_EP4		(1<<4) // R/W
+#define S3C2410_UDC_INTE_EP3		(1<<3) // R/W
+#define S3C2410_UDC_INTE_EP2		(1<<2) // R/W
+#define S3C2410_UDC_INTE_EP1		(1<<1) // R/W
+#define S3C2410_UDC_INTE_EP0		(1<<0) // R/W
+
+#define S3C2410_UDC_USBINTE_RESET	(1<<2) // R/W
+#define S3C2410_UDC_USBINTE_SUSPEND	(1<<0) // R/W
+
+
+#define S3C2410_UDC_INDEX_EP0		(0x00)
+#define S3C2410_UDC_INDEX_EP1		(0x01) // ??
+#define S3C2410_UDC_INDEX_EP2		(0x02) // ??
+#define S3C2410_UDC_INDEX_EP3		(0x03) // ??
+#define S3C2410_UDC_INDEX_EP4		(0x04) // ??
+
+#define S3C2410_UDC_ICSR1_CLRDT		(1<<6) // R/W
+#define S3C2410_UDC_ICSR1_SENTSTL	(1<<5) // R/W (clear only)
+#define S3C2410_UDC_ICSR1_SENDSTL	(1<<4) // R/W
+#define S3C2410_UDC_ICSR1_FFLUSH	(1<<3) // W   (set only)
+#define S3C2410_UDC_ICSR1_UNDRUN	(1<<2) // R/W (clear only)
+#define S3C2410_UDC_ICSR1_PKTRDY	(1<<0) // R/W (set only)
+
+#define S3C2410_UDC_ICSR2_AUTOSET	(1<<7) // R/W
+#define S3C2410_UDC_ICSR2_ISO		(1<<6) // R/W
+#define S3C2410_UDC_ICSR2_MODEIN	(1<<5) // R/W
+#define S3C2410_UDC_ICSR2_DMAIEN	(1<<4) // R/W
+
+#define S3C2410_UDC_OCSR1_CLRDT		(1<<7) // R/W
+#define S3C2410_UDC_OCSR1_SENTSTL	(1<<6) // R/W (clear only)
+#define S3C2410_UDC_OCSR1_SENDSTL	(1<<5) // R/W
+#define S3C2410_UDC_OCSR1_FFLUSH	(1<<4) // R/W
+#define S3C2410_UDC_OCSR1_DERROR	(1<<3) // R
+#define S3C2410_UDC_OCSR1_OVRRUN	(1<<2) // R/W (clear only)
+#define S3C2410_UDC_OCSR1_PKTRDY	(1<<0) // R/W (clear only)
+
+#define S3C2410_UDC_OCSR2_AUTOCLR	(1<<7) // R/W
+#define S3C2410_UDC_OCSR2_ISO		(1<<6) // R/W
+#define S3C2410_UDC_OCSR2_DMAIEN	(1<<5) // R/W
+
+#define S3C2410_UDC_EP0_CSR_OPKRDY	(1<<0)
+#define S3C2410_UDC_EP0_CSR_IPKRDY	(1<<1)
+#define S3C2410_UDC_EP0_CSR_SENTSTL	(1<<2)
+#define S3C2410_UDC_EP0_CSR_DE		(1<<3)
+#define S3C2410_UDC_EP0_CSR_SE		(1<<4)
+#define S3C2410_UDC_EP0_CSR_SENDSTL	(1<<5)
+#define S3C2410_UDC_EP0_CSR_SOPKTRDY	(1<<6)
+#define S3C2410_UDC_EP0_CSR_SSE	(1<<7)
+
+#define S3C2410_UDC_MAXP_8		(1<<0)
+#define S3C2410_UDC_MAXP_16		(1<<1)
+#define S3C2410_UDC_MAXP_32		(1<<2)
+#define S3C2410_UDC_MAXP_64		(1<<3)
+
+
+#endif
diff --git a/arch/arm/plat-s3c24xx/include/plat/udc.h b/arch/arm/plat-s3c24xx/include/plat/udc.h
new file mode 100644
index 000000000000..546bb4008f49
--- /dev/null
+++ b/arch/arm/plat-s3c24xx/include/plat/udc.h
@@ -0,0 +1,36 @@
+/* arch/arm/mach-s3c2410/include/mach/udc.h
+ *
+ * Copyright (c) 2005 Arnaud Patard <arnaud.patard@rtp-net.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *
+ *  Changelog:
+ *	14-Mar-2005	RTP	Created file
+ *	02-Aug-2005	RTP	File rename
+ *	07-Sep-2005	BJD	Minor cleanups, changed cmd to enum
+ *	18-Jan-2007	HMW	Add per-platform vbus_draw function
+*/
+
+#ifndef __ASM_ARM_ARCH_UDC_H
+#define __ASM_ARM_ARCH_UDC_H
+
+enum s3c2410_udc_cmd_e {
+	S3C2410_UDC_P_ENABLE	= 1,	/* Pull-up enable        */
+	S3C2410_UDC_P_DISABLE	= 2,	/* Pull-up disable       */
+	S3C2410_UDC_P_RESET	= 3,	/* UDC reset, in case of */
+};
+
+struct s3c2410_udc_mach_info {
+	void	(*udc_command)(enum s3c2410_udc_cmd_e);
+ 	void	(*vbus_draw)(unsigned int ma);
+	unsigned int vbus_pin;
+	unsigned char vbus_pin_inverted;
+};
+
+extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *);
+
+#endif /* __ASM_ARM_ARCH_UDC_H */
diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c
index 00ba06b44752..8d8d65165983 100644
--- a/drivers/usb/gadget/s3c2410_udc.c
+++ b/drivers/usb/gadget/s3c2410_udc.c
@@ -53,8 +53,8 @@
 #include <mach/hardware.h>
 #include <mach/regs-gpio.h>
 
-#include <asm/plat-s3c24xx/regs-udc.h>
-#include <asm/plat-s3c24xx/udc.h>
+#include <plat/regs-udc.h>
+#include <plat/udc.h>
 
 
 #include "s3c2410_udc.h"
diff --git a/include/asm-arm/plat-s3c24xx/regs-udc.h b/include/asm-arm/plat-s3c24xx/regs-udc.h
deleted file mode 100644
index f0dd4a41b37b..000000000000
--- a/include/asm-arm/plat-s3c24xx/regs-udc.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-udc.h
- *
- * Copyright (C) 2004 Herbert Poetzl <herbert@13thfloor.at>
- *
- * This include file is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
-*/
-
-#ifndef __ASM_ARCH_REGS_UDC_H
-#define __ASM_ARCH_REGS_UDC_H
-
-#define S3C2410_USBDREG(x) (x)
-
-#define S3C2410_UDC_FUNC_ADDR_REG	S3C2410_USBDREG(0x0140)
-#define S3C2410_UDC_PWR_REG		S3C2410_USBDREG(0x0144)
-#define S3C2410_UDC_EP_INT_REG		S3C2410_USBDREG(0x0148)
-
-#define S3C2410_UDC_USB_INT_REG		S3C2410_USBDREG(0x0158)
-#define S3C2410_UDC_EP_INT_EN_REG	S3C2410_USBDREG(0x015c)
-
-#define S3C2410_UDC_USB_INT_EN_REG	S3C2410_USBDREG(0x016c)
-
-#define S3C2410_UDC_FRAME_NUM1_REG	S3C2410_USBDREG(0x0170)
-#define S3C2410_UDC_FRAME_NUM2_REG	S3C2410_USBDREG(0x0174)
-
-#define S3C2410_UDC_EP0_FIFO_REG	S3C2410_USBDREG(0x01c0)
-#define S3C2410_UDC_EP1_FIFO_REG	S3C2410_USBDREG(0x01c4)
-#define S3C2410_UDC_EP2_FIFO_REG	S3C2410_USBDREG(0x01c8)
-#define S3C2410_UDC_EP3_FIFO_REG	S3C2410_USBDREG(0x01cc)
-#define S3C2410_UDC_EP4_FIFO_REG	S3C2410_USBDREG(0x01d0)
-
-#define S3C2410_UDC_EP1_DMA_CON		S3C2410_USBDREG(0x0200)
-#define S3C2410_UDC_EP1_DMA_UNIT	S3C2410_USBDREG(0x0204)
-#define S3C2410_UDC_EP1_DMA_FIFO	S3C2410_USBDREG(0x0208)
-#define S3C2410_UDC_EP1_DMA_TTC_L	S3C2410_USBDREG(0x020c)
-#define S3C2410_UDC_EP1_DMA_TTC_M	S3C2410_USBDREG(0x0210)
-#define S3C2410_UDC_EP1_DMA_TTC_H	S3C2410_USBDREG(0x0214)
-
-#define S3C2410_UDC_EP2_DMA_CON		S3C2410_USBDREG(0x0218)
-#define S3C2410_UDC_EP2_DMA_UNIT	S3C2410_USBDREG(0x021c)
-#define S3C2410_UDC_EP2_DMA_FIFO	S3C2410_USBDREG(0x0220)
-#define S3C2410_UDC_EP2_DMA_TTC_L	S3C2410_USBDREG(0x0224)
-#define S3C2410_UDC_EP2_DMA_TTC_M	S3C2410_USBDREG(0x0228)
-#define S3C2410_UDC_EP2_DMA_TTC_H	S3C2410_USBDREG(0x022c)
-
-#define S3C2410_UDC_EP3_DMA_CON		S3C2410_USBDREG(0x0240)
-#define S3C2410_UDC_EP3_DMA_UNIT	S3C2410_USBDREG(0x0244)
-#define S3C2410_UDC_EP3_DMA_FIFO	S3C2410_USBDREG(0x0248)
-#define S3C2410_UDC_EP3_DMA_TTC_L	S3C2410_USBDREG(0x024c)
-#define S3C2410_UDC_EP3_DMA_TTC_M	S3C2410_USBDREG(0x0250)
-#define S3C2410_UDC_EP3_DMA_TTC_H	S3C2410_USBDREG(0x0254)
-
-#define S3C2410_UDC_EP4_DMA_CON		S3C2410_USBDREG(0x0258)
-#define S3C2410_UDC_EP4_DMA_UNIT	S3C2410_USBDREG(0x025c)
-#define S3C2410_UDC_EP4_DMA_FIFO	S3C2410_USBDREG(0x0260)
-#define S3C2410_UDC_EP4_DMA_TTC_L	S3C2410_USBDREG(0x0264)
-#define S3C2410_UDC_EP4_DMA_TTC_M	S3C2410_USBDREG(0x0268)
-#define S3C2410_UDC_EP4_DMA_TTC_H	S3C2410_USBDREG(0x026c)
-
-#define S3C2410_UDC_INDEX_REG		S3C2410_USBDREG(0x0178)
-
-/* indexed registers */
-
-#define S3C2410_UDC_MAXP_REG		S3C2410_USBDREG(0x0180)
-
-#define S3C2410_UDC_EP0_CSR_REG		S3C2410_USBDREG(0x0184)
-
-#define S3C2410_UDC_IN_CSR1_REG		S3C2410_USBDREG(0x0184)
-#define S3C2410_UDC_IN_CSR2_REG		S3C2410_USBDREG(0x0188)
-
-#define S3C2410_UDC_OUT_CSR1_REG	S3C2410_USBDREG(0x0190)
-#define S3C2410_UDC_OUT_CSR2_REG	S3C2410_USBDREG(0x0194)
-#define S3C2410_UDC_OUT_FIFO_CNT1_REG	S3C2410_USBDREG(0x0198)
-#define S3C2410_UDC_OUT_FIFO_CNT2_REG	S3C2410_USBDREG(0x019c)
-
-#define S3C2410_UDC_FUNCADDR_UPDATE	(1<<7)
-
-#define S3C2410_UDC_PWR_ISOUP		(1<<7) // R/W
-#define S3C2410_UDC_PWR_RESET		(1<<3) // R
-#define S3C2410_UDC_PWR_RESUME		(1<<2) // R/W
-#define S3C2410_UDC_PWR_SUSPEND		(1<<1) // R
-#define S3C2410_UDC_PWR_ENSUSPEND	(1<<0) // R/W
-
-#define S3C2410_UDC_PWR_DEFAULT		0x00
-
-#define S3C2410_UDC_INT_EP4		(1<<4) // R/W (clear only)
-#define S3C2410_UDC_INT_EP3		(1<<3) // R/W (clear only)
-#define S3C2410_UDC_INT_EP2		(1<<2) // R/W (clear only)
-#define S3C2410_UDC_INT_EP1		(1<<1) // R/W (clear only)
-#define S3C2410_UDC_INT_EP0		(1<<0) // R/W (clear only)
-
-#define S3C2410_UDC_USBINT_RESET	(1<<2) // R/W (clear only)
-#define S3C2410_UDC_USBINT_RESUME	(1<<1) // R/W (clear only)
-#define S3C2410_UDC_USBINT_SUSPEND	(1<<0) // R/W (clear only)
-
-#define S3C2410_UDC_INTE_EP4		(1<<4) // R/W
-#define S3C2410_UDC_INTE_EP3		(1<<3) // R/W
-#define S3C2410_UDC_INTE_EP2		(1<<2) // R/W
-#define S3C2410_UDC_INTE_EP1		(1<<1) // R/W
-#define S3C2410_UDC_INTE_EP0		(1<<0) // R/W
-
-#define S3C2410_UDC_USBINTE_RESET	(1<<2) // R/W
-#define S3C2410_UDC_USBINTE_SUSPEND	(1<<0) // R/W
-
-
-#define S3C2410_UDC_INDEX_EP0		(0x00)
-#define S3C2410_UDC_INDEX_EP1		(0x01) // ??
-#define S3C2410_UDC_INDEX_EP2		(0x02) // ??
-#define S3C2410_UDC_INDEX_EP3		(0x03) // ??
-#define S3C2410_UDC_INDEX_EP4		(0x04) // ??
-
-#define S3C2410_UDC_ICSR1_CLRDT		(1<<6) // R/W
-#define S3C2410_UDC_ICSR1_SENTSTL	(1<<5) // R/W (clear only)
-#define S3C2410_UDC_ICSR1_SENDSTL	(1<<4) // R/W
-#define S3C2410_UDC_ICSR1_FFLUSH	(1<<3) // W   (set only)
-#define S3C2410_UDC_ICSR1_UNDRUN	(1<<2) // R/W (clear only)
-#define S3C2410_UDC_ICSR1_PKTRDY	(1<<0) // R/W (set only)
-
-#define S3C2410_UDC_ICSR2_AUTOSET	(1<<7) // R/W
-#define S3C2410_UDC_ICSR2_ISO		(1<<6) // R/W
-#define S3C2410_UDC_ICSR2_MODEIN	(1<<5) // R/W
-#define S3C2410_UDC_ICSR2_DMAIEN	(1<<4) // R/W
-
-#define S3C2410_UDC_OCSR1_CLRDT		(1<<7) // R/W
-#define S3C2410_UDC_OCSR1_SENTSTL	(1<<6) // R/W (clear only)
-#define S3C2410_UDC_OCSR1_SENDSTL	(1<<5) // R/W
-#define S3C2410_UDC_OCSR1_FFLUSH	(1<<4) // R/W
-#define S3C2410_UDC_OCSR1_DERROR	(1<<3) // R
-#define S3C2410_UDC_OCSR1_OVRRUN	(1<<2) // R/W (clear only)
-#define S3C2410_UDC_OCSR1_PKTRDY	(1<<0) // R/W (clear only)
-
-#define S3C2410_UDC_OCSR2_AUTOCLR	(1<<7) // R/W
-#define S3C2410_UDC_OCSR2_ISO		(1<<6) // R/W
-#define S3C2410_UDC_OCSR2_DMAIEN	(1<<5) // R/W
-
-#define S3C2410_UDC_EP0_CSR_OPKRDY	(1<<0)
-#define S3C2410_UDC_EP0_CSR_IPKRDY	(1<<1)
-#define S3C2410_UDC_EP0_CSR_SENTSTL	(1<<2)
-#define S3C2410_UDC_EP0_CSR_DE		(1<<3)
-#define S3C2410_UDC_EP0_CSR_SE		(1<<4)
-#define S3C2410_UDC_EP0_CSR_SENDSTL	(1<<5)
-#define S3C2410_UDC_EP0_CSR_SOPKTRDY	(1<<6)
-#define S3C2410_UDC_EP0_CSR_SSE	(1<<7)
-
-#define S3C2410_UDC_MAXP_8		(1<<0)
-#define S3C2410_UDC_MAXP_16		(1<<1)
-#define S3C2410_UDC_MAXP_32		(1<<2)
-#define S3C2410_UDC_MAXP_64		(1<<3)
-
-
-#endif
diff --git a/include/asm-arm/plat-s3c24xx/udc.h b/include/asm-arm/plat-s3c24xx/udc.h
deleted file mode 100644
index 546bb4008f49..000000000000
--- a/include/asm-arm/plat-s3c24xx/udc.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/udc.h
- *
- * Copyright (c) 2005 Arnaud Patard <arnaud.patard@rtp-net.org>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *
- *  Changelog:
- *	14-Mar-2005	RTP	Created file
- *	02-Aug-2005	RTP	File rename
- *	07-Sep-2005	BJD	Minor cleanups, changed cmd to enum
- *	18-Jan-2007	HMW	Add per-platform vbus_draw function
-*/
-
-#ifndef __ASM_ARM_ARCH_UDC_H
-#define __ASM_ARM_ARCH_UDC_H
-
-enum s3c2410_udc_cmd_e {
-	S3C2410_UDC_P_ENABLE	= 1,	/* Pull-up enable        */
-	S3C2410_UDC_P_DISABLE	= 2,	/* Pull-up disable       */
-	S3C2410_UDC_P_RESET	= 3,	/* UDC reset, in case of */
-};
-
-struct s3c2410_udc_mach_info {
-	void	(*udc_command)(enum s3c2410_udc_cmd_e);
- 	void	(*vbus_draw)(unsigned int ma);
-	unsigned int vbus_pin;
-	unsigned char vbus_pin_inverted;
-};
-
-extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *);
-
-#endif /* __ASM_ARM_ARCH_UDC_H */
-- 
cgit v1.2.3


From 13622708725990b01fbc6d59d54d93820a726d7c Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:38 +0000
Subject: [ARM] S3C: Move plat/regs-spi.h to arch/arm/plat-s3c/include/plat.

Move plat/regs-spi.h to arch/arm/plat-s3c/include/plat ready
ready to clean out old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/dma.c                   |  2 +-
 arch/arm/mach-s3c2412/dma.c                   |  2 +-
 arch/arm/mach-s3c2412/s3c2412.c               |  2 +-
 arch/arm/mach-s3c2440/dma.c                   |  2 +-
 arch/arm/mach-s3c2443/dma.c                   |  2 +-
 arch/arm/plat-s3c24xx/devs.c                  |  2 +-
 arch/arm/plat-s3c24xx/include/plat/regs-spi.h | 82 +++++++++++++++++++++++++++
 drivers/spi/spi_s3c24xx.c                     |  2 +-
 include/asm-arm/plat-s3c24xx/regs-spi.h       | 82 ---------------------------
 9 files changed, 89 insertions(+), 89 deletions(-)
 create mode 100644 arch/arm/plat-s3c24xx/include/plat/regs-spi.h
 delete mode 100644 include/asm-arm/plat-s3c24xx/regs-spi.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 7fa77effccf8..30983cc612df 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -30,7 +30,7 @@
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
 #include <asm/plat-s3c24xx/regs-iis.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 
 static struct s3c24xx_dma_map __initdata s3c2410_dma_mappings[] = {
 	[DMACH_XD0] = {
diff --git a/arch/arm/mach-s3c2412/dma.c b/arch/arm/mach-s3c2412/dma.c
index 7db581826676..f8b2dd4a3632 100644
--- a/arch/arm/mach-s3c2412/dma.c
+++ b/arch/arm/mach-s3c2412/dma.c
@@ -32,7 +32,7 @@
 #include <mach/regs-sdi.h>
 #include <asm/plat-s3c24xx/regs-s3c2412-iis.h>
 #include <asm/plat-s3c24xx/regs-iis.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 
 #define MAP(x) { (x)| DMA_CH_VALID, (x)| DMA_CH_VALID, (x)| DMA_CH_VALID, (x)| DMA_CH_VALID }
 
diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c
index 313759c3da69..a086818e117e 100644
--- a/arch/arm/mach-s3c2412/s3c2412.c
+++ b/arch/arm/mach-s3c2412/s3c2412.c
@@ -39,7 +39,7 @@
 #include <mach/regs-gpio.h>
 #include <mach/regs-gpioj.h>
 #include <mach/regs-dsc.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 #include <mach/regs-s3c2412.h>
 
 #include <plat/s3c2412.h>
diff --git a/arch/arm/mach-s3c2440/dma.c b/arch/arm/mach-s3c2440/dma.c
index 00d88782b23b..53be553bfc21 100644
--- a/arch/arm/mach-s3c2440/dma.c
+++ b/arch/arm/mach-s3c2440/dma.c
@@ -30,7 +30,7 @@
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
 #include <asm/plat-s3c24xx/regs-iis.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 
 static struct s3c24xx_dma_map __initdata s3c2440_dma_mappings[] = {
 	[DMACH_XD0] = {
diff --git a/arch/arm/mach-s3c2443/dma.c b/arch/arm/mach-s3c2443/dma.c
index 4185c57b5dd0..872482f02685 100644
--- a/arch/arm/mach-s3c2443/dma.c
+++ b/arch/arm/mach-s3c2443/dma.c
@@ -31,7 +31,7 @@
 #include <mach/regs-lcd.h>
 #include <mach/regs-sdi.h>
 #include <asm/plat-s3c24xx/regs-iis.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 
 #define MAP(x) { \
 		[0]	= (x) | DMA_CH_VALID,	\
diff --git a/arch/arm/plat-s3c24xx/devs.c b/arch/arm/plat-s3c24xx/devs.c
index 07491bcd13ba..adf535aaf43a 100644
--- a/arch/arm/plat-s3c24xx/devs.c
+++ b/arch/arm/plat-s3c24xx/devs.c
@@ -33,7 +33,7 @@
 
 #include <plat/devs.h>
 #include <plat/cpu.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 
 /* Serial port registrations */
 
diff --git a/arch/arm/plat-s3c24xx/include/plat/regs-spi.h b/arch/arm/plat-s3c24xx/include/plat/regs-spi.h
new file mode 100644
index 000000000000..2b35479ee35c
--- /dev/null
+++ b/arch/arm/plat-s3c24xx/include/plat/regs-spi.h
@@ -0,0 +1,82 @@
+/* arch/arm/mach-s3c2410/include/mach/regs-spi.h
+ *
+ * Copyright (c) 2004 Fetron GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * S3C2410 SPI register definition
+*/
+
+#ifndef __ASM_ARCH_REGS_SPI_H
+#define __ASM_ARCH_REGS_SPI_H
+
+#define S3C2410_SPI1	(0x20)
+#define S3C2412_SPI1	(0x100)
+
+#define S3C2410_SPCON	(0x00)
+
+#define S3C2412_SPCON_RXFIFO_RB2	(0<<14)
+#define S3C2412_SPCON_RXFIFO_RB4	(1<<14)
+#define S3C2412_SPCON_RXFIFO_RB12	(2<<14)
+#define S3C2412_SPCON_RXFIFO_RB14	(3<<14)
+#define S3C2412_SPCON_TXFIFO_RB2	(0<<12)
+#define S3C2412_SPCON_TXFIFO_RB4	(1<<12)
+#define S3C2412_SPCON_TXFIFO_RB12	(2<<12)
+#define S3C2412_SPCON_TXFIFO_RB14	(3<<12)
+#define S3C2412_SPCON_RXFIFO_RESET	(1<<11) /* RxFIFO reset */
+#define S3C2412_SPCON_TXFIFO_RESET	(1<<10) /* TxFIFO reset */
+#define S3C2412_SPCON_RXFIFO_EN		(1<<9)  /* RxFIFO Enable */
+#define S3C2412_SPCON_TXFIFO_EN		(1<<8)  /* TxFIFO Enable */
+
+#define S3C2412_SPCON_DIRC_RX	  (1<<7)
+
+#define S3C2410_SPCON_SMOD_DMA	  (2<<5)	/* DMA mode */
+#define S3C2410_SPCON_SMOD_INT	  (1<<5)	/* interrupt mode */
+#define S3C2410_SPCON_SMOD_POLL   (0<<5)	/* polling mode */
+#define S3C2410_SPCON_ENSCK	  (1<<4)	/* Enable SCK */
+#define S3C2410_SPCON_MSTR	  (1<<3)	/* Master/Slave select
+						   0: slave, 1: master */
+#define S3C2410_SPCON_CPOL_HIGH	  (1<<2)	/* Clock polarity select */
+#define S3C2410_SPCON_CPOL_LOW	  (0<<2)	/* Clock polarity select */
+
+#define S3C2410_SPCON_CPHA_FMTB	  (1<<1)	/* Clock Phase Select */
+#define S3C2410_SPCON_CPHA_FMTA	  (0<<1)	/* Clock Phase Select */
+
+#define S3C2410_SPCON_TAGD	  (1<<0)	/* Tx auto garbage data mode */
+
+
+#define S3C2410_SPSTA	 (0x04)
+
+#define S3C2412_SPSTA_RXFIFO_AE		(1<<11)
+#define S3C2412_SPSTA_TXFIFO_AE		(1<<10)
+#define S3C2412_SPSTA_RXFIFO_ERROR	(1<<9)
+#define S3C2412_SPSTA_TXFIFO_ERROR	(1<<8)
+#define S3C2412_SPSTA_RXFIFO_FIFO	(1<<7)
+#define S3C2412_SPSTA_RXFIFO_EMPTY	(1<<6)
+#define S3C2412_SPSTA_TXFIFO_NFULL	(1<<5)
+#define S3C2412_SPSTA_TXFIFO_EMPTY	(1<<4)
+
+#define S3C2410_SPSTA_DCOL	  (1<<2)	/* Data Collision Error */
+#define S3C2410_SPSTA_MULD	  (1<<1)	/* Multi Master Error */
+#define S3C2410_SPSTA_READY	  (1<<0)	/* Data Tx/Rx ready */
+#define S3C2412_SPSTA_READY_ORG	  (1<<3)
+
+#define S3C2410_SPPIN	 (0x08)
+
+#define S3C2410_SPPIN_ENMUL	  (1<<2)	/* Multi Master Error detect */
+#define S3C2410_SPPIN_RESERVED	  (1<<1)
+#define S3C2400_SPPIN_nCS     	  (1<<1)	/* SPI Card Select */
+#define S3C2410_SPPIN_KEEP	  (1<<0)	/* Master Out keep */
+
+#define S3C2410_SPPRE	 (0x0C)
+#define S3C2410_SPTDAT	 (0x10)
+#define S3C2410_SPRDAT	 (0x14)
+
+#define S3C2412_TXFIFO	 (0x18)
+#define S3C2412_RXFIFO	 (0x18)
+#define S3C2412_SPFIC	 (0x24)
+
+
+#endif /* __ASM_ARCH_REGS_SPI_H */
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index c252cbac00f1..256d18395a23 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -28,7 +28,7 @@
 #include <mach/hardware.h>
 
 #include <mach/regs-gpio.h>
-#include <asm/plat-s3c24xx/regs-spi.h>
+#include <plat/regs-spi.h>
 #include <mach/spi.h>
 
 struct s3c24xx_spi {
diff --git a/include/asm-arm/plat-s3c24xx/regs-spi.h b/include/asm-arm/plat-s3c24xx/regs-spi.h
deleted file mode 100644
index 2b35479ee35c..000000000000
--- a/include/asm-arm/plat-s3c24xx/regs-spi.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* arch/arm/mach-s3c2410/include/mach/regs-spi.h
- *
- * Copyright (c) 2004 Fetron GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * S3C2410 SPI register definition
-*/
-
-#ifndef __ASM_ARCH_REGS_SPI_H
-#define __ASM_ARCH_REGS_SPI_H
-
-#define S3C2410_SPI1	(0x20)
-#define S3C2412_SPI1	(0x100)
-
-#define S3C2410_SPCON	(0x00)
-
-#define S3C2412_SPCON_RXFIFO_RB2	(0<<14)
-#define S3C2412_SPCON_RXFIFO_RB4	(1<<14)
-#define S3C2412_SPCON_RXFIFO_RB12	(2<<14)
-#define S3C2412_SPCON_RXFIFO_RB14	(3<<14)
-#define S3C2412_SPCON_TXFIFO_RB2	(0<<12)
-#define S3C2412_SPCON_TXFIFO_RB4	(1<<12)
-#define S3C2412_SPCON_TXFIFO_RB12	(2<<12)
-#define S3C2412_SPCON_TXFIFO_RB14	(3<<12)
-#define S3C2412_SPCON_RXFIFO_RESET	(1<<11) /* RxFIFO reset */
-#define S3C2412_SPCON_TXFIFO_RESET	(1<<10) /* TxFIFO reset */
-#define S3C2412_SPCON_RXFIFO_EN		(1<<9)  /* RxFIFO Enable */
-#define S3C2412_SPCON_TXFIFO_EN		(1<<8)  /* TxFIFO Enable */
-
-#define S3C2412_SPCON_DIRC_RX	  (1<<7)
-
-#define S3C2410_SPCON_SMOD_DMA	  (2<<5)	/* DMA mode */
-#define S3C2410_SPCON_SMOD_INT	  (1<<5)	/* interrupt mode */
-#define S3C2410_SPCON_SMOD_POLL   (0<<5)	/* polling mode */
-#define S3C2410_SPCON_ENSCK	  (1<<4)	/* Enable SCK */
-#define S3C2410_SPCON_MSTR	  (1<<3)	/* Master/Slave select
-						   0: slave, 1: master */
-#define S3C2410_SPCON_CPOL_HIGH	  (1<<2)	/* Clock polarity select */
-#define S3C2410_SPCON_CPOL_LOW	  (0<<2)	/* Clock polarity select */
-
-#define S3C2410_SPCON_CPHA_FMTB	  (1<<1)	/* Clock Phase Select */
-#define S3C2410_SPCON_CPHA_FMTA	  (0<<1)	/* Clock Phase Select */
-
-#define S3C2410_SPCON_TAGD	  (1<<0)	/* Tx auto garbage data mode */
-
-
-#define S3C2410_SPSTA	 (0x04)
-
-#define S3C2412_SPSTA_RXFIFO_AE		(1<<11)
-#define S3C2412_SPSTA_TXFIFO_AE		(1<<10)
-#define S3C2412_SPSTA_RXFIFO_ERROR	(1<<9)
-#define S3C2412_SPSTA_TXFIFO_ERROR	(1<<8)
-#define S3C2412_SPSTA_RXFIFO_FIFO	(1<<7)
-#define S3C2412_SPSTA_RXFIFO_EMPTY	(1<<6)
-#define S3C2412_SPSTA_TXFIFO_NFULL	(1<<5)
-#define S3C2412_SPSTA_TXFIFO_EMPTY	(1<<4)
-
-#define S3C2410_SPSTA_DCOL	  (1<<2)	/* Data Collision Error */
-#define S3C2410_SPSTA_MULD	  (1<<1)	/* Multi Master Error */
-#define S3C2410_SPSTA_READY	  (1<<0)	/* Data Tx/Rx ready */
-#define S3C2412_SPSTA_READY_ORG	  (1<<3)
-
-#define S3C2410_SPPIN	 (0x08)
-
-#define S3C2410_SPPIN_ENMUL	  (1<<2)	/* Multi Master Error detect */
-#define S3C2410_SPPIN_RESERVED	  (1<<1)
-#define S3C2400_SPPIN_nCS     	  (1<<1)	/* SPI Card Select */
-#define S3C2410_SPPIN_KEEP	  (1<<0)	/* Master Out keep */
-
-#define S3C2410_SPPRE	 (0x0C)
-#define S3C2410_SPTDAT	 (0x10)
-#define S3C2410_SPRDAT	 (0x14)
-
-#define S3C2412_TXFIFO	 (0x18)
-#define S3C2412_RXFIFO	 (0x18)
-#define S3C2412_SPFIC	 (0x24)
-
-
-#endif /* __ASM_ARCH_REGS_SPI_H */
-- 
cgit v1.2.3


From e3bd9ec5d8bfc90f9e1bd995677829e57a404061 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Thu, 30 Oct 2008 10:14:39 +0000
Subject: [ARM] S3C24XX: Move mci.h to arch/arm/plat-s3c24xx/include/plat

Move mci.h to new position in arch/arm/plat-s3c24xx/include/plat
ready to clean out old include directories.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/plat-s3c24xx/include/plat/mci.h | 15 +++++++++++++++
 drivers/mmc/host/s3cmci.c                |  2 +-
 include/asm-arm/plat-s3c24xx/mci.h       | 15 ---------------
 3 files changed, 16 insertions(+), 16 deletions(-)
 create mode 100644 arch/arm/plat-s3c24xx/include/plat/mci.h
 delete mode 100644 include/asm-arm/plat-s3c24xx/mci.h

(limited to 'include')

diff --git a/arch/arm/plat-s3c24xx/include/plat/mci.h b/arch/arm/plat-s3c24xx/include/plat/mci.h
new file mode 100644
index 000000000000..2d0852ac3b27
--- /dev/null
+++ b/arch/arm/plat-s3c24xx/include/plat/mci.h
@@ -0,0 +1,15 @@
+#ifndef _ARCH_MCI_H
+#define _ARCH_MCI_H
+
+struct s3c24xx_mci_pdata {
+	unsigned int	wprotect_invert : 1;
+	unsigned int	detect_invert : 1;   /* set => detect active high. */
+
+	unsigned int	gpio_detect;
+	unsigned int	gpio_wprotect;
+	unsigned long	ocr_avail;
+	void		(*set_power)(unsigned char power_mode,
+				     unsigned short vdd);
+};
+
+#endif /* _ARCH_NCI_H */
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 3b2085b57769..fcc98a4cce3c 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -25,7 +25,7 @@
 #include <mach/regs-sdi.h>
 #include <mach/regs-gpio.h>
 
-#include <asm/plat-s3c24xx/mci.h>
+#include <plat/mci.h>
 
 #include "s3cmci.h"
 
diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h
deleted file mode 100644
index 2d0852ac3b27..000000000000
--- a/include/asm-arm/plat-s3c24xx/mci.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _ARCH_MCI_H
-#define _ARCH_MCI_H
-
-struct s3c24xx_mci_pdata {
-	unsigned int	wprotect_invert : 1;
-	unsigned int	detect_invert : 1;   /* set => detect active high. */
-
-	unsigned int	gpio_detect;
-	unsigned int	gpio_wprotect;
-	unsigned long	ocr_avail;
-	void		(*set_power)(unsigned char power_mode,
-				     unsigned short vdd);
-};
-
-#endif /* _ARCH_NCI_H */
-- 
cgit v1.2.3


From 12ef193d5817504621e503e78d641265f6a86ac4 Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Mon, 13 Oct 2008 17:42:14 -0700
Subject: ASoC: Allow setting codec register with debugfs filesystem

i.e. echo 6 59 >/sys/kernel/debug/soc-audio.0/codec_reg
will set register 0x06 to a value of 0x59.
Also, pop_time debugfs interface setup is moved so that it
is setup in the same function as codec_reg

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  |   4 ++
 sound/soc/soc-core.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++--
 sound/soc/soc-dapm.c |  33 ++++-----------
 3 files changed, 125 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index a1e0357a84d7..d33825d624a5 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -425,6 +425,7 @@ struct snd_soc_codec {
 	short reg_cache_step;
 
 	/* dapm */
+	u32 pop_time;
 	struct list_head dapm_widgets;
 	struct list_head dapm_paths;
 	enum snd_soc_bias_level bias_level;
@@ -516,6 +517,9 @@ struct snd_soc_device {
 	struct delayed_work delayed_work;
 	struct work_struct deferred_resume_work;
 	void *codec_data;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry	*debugfs_root;
+#endif
 };
 
 /* runtime channel data */
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 462e635dfc74..4707042b3dad 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/pm.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/platform_device.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
@@ -961,10 +962,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev,
 }
 
 /* codec register dump */
-static ssize_t codec_reg_show(struct device *dev,
-	struct device_attribute *attr, char *buf)
+static ssize_t soc_codec_reg_show(struct snd_soc_device *devdata, char *buf)
 {
-	struct snd_soc_device *devdata = dev_get_drvdata(dev);
 	struct snd_soc_codec *codec = devdata->codec;
 	int i, step = 1, count = 0;
 
@@ -1001,8 +1000,117 @@ static ssize_t codec_reg_show(struct device *dev,
 
 	return count;
 }
+static ssize_t codec_reg_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct snd_soc_device *devdata = dev_get_drvdata(dev);
+	return soc_codec_reg_show(devdata, buf);
+}
+
 static DEVICE_ATTR(codec_reg, 0444, codec_reg_show, NULL);
 
+#ifdef CONFIG_DEBUG_FS
+static int codec_reg_open_file(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t codec_reg_read_file(struct file *file, char __user *user_buf,
+			       size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+	struct snd_soc_device *devdata = file->private_data;
+	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	ret = soc_codec_reg_show(devdata, buf);
+	if (ret >= 0)
+		ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
+	kfree(buf);
+	return ret;
+}
+
+static ssize_t codec_reg_write_file(struct file *file,
+		const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	char buf[32];
+	int buf_size;
+	char *start = buf;
+	unsigned long reg, value;
+	int step = 1;
+	struct snd_soc_device *devdata = file->private_data;
+	struct snd_soc_codec *codec = devdata->codec;
+
+	buf_size = min(count, (sizeof(buf)-1));
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+	buf[buf_size] = 0;
+
+	if (codec->reg_cache_step)
+		step = codec->reg_cache_step;
+
+	while (*start == ' ')
+		start++;
+	reg = simple_strtoul(start, &start, 16);
+	if ((reg >= codec->reg_cache_size) || (reg % step))
+		return -EINVAL;
+	while (*start == ' ')
+		start++;
+	if (strict_strtoul(start, 16, &value))
+		return -EINVAL;
+	codec->write(codec, reg, value);
+	return buf_size;
+}
+
+static const struct file_operations codec_reg_fops = {
+	.open = codec_reg_open_file,
+	.read = codec_reg_read_file,
+	.write = codec_reg_write_file,
+};
+
+static void soc_init_debugfs(struct snd_soc_device *socdev)
+{
+	struct dentry *root, *file;
+	struct snd_soc_codec *codec = socdev->codec;
+	root = debugfs_create_dir(dev_name(socdev->dev), NULL);
+	if (IS_ERR(root) || !root)
+		goto exit1;
+
+	file = debugfs_create_file("codec_reg", 0644,
+			root, socdev, &codec_reg_fops);
+	if (!file)
+		goto exit2;
+
+	file = debugfs_create_u32("dapm_pop_time", 0744,
+			root, &codec->pop_time);
+	if (!file)
+		goto exit2;
+	socdev->debugfs_root = root;
+	return;
+exit2:
+	debugfs_remove_recursive(root);
+exit1:
+	dev_err(socdev->dev, "debugfs is not available\n");
+}
+
+static void soc_cleanup_debugfs(struct snd_soc_device *socdev)
+{
+	debugfs_remove_recursive(socdev->debugfs_root);
+	socdev->debugfs_root = NULL;
+}
+
+#else
+
+static inline void soc_init_debugfs(struct snd_soc_device *socdev)
+{
+}
+
+static inline void soc_cleanup_debugfs(struct snd_soc_device *socdev)
+{
+}
+#endif
+
 /**
  * snd_soc_new_ac97_codec - initailise AC97 device
  * @codec: audio codec
@@ -1216,6 +1324,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev)
 	if (err < 0)
 		printk(KERN_WARNING "asoc: failed to add codec sysfs files\n");
 
+	soc_init_debugfs(socdev);
 	mutex_unlock(&codec->mutex);
 
 out:
@@ -1239,6 +1348,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev)
 #endif
 
 	mutex_lock(&codec->mutex);
+	soc_cleanup_debugfs(socdev);
 #ifdef CONFIG_SND_SOC_AC97_BUS
 	for (i = 0; i < codec->num_dai; i++) {
 		codec_dai = &codec->dai[i];
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 7e9f423f5b09..b51d82285be4 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -37,7 +37,6 @@
 #include <linux/bitops.h>
 #include <linux/platform_device.h>
 #include <linux/jiffies.h>
-#include <linux/debugfs.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
@@ -67,17 +66,13 @@ static int dapm_status = 1;
 module_param(dapm_status, int, 0);
 MODULE_PARM_DESC(dapm_status, "enable DPM sysfs entries");
 
-static struct dentry *asoc_debugfs;
-
-static u32 pop_time;
-
-static void pop_wait(void)
+static void pop_wait(u32 pop_time)
 {
 	if (pop_time)
 		schedule_timeout_uninterruptible(msecs_to_jiffies(pop_time));
 }
 
-static void pop_dbg(const char *fmt, ...)
+static void pop_dbg(u32 pop_time, const char *fmt, ...)
 {
 	va_list args;
 
@@ -85,7 +80,7 @@ static void pop_dbg(const char *fmt, ...)
 
 	if (pop_time) {
 		vprintk(fmt, args);
-		pop_wait();
+		pop_wait(pop_time);
 	}
 
 	va_end(args);
@@ -230,10 +225,11 @@ static int dapm_update_bits(struct snd_soc_dapm_widget *widget)
 
 	change = old != new;
 	if (change) {
-		pop_dbg("pop test %s : %s in %d ms\n", widget->name,
-			widget->power ? "on" : "off", pop_time);
+		pop_dbg(codec->pop_time, "pop test %s : %s in %d ms\n",
+			widget->name, widget->power ? "on" : "off",
+			codec->pop_time);
 		snd_soc_write(codec, widget->reg, new);
-		pop_wait();
+		pop_wait(codec->pop_time);
 	}
 	pr_debug("reg %x old %x new %x change %d\n", widget->reg,
 		 old, new, change);
@@ -821,23 +817,13 @@ static DEVICE_ATTR(dapm_widget, 0444, dapm_widget_show, NULL);
 
 int snd_soc_dapm_sys_add(struct device *dev)
 {
-	int ret = 0;
-
 	if (!dapm_status)
 		return 0;
 
 	ret = device_create_file(dev, &dev_attr_dapm_widget);
 	if (ret != 0)
 		return ret;
-
-	asoc_debugfs = debugfs_create_dir("asoc", NULL);
-	if (!IS_ERR(asoc_debugfs) && asoc_debugfs)
-		debugfs_create_u32("dapm_pop_time", 0744, asoc_debugfs,
-				   &pop_time);
-	else
-		asoc_debugfs = NULL;
-
-	return 0;
+	return device_create_file(dev, &dev_attr_dapm_widget);
 }
 
 static void snd_soc_dapm_sys_remove(struct device *dev)
@@ -845,9 +831,6 @@ static void snd_soc_dapm_sys_remove(struct device *dev)
 	if (dapm_status) {
 		device_remove_file(dev, &dev_attr_dapm_widget);
 	}
-
-	if (asoc_debugfs)
-		debugfs_remove_recursive(asoc_debugfs);
 }
 
 /* free all dapm widgets and resources */
-- 
cgit v1.2.3


From d98d38f2014ab79f28c126ff175d034891f7aefc Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 29 Oct 2008 14:24:09 -0700
Subject: mutex: improve header comment to be actually informative about the
 API

Impact: improve documentation

It's nice to say that mutex_trylock follows the spin_trylock convention.
It's a lot nicer if the comment also says which that is...  make it so.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mutex.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bc6da10ceee0..7a0e5c4f8072 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 /*
  * NOTE: mutex_trylock() follows the spin_trylock() convention,
  *       not the down_trylock() convention!
+ *
+ * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
-- 
cgit v1.2.3


From 17666f02b118099028522dfc3df00a235700e216 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 30 Oct 2008 16:08:32 -0400
Subject: ftrace: nmi safe code modification

Impact: fix crashes that can occur in NMI handlers, if their code is modified

Modifying code is something that needs special care. On SMP boxes,
if code that is being modified is also being executed on another CPU,
that CPU will have undefined results.

The dynamic ftrace uses kstop_machine to make the system act like a
uniprocessor system. But this does not address NMIs, that can still
run on other CPUs.

One approach to handle this is to make all code that are used by NMIs
not be traced. But NMIs can call notifiers that spread throughout the
kernel and this will be very hard to maintain, and the chance of missing
a function is very high.

The approach that this patch takes is to have the NMIs modify the code
if the modification is taking place. The way this works is that just
writing to code executing on another CPU is not harmful if what is
written is the same as what exists.

Two buffers are used: an IP buffer and a "code" buffer.

The steps that the patcher takes are:

 1) Put in the instruction pointer into the IP buffer
    and the new code into the "code" buffer.
 2) Set a flag that says we are modifying code
 3) Wait for any running NMIs to finish.
 4) Write the code
 5) clear the flag.
 6) Wait for any running NMIs to finish.

If an NMI is executed, it will also write the pending code.
Multiple writes are OK, because what is being written is the same.
Then the patcher must wait for all running NMIs to finish before
going to the next line that must be patched.

This is basically the RCU approach to code modification.

Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven
for his guidence on what is safe and what is not.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/include/asm/ftrace.h     |   5 ++
 arch/powerpc/include/asm/ftrace.h |   5 ++
 arch/sh/include/asm/ftrace.h      |   5 ++
 arch/sparc/include/asm/ftrace.h   |   5 ++
 arch/x86/include/asm/ftrace.h     |  15 ++++++
 arch/x86/kernel/ftrace.c          | 107 +++++++++++++++++++++++++++++++++++++-
 include/linux/hardirq.h           |  15 +++++-
 7 files changed, 154 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 39c8bc1a006a..d4c24a7a9280 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -1,6 +1,11 @@
 #ifndef _ASM_ARM_FTRACE
 #define _ASM_ARM_FTRACE
 
+#ifndef __ASSEMBLY__
+#define ftrace_nmi_enter()	do { } while (0)
+#define ftrace_nmi_exit()	do { } while (0)
+#endif
+
 #ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..7652755dc000 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -1,6 +1,11 @@
 #ifndef _ASM_POWERPC_FTRACE
 #define _ASM_POWERPC_FTRACE
 
+#ifndef __ASSEMBLY__
+#define ftrace_nmi_enter()	do { } while (0)
+#define ftrace_nmi_exit()	do { } while (0)
+#endif
+
 #ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 3aed362c9463..cdf2cb0b9ffe 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -1,6 +1,11 @@
 #ifndef __ASM_SH_FTRACE_H
 #define __ASM_SH_FTRACE_H
 
+#ifndef __ASSEMBLY__
+#define ftrace_nmi_enter()	do { } while (0)
+#define ftrace_nmi_exit()	do { } while (0)
+#endif
+
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 #endif
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index d27716cd38c1..33a95feeb137 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -1,6 +1,11 @@
 #ifndef _ASM_SPARC64_FTRACE
 #define _ASM_SPARC64_FTRACE
 
+#ifndef __ASSEMBLY__
+#define ftrace_nmi_enter()	do { } while (0)
+#define ftrace_nmi_exit()	do { } while (0)
+#endif
+
 #ifdef CONFIG_MCOUNT
 #define MCOUNT_ADDR		((long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..f2ed6b704a75 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 	 */
 	return addr - 1;
 }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_nmi_enter(void);
+extern void ftrace_nmi_exit(void);
+#else
+#define ftrace_nmi_enter()	do { } while (0)
+#define ftrace_nmi_exit()	do { } while (0)
+#endif
+#endif
+
+#else /* CONFIG_FUNCTION_TRACER */
+
+#ifndef __ASSEMBLY__
+#define ftrace_nmi_enter()	do { } while (0)
+#define ftrace_nmi_exit()	do { } while (0)
 #endif
 
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..fe5f859130b5 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -56,6 +56,111 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	return calc.code;
 }
 
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUS from exectuing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put in the instruction pointer into the IP buffer
+ *    and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code
+ * 5) clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+
+static atomic_t in_nmi;
+static int mod_code_status;
+static int mod_code_write;
+static void *mod_code_ip;
+static void *mod_code_newcode;
+
+static void ftrace_mod_code(void)
+{
+	/*
+	 * Yes, more than one CPU process can be writing to mod_code_status.
+	 *    (and the code itself)
+	 * But if one were to fail, then they all should, and if one were
+	 * to succeed, then they all should.
+	 */
+	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+					     MCOUNT_INSN_SIZE);
+
+}
+
+void ftrace_nmi_enter(void)
+{
+	atomic_inc(&in_nmi);
+	/* Must have in_nmi seen before reading write flag */
+	smp_mb();
+	if (mod_code_write)
+		ftrace_mod_code();
+}
+
+void ftrace_nmi_exit(void)
+{
+	/* Finish all executions before clearing in_nmi */
+	smp_wmb();
+	atomic_dec(&in_nmi);
+}
+
+static void wait_for_nmi(void)
+{
+	while (atomic_read(&in_nmi))
+		cpu_relax();
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+	mod_code_ip = (void *)ip;
+	mod_code_newcode = new_code;
+
+	/* The buffers need to be visible before we let NMIs write them */
+	smp_wmb();
+
+	mod_code_write = 1;
+
+	/* Make sure write bit is visible before we wait on NMIs */
+	smp_mb();
+
+	wait_for_nmi();
+
+	/* Make sure all running NMIs have finished before we write the code */
+	smp_mb();
+
+	ftrace_mod_code();
+
+	/* Make sure the write happens before clearing the bit */
+	smp_wmb();
+
+	mod_code_write = 0;
+
+	/* make sure NMIs see the cleared bit */
+	smp_mb();
+
+	wait_for_nmi();
+
+	return mod_code_status;
+}
+
+
 int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
@@ -81,7 +186,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		return -EINVAL;
 
 	/* replace the text with the new text */
-	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+	if (do_ftrace_mod_code(ip, new_code))
 		return -EPERM;
 
 	sync_core();
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..0087cb43becf 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -5,6 +5,7 @@
 #include <linux/smp_lock.h>
 #include <linux/lockdep.h>
 #include <asm/hardirq.h>
+#include <asm/ftrace.h>
 #include <asm/system.h>
 
 /*
@@ -161,7 +162,17 @@ extern void irq_enter(void);
  */
 extern void irq_exit(void);
 
-#define nmi_enter()		do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter()				\
+	do {					\
+		ftrace_nmi_enter();		\
+		lockdep_off();			\
+		__irq_enter();			\
+	} while (0)
+#define nmi_exit()				\
+	do {					\
+		__irq_exit();			\
+		lockdep_on();			\
+		ftrace_nmi_exit();		\
+	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
-- 
cgit v1.2.3


From cc0fe83525d734bdd9c883b45eca6bb22f286daa Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 31 Oct 2008 00:42:25 -0700
Subject: xfrm: remove unused struct xfrm_policy::next

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f2c5ba28a428..45e11b3631e4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -467,7 +467,6 @@ struct xfrm_policy_walk {
 
 struct xfrm_policy
 {
-	struct xfrm_policy	*next;
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
-- 
cgit v1.2.3


From 90d841fd0a5e02affd4e2bbdde4f710c61599281 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 31 Oct 2008 00:43:45 -0700
Subject: pkt_sched: sch_generic: Add Qdisc_ops peek() method.

Add Qdisc_ops peek() method in order to replace requeuing.

Based on ideas and patches of Herbert Xu, Patrick McHardy and
David S. Miller.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3fe49d808957..f81f7c4d76fa 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -111,6 +111,7 @@ struct Qdisc_ops
 
 	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
 	struct sk_buff *	(*dequeue)(struct Qdisc *);
+	struct sk_buff *	(*peek)(struct Qdisc *);
 	int 			(*requeue)(struct sk_buff *, struct Qdisc *);
 	unsigned int		(*drop)(struct Qdisc *);
 
-- 
cgit v1.2.3


From 48a8f519e0fe22a5c98523286b2a120841a11dd5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 31 Oct 2008 00:44:18 -0700
Subject: pkt_sched: Add ->peek() methods for fifo, prio and SFQ qdiscs.

From: Patrick McHardy <kaber@trash.net>

Just as a demonstration how easy adding a peek operation to the
work-conserving qdiscs actually is. It doesn't need to keep or change
any internal state in many cases thanks to the guarantee that the
packet will either be dequeued or, if another packet arrives, the
upper qdisc will immediately ->peek again to reevaluate the state.

(This is only slightly modified Patrick's patch.)

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  5 +++++
 net/sched/sch_fifo.c      |  2 ++
 net/sched/sch_prio.c      | 14 ++++++++++++++
 net/sched/sch_sfq.c       | 12 ++++++++++++
 4 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f81f7c4d76fa..da6839a7ff50 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -433,6 +433,11 @@ static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch)
 	return __qdisc_dequeue_tail(sch, &sch->q);
 }
 
+static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
+{
+	return skb_peek(&sch->q);
+}
+
 static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch,
 				  struct sk_buff_head *list)
 {
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 23d258bfe8ac..8825e8806f41 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -83,6 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
+	.peek		=	qdisc_peek_head,
 	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
@@ -98,6 +99,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
+	.peek		=	qdisc_peek_head,
 	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 504a78cdb718..3651da3e2802 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -120,6 +120,19 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	return ret;
 }
 
+static struct sk_buff *prio_peek(struct Qdisc *sch)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < q->bands; prio++) {
+		struct Qdisc *qdisc = q->queues[prio];
+		struct sk_buff *skb = qdisc->ops->peek(qdisc);
+		if (skb)
+			return skb;
+	}
+	return NULL;
+}
 
 static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 {
@@ -421,6 +434,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct prio_sched_data),
 	.enqueue	=	prio_enqueue,
 	.dequeue	=	prio_dequeue,
+	.peek		=	prio_peek,
 	.requeue	=	prio_requeue,
 	.drop		=	prio_drop,
 	.init		=	prio_init,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index fe1508ef0d3d..198b83d42ba8 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -391,8 +391,19 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_CN;
 }
 
+static struct sk_buff *
+sfq_peek(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_index a;
 
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
 
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
+}
 
 static struct sk_buff *
 sfq_dequeue(struct Qdisc *sch)
@@ -624,6 +635,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct sfq_sched_data),
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
+	.peek		=	sfq_peek,
 	.requeue	=	sfq_requeue,
 	.drop		=	sfq_drop,
 	.init		=	sfq_init,
-- 
cgit v1.2.3


From 77be155cba4e163e8bba9fd27222a8b6189ec4f7 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 31 Oct 2008 00:47:01 -0700
Subject: pkt_sched: Add peek emulation for non-work-conserving qdiscs.

This patch adds qdisc_peek_dequeued() wrapper to emulate peek method
with qdisc->dequeue() and storing "peeked" skb in qdisc->gso_skb until
dequeuing. This is mainly for compatibility reasons not to break some
strange configs because peeking is expected for non-work-conserving
parent qdiscs to query work-conserving child qdiscs.

This implementation requires using qdisc_dequeue_peeked() wrapper
instead of directly calling qdisc->dequeue() for all qdiscs ever
querried with qdisc->ops->peek() or qdisc_peek_dequeued().

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 23 +++++++++++++++++++++++
 net/sched/sch_atm.c       |  4 ++--
 net/sched/sch_cbq.c       |  1 +
 net/sched/sch_hfsc.c      |  3 ++-
 net/sched/sch_htb.c       |  1 +
 net/sched/sch_netem.c     |  5 +++--
 net/sched/sch_tbf.c       |  3 ++-
 7 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index da6839a7ff50..9dcb5bfe094a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -438,6 +438,29 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
 	return skb_peek(&sch->q);
 }
 
+/* generic pseudo peek method for non-work-conserving qdisc */
+static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
+{
+	/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
+	if (!sch->gso_skb)
+		sch->gso_skb = sch->dequeue(sch);
+
+	return sch->gso_skb;
+}
+
+/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
+static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
+{
+	struct sk_buff *skb = sch->gso_skb;
+
+	if (skb)
+		sch->gso_skb = NULL;
+	else
+		skb = sch->dequeue(sch);
+
+	return skb;
+}
+
 static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch,
 				  struct sk_buff_head *list)
 {
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2ee0c1a8efa9..6eb9a650b63d 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -484,7 +484,7 @@ static void sch_atm_dequeue(unsigned long data)
 			if (!atm_may_send(flow->vcc, skb->truesize))
 				break;
 
-			skb = flow->q->dequeue(flow->q);
+			skb = qdisc_dequeue_peeked(flow->q);
 			if (unlikely(!skb))
 				break;
 
@@ -519,7 +519,7 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
 
 	pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
 	tasklet_schedule(&p->task);
-	skb = p->link.q->dequeue(p->link.q);
+	skb = qdisc_dequeue_peeked(p->link.q);
 	if (skb)
 		sch->q.qlen--;
 	return skb;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 03e389e8d945..63efa70abbea 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -2066,6 +2066,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct cbq_sched_data),
 	.enqueue	=	cbq_enqueue,
 	.dequeue	=	cbq_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	cbq_requeue,
 	.drop		=	cbq_drop,
 	.init		=	cbq_init,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index ddfc40887848..d90b1652f2af 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1634,7 +1634,7 @@ hfsc_dequeue(struct Qdisc *sch)
 		}
 	}
 
-	skb = cl->qdisc->dequeue(cl->qdisc);
+	skb = qdisc_dequeue_peeked(cl->qdisc);
 	if (skb == NULL) {
 		if (net_ratelimit())
 			printk("HFSC: Non-work-conserving qdisc ?\n");
@@ -1727,6 +1727,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
 	.dump		= hfsc_dump_qdisc,
 	.enqueue	= hfsc_enqueue,
 	.dequeue	= hfsc_dequeue,
+	.peek		= qdisc_peek_dequeued,
 	.requeue	= hfsc_requeue,
 	.drop		= hfsc_drop,
 	.cl_ops		= &hfsc_class_ops,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d14f02056ae6..3fda8199713d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1565,6 +1565,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct htb_sched),
 	.enqueue	=	htb_enqueue,
 	.dequeue	=	htb_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	htb_requeue,
 	.drop		=	htb_drop,
 	.init		=	htb_init,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 74fbdb52baed..3080bd6ee332 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -290,8 +290,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 
 		/* if more time remaining? */
 		if (cb->time_to_send <= now) {
-			skb = q->qdisc->dequeue(q->qdisc);
-			if (!skb)
+			skb = qdisc_dequeue_peeked(q->qdisc);
+			if (unlikely(!skb))
 				return NULL;
 
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
@@ -714,6 +714,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct netem_sched_data),
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	netem_requeue,
 	.drop		=	netem_drop,
 	.init		=	netem_init,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 61fdc77a48d2..435076cf620e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -192,7 +192,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		toks -= L2T(q, len);
 
 		if ((toks|ptoks) >= 0) {
-			skb = q->qdisc->dequeue(q->qdisc);
+			skb = qdisc_dequeue_peeked(q->qdisc);
 			if (unlikely(!skb))
 				return NULL;
 
@@ -467,6 +467,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct tbf_sched_data),
 	.enqueue	=	tbf_enqueue,
 	.dequeue	=	tbf_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	tbf_requeue,
 	.drop		=	tbf_drop,
 	.init		=	tbf_init,
-- 
cgit v1.2.3


From 3685f25de1b0447fff381c420de1e25bd57c9efb Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 31 Oct 2008 00:56:49 -0700
Subject: misc: replace NIPQUAD()

Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u
can be replaced with %pI4

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sunrpc/svc_xprt.h            | 4 ++--
 include/net/ip_vs.h                        | 4 ++--
 include/net/netfilter/nf_conntrack_tuple.h | 6 +++---
 include/net/sctp/sctp.h                    | 4 ++--
 security/selinux/avc.c                     | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 51cb75ea42d5..0127daca4354 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -139,8 +139,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr,
 {
 	switch (addr->sa_family) {
 	case AF_INET:
-		snprintf(buf, len, "%u.%u.%u.%u, port=%u",
-			NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
+		snprintf(buf, len, "%pI4, port=%u",
+			&((struct sockaddr_in *)addr)->sin_addr,
 			ntohs(((struct sockaddr_in *) addr)->sin_port));
 		break;
 
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index af48cada561e..fc63353779f0 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -91,8 +91,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
 			       &addr->in6) + 1;
 	else
 #endif
-		len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT,
-			       NIPQUAD(addr->ip)) + 1;
+		len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
+			       &addr->ip) + 1;
 
 	*idx += len;
 	BUG_ON(*idx > buf_len + 1);
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 42f1fc96f3ec..f2f6aa73dc10 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -112,10 +112,10 @@ struct nf_conntrack_tuple_mask
 static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t)
 {
 #ifdef DEBUG
-	printk("tuple %p: %u " NIPQUAD_FMT ":%hu -> " NIPQUAD_FMT ":%hu\n",
+	printk("tuple %p: %u %pI4:%hu -> %pI4:%hu\n",
 	       t, t->dst.protonum,
-	       NIPQUAD(t->src.u3.ip), ntohs(t->src.u.all),
-	       NIPQUAD(t->dst.u3.ip), ntohs(t->dst.u.all));
+	       &t->src.u3.ip, ntohs(t->src.u.all),
+	       &t->dst.u3.ip, ntohs(t->dst.u.all));
 #endif
 }
 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index e71b0f7ce88e..23797506f593 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -291,9 +291,9 @@ extern int sctp_debug_flag;
 			       otherparms); \
 		} else { \
 			printk(KERN_DEBUG \
-			       lead NIPQUAD_FMT trail, \
+			       lead "%pI4" trail, \
 			       leadparm, \
-			       NIPQUAD(saddr->v4.sin_addr.s_addr), \
+			       &saddr->v4.sin_addr.s_addr, \
 			       otherparms); \
 		} \
 	}
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index ed6af12cdf43..d43bd6baeeaa 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -504,7 +504,7 @@ static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
 				       __be16 port, char *name1, char *name2)
 {
 	if (addr)
-		audit_log_format(ab, " %s=" NIPQUAD_FMT, name1, NIPQUAD(addr));
+		audit_log_format(ab, " %s=%pI4", name1, &addr);
 	if (port)
 		audit_log_format(ab, " %s=%d", name2, ntohs(port));
 }
-- 
cgit v1.2.3


From 92be3d6bdf2cb34972ab50e12ad4da1076e690da Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 31 Oct 2008 09:48:08 +0800
Subject: kexec/i386: allocate page table pages dynamically

Impact: save .text size when kexec is built in but not loaded

This patch adds an architecture specific struct kimage_arch into
struct kimage. The pointers to page table pages used by kexec are
added to struct kimage_arch. The page tables pages are dynamically
allocated in machine_kexec_prepare instead of statically from BSS
segment. This will save up to 20k memory when kexec image is not
loaded.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/kexec.h       | 14 ++++++++
 arch/x86/kernel/machine_kexec_32.c | 67 ++++++++++++++++++++++++++------------
 include/linux/kexec.h              |  4 +++
 3 files changed, 64 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index a1f22771a15a..df9c41a9c6ae 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -170,6 +170,20 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned long start_address) ATTRIB_NORET;
 #endif
 
+#ifdef CONFIG_X86_32
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	pgd_t *pgd;
+#ifdef CONFIG_X86_PAE
+	pmd_t *pmd0;
+	pmd_t *pmd1;
+#endif
+	pte_t *pte0;
+	pte_t *pte1;
+};
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7a385746509a..1100312847a5 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
+#include <linux/gfp.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -25,15 +26,6 @@
 #include <asm/system.h>
 #include <asm/cacheflush.h>
 
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u32 kexec_pgd[1024] PAGE_ALIGNED;
-#ifdef CONFIG_X86_PAE
-static u32 kexec_pmd0[1024] PAGE_ALIGNED;
-static u32 kexec_pmd1[1024] PAGE_ALIGNED;
-#endif
-static u32 kexec_pte0[1024] PAGE_ALIGNED;
-static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
 static void set_idt(void *newidt, __u16 limit)
 {
 	struct desc_ptr curidt;
@@ -76,6 +68,37 @@ static void load_segments(void)
 #undef __STR
 }
 
+static void machine_kexec_free_page_tables(struct kimage *image)
+{
+	free_page((unsigned long)image->arch.pgd);
+#ifdef CONFIG_X86_PAE
+	free_page((unsigned long)image->arch.pmd0);
+	free_page((unsigned long)image->arch.pmd1);
+#endif
+	free_page((unsigned long)image->arch.pte0);
+	free_page((unsigned long)image->arch.pte1);
+}
+
+static int machine_kexec_alloc_page_tables(struct kimage *image)
+{
+	image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+#ifdef CONFIG_X86_PAE
+	image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+	image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+#endif
+	image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+	image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+	if (!image->arch.pgd ||
+#ifdef CONFIG_X86_PAE
+	    !image->arch.pmd0 || !image->arch.pmd1 ||
+#endif
+	    !image->arch.pte0 || !image->arch.pte1) {
+		machine_kexec_free_page_tables(image);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 /*
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
@@ -87,13 +110,14 @@ static void load_segments(void)
  * reboot code buffer to allow us to avoid allocations
  * later.
  *
- * Make control page executable.
+ * - Make control page executable.
+ * - Allocate page tables
  */
 int machine_kexec_prepare(struct kimage *image)
 {
 	if (nx_enabled)
 		set_pages_x(image->control_code_page, 1);
-	return 0;
+	return machine_kexec_alloc_page_tables(image);
 }
 
 /*
@@ -104,6 +128,7 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 	if (nx_enabled)
 		set_pages_nx(image->control_code_page, 1);
+	machine_kexec_free_page_tables(image);
 }
 
 /*
@@ -150,18 +175,18 @@ void machine_kexec(struct kimage *image)
 	relocate_kernel_ptr = control_page;
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
 	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
-	page_list[PA_PGD] = __pa(kexec_pgd);
-	page_list[VA_PGD] = (unsigned long)kexec_pgd;
+	page_list[PA_PGD] = __pa(image->arch.pgd);
+	page_list[VA_PGD] = (unsigned long)image->arch.pgd;
 #ifdef CONFIG_X86_PAE
-	page_list[PA_PMD_0] = __pa(kexec_pmd0);
-	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-	page_list[PA_PMD_1] = __pa(kexec_pmd1);
-	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
+	page_list[PA_PMD_0] = __pa(image->arch.pmd0);
+	page_list[VA_PMD_0] = (unsigned long)image->arch.pmd0;
+	page_list[PA_PMD_1] = __pa(image->arch.pmd1);
+	page_list[VA_PMD_1] = (unsigned long)image->arch.pmd1;
 #endif
-	page_list[PA_PTE_0] = __pa(kexec_pte0);
-	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-	page_list[PA_PTE_1] = __pa(kexec_pte1);
-	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+	page_list[PA_PTE_0] = __pa(image->arch.pte0);
+	page_list[VA_PTE_0] = (unsigned long)image->arch.pte0;
+	page_list[PA_PTE_1] = __pa(image->arch.pte1);
+	page_list[VA_PTE_1] = (unsigned long)image->arch.pte1;
 
 	if (image->type == KEXEC_TYPE_DEFAULT)
 		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 17f76fc05173..adc34f2c6eff 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -100,6 +100,10 @@ struct kimage {
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
 	unsigned int preserve_context : 1;
+
+#ifdef ARCH_HAS_KIMAGE_ARCH
+	struct kimage_arch arch;
+#endif
 };
 
 
-- 
cgit v1.2.3


From a26a2a27396c0a0877aa701f8f92d08ba550a6c9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 31 Oct 2008 00:03:22 -0400
Subject: ftrace: nmi safe code clean ups

Impact: cleanup

This patch cleans up the NMI safe code for dynamic ftrace as suggested
by Andrew Morton.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/include/asm/ftrace.h     |  4 ++--
 arch/powerpc/include/asm/ftrace.h |  4 ++--
 arch/sh/include/asm/ftrace.h      |  4 ++--
 arch/sparc/include/asm/ftrace.h   |  4 ++--
 arch/x86/include/asm/ftrace.h     | 10 +++++-----
 arch/x86/kernel/ftrace.c          | 16 ++++++++--------
 include/linux/ftrace.h            |  3 +++
 kernel/trace/trace.c              |  9 ++++-----
 8 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index d4c24a7a9280..3f3a1d1508ea 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -2,8 +2,8 @@
 #define _ASM_ARM_FTRACE
 
 #ifndef __ASSEMBLY__
-#define ftrace_nmi_enter()	do { } while (0)
-#define ftrace_nmi_exit()	do { } while (0)
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif
 
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 7652755dc000..1cd72700fbc0 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -2,8 +2,8 @@
 #define _ASM_POWERPC_FTRACE
 
 #ifndef __ASSEMBLY__
-#define ftrace_nmi_enter()	do { } while (0)
-#define ftrace_nmi_exit()	do { } while (0)
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif
 
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index cdf2cb0b9ffe..31ada0370cb6 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -2,8 +2,8 @@
 #define __ASM_SH_FTRACE_H
 
 #ifndef __ASSEMBLY__
-#define ftrace_nmi_enter()	do { } while (0)
-#define ftrace_nmi_exit()	do { } while (0)
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index 33a95feeb137..62055ac0496e 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -2,8 +2,8 @@
 #define _ASM_SPARC64_FTRACE
 
 #ifndef __ASSEMBLY__
-#define ftrace_nmi_enter()	do { } while (0)
-#define ftrace_nmi_exit()	do { } while (0)
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif
 
 #ifdef CONFIG_MCOUNT
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index f2ed6b704a75..a23468194b8c 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -22,16 +22,16 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 extern void ftrace_nmi_enter(void);
 extern void ftrace_nmi_exit(void);
 #else
-#define ftrace_nmi_enter()	do { } while (0)
-#define ftrace_nmi_exit()	do { } while (0)
-#endif
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif
+#endif /* __ASSEMBLY__ */
 
 #else /* CONFIG_FUNCTION_TRACER */
 
 #ifndef __ASSEMBLY__
-#define ftrace_nmi_enter()	do { } while (0)
-#define ftrace_nmi_exit()	do { } while (0)
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif
 
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 6685b0fc1b44..69149337f2fe 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -67,7 +67,7 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * Two buffers are added: An IP buffer and a "code" buffer.
  *
- * 1) Put in the instruction pointer into the IP buffer
+ * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
  * 2) Set a flag that says we are modifying code
  * 3) Wait for any running NMIs to finish.
@@ -85,14 +85,14 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
-static atomic_t in_nmi;
-static int mod_code_status;
-static int mod_code_write;
-static void *mod_code_ip;
-static void *mod_code_newcode;
+static atomic_t in_nmi = ATOMIC_INIT(0);
+static int mod_code_status;		/* holds return value of text write */
+static int mod_code_write;		/* set when NMI should do the write */
+static void *mod_code_ip;		/* holds the IP to write to */
+static void *mod_code_newcode;		/* holds the text to write to the IP */
 
-static int nmi_wait_count;
-static atomic_t nmi_update_count;
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
 
 int ftrace_arch_read_dyn_info(char *buf, int size)
 {
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 703eb53cfa2b..22240dfe912e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,6 +74,9 @@ extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
 
+/* May be defined in arch */
+extern int ftrace_arch_read_dyn_info(char *buf, int size);
+
 /**
  * ftrace_modify_code - modify code segment
  * @ip: the address of the code segment
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bc36febc0771..7f86067d760c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2815,10 +2815,6 @@ static struct file_operations tracing_mark_fops = {
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-#define DYN_INFO_BUF_SIZE 1023
-static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1];
-static DEFINE_MUTEX(dyn_info_mutex);
-
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
 {
 	return 0;
@@ -2828,14 +2824,17 @@ static ssize_t
 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
+	static char ftrace_dyn_info_buffer[1024];
+	static DEFINE_MUTEX(dyn_info_mutex);
 	unsigned long *p = filp->private_data;
 	char *buf = ftrace_dyn_info_buffer;
+	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
 	int r;
 
 	mutex_lock(&dyn_info_mutex);
 	r = sprintf(buf, "%ld ", *p);
 
-	r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r);
+	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
 	buf[r++] = '\n';
 
 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-- 
cgit v1.2.3


From 3db594380b8452eda4d88b12844077809607caaa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 7 Oct 2008 12:04:33 +0200
Subject: mac80211: remove wiphy_to_hw

This isn't used by anyone, if we ever need it we can add
it back, until then it's useless.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 2 --
 net/mac80211/cfg.c     | 7 -------
 2 files changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8856e2d60e9f..e41b9a8d186f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -868,8 +868,6 @@ struct ieee80211_hw {
 	u8 max_altrate_tries;
 };
 
-struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy);
-
 /**
  * SET_IEEE80211_DEV - set device for 802.11 hardware
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 855126a3039d..8dc5e46cea68 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -17,13 +17,6 @@
 #include "rate.h"
 #include "mesh.h"
 
-struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy)
-{
-	struct ieee80211_local *local = wiphy_priv(wiphy);
-	return &local->hw;
-}
-EXPORT_SYMBOL(wiphy_to_hw);
-
 static bool nl80211_type_check(enum nl80211_iftype type)
 {
 	switch (type) {
-- 
cgit v1.2.3


From e87a2feea75e3cba7af43ed9317b56b282d87742 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 7 Oct 2008 12:04:35 +0200
Subject: mac80211: remove max_antenna_gain config

The antenna gain isn't exactly configurable, despite the belief of
some unnamed individual who thinks that the EEPROM might influence
it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 2 --
 net/mac80211/main.c    | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e41b9a8d186f..2fab5a7e5db7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -470,7 +470,6 @@ enum ieee80211_conf_flags {
  * @listen_interval: listen interval in units of beacon interval
  * @flags: configuration flags defined above
  * @power_level: requested transmit power (in dBm)
- * @max_antenna_gain: maximum antenna gain (in dBi)
  * @antenna_sel_tx: transmit antenna selection, 0: default/diversity,
  *	1/2: antenna 0/1
  * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx
@@ -485,7 +484,6 @@ struct ieee80211_conf {
 	u16 listen_interval;
 	u32 flags;
 	int power_level;
-	int max_antenna_gain;
 	u8 antenna_sel_tx;
 	u8 antenna_sel_rx;
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2ff26d03cd50..d1c4e86b215c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -215,8 +215,6 @@ int ieee80211_hw_config(struct ieee80211_local *local)
 		local->hw.conf.power_level = min(chan->max_power,
 					       local->hw.conf.power_level);
 
-	local->hw.conf.max_antenna_gain = chan->max_antenna_gain;
-
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: HW CONFIG: freq=%d\n",
 	       wiphy_name(local->hw.wiphy), chan->center_freq);
-- 
cgit v1.2.3


From 7a5158ef8da70fdedeb0530faaa8128aa645be3c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 8 Oct 2008 10:59:33 +0200
Subject: mac80211: fix short slot handling

This patch makes mac80211 handle short slot requests from the AP
properly. Also warn about uses of IEEE80211_CONF_SHORT_SLOT_TIME
and optimise out the code since it cannot ever be hit anyway.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 28 +++++++++++--------
 net/mac80211/main.c    |  9 ++++--
 net/mac80211/mlme.c    | 74 +++++++++++++++++++++++++++-----------------------
 3 files changed, 62 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2fab5a7e5db7..d1466e7a47b8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -180,8 +180,12 @@ enum ieee80211_bss_change {
  * @assoc: association status
  * @aid: association ID number, valid only when @assoc is true
  * @use_cts_prot: use CTS protection
- * @use_short_preamble: use 802.11b short preamble
- * @use_short_slot: use short slot time (only relevant for ERP)
+ * @use_short_preamble: use 802.11b short preamble;
+ *	if the hardware cannot handle this it must set the
+ *	IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE hardware flag
+ * @use_short_slot: use short slot time (only relevant for ERP);
+ *	if the hardware cannot handle this it must set the
+ *	IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag
  * @dtim_period: num of beacons before the next DTIM, for PSM
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
@@ -442,23 +446,23 @@ struct ieee80211_rx_status {
  *
  * Flags to define PHY configuration options
  *
- * @IEEE80211_CONF_SHORT_SLOT_TIME: use 802.11g short slot time
  * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported)
  * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported)
  * @IEEE80211_CONF_PS: Enable 802.11 power save mode
  */
 enum ieee80211_conf_flags {
-	/*
-	 * TODO: IEEE80211_CONF_SHORT_SLOT_TIME will be removed once drivers
-	 * have been converted to use bss_info_changed() for slot time
-	 * configuration
-	 */
-	IEEE80211_CONF_SHORT_SLOT_TIME	= (1<<0),
-	IEEE80211_CONF_RADIOTAP		= (1<<1),
-	IEEE80211_CONF_SUPPORT_HT_MODE	= (1<<2),
-	IEEE80211_CONF_PS		= (1<<3),
+	IEEE80211_CONF_RADIOTAP		= (1<<0),
+	IEEE80211_CONF_SUPPORT_HT_MODE	= (1<<1),
+	IEEE80211_CONF_PS		= (1<<2),
 };
 
+/* XXX: remove all this once drivers stop trying to use it */
+static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
+{
+	return 0;
+}
+#define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME())
+
 /**
  * struct ieee80211_conf - configuration of the device
  *
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d1c4e86b215c..c427954fe8e8 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -346,9 +346,12 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
 {
-	sdata->bss_conf.use_cts_prot = 0;
-	sdata->bss_conf.use_short_preamble = 0;
-	return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE;
+	sdata->bss_conf.use_cts_prot = false;
+	sdata->bss_conf.use_short_preamble = false;
+	sdata->bss_conf.use_short_slot = false;
+	return BSS_CHANGED_ERP_CTS_PROT |
+	       BSS_CHANGED_ERP_PREAMBLE |
+	       BSS_CHANGED_ERP_SLOT;
 }
 
 void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6ad2619db85f..829995e740a7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -568,15 +568,27 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	}
 }
 
-static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata,
-					   bool use_protection,
-					   bool use_short_preamble)
+static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
+					   u16 capab, bool erp_valid, u8 erp)
 {
 	struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 #endif
 	u32 changed = 0;
+	bool use_protection;
+	bool use_short_preamble;
+	bool use_short_slot;
+
+	if (erp_valid) {
+		use_protection = (erp & WLAN_ERP_USE_PROTECTION) != 0;
+		use_short_preamble = (erp & WLAN_ERP_BARKER_PREAMBLE) == 0;
+	} else {
+		use_protection = false;
+		use_short_preamble = !!(capab & WLAN_CAPABILITY_SHORT_PREAMBLE);
+	}
+
+	use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME);
 
 	if (use_protection != bss_conf->use_cts_prot) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -605,30 +617,18 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata,
 		changed |= BSS_CHANGED_ERP_PREAMBLE;
 	}
 
-	return changed;
-}
-
-static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata,
-				   u8 erp_value)
-{
-	bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0;
-	bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0;
-
-	return ieee80211_handle_protect_preamb(sdata,
-			use_protection, use_short_preamble);
-}
-
-static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
-					   struct ieee80211_bss *bss)
-{
-	u32 changed = 0;
-
-	if (bss->has_erp_value)
-		changed |= ieee80211_handle_erp_ie(sdata, bss->erp_value);
-	else {
-		u16 capab = bss->capability;
-		changed |= ieee80211_handle_protect_preamb(sdata, false,
-				(capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0);
+	if (use_short_slot != bss_conf->use_short_slot) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "%s: switched to %s slot"
+			       " (BSSID=%s)\n",
+			       sdata->dev->name,
+			       use_short_slot ? "short" : "long",
+			       ifsta->bssid);
+		}
+#endif
+		bss_conf->use_short_slot = use_short_slot;
+		changed |= BSS_CHANGED_ERP_SLOT;
 	}
 
 	return changed;
@@ -721,7 +721,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		sdata->bss_conf.timestamp = bss->timestamp;
 		sdata->bss_conf.dtim_period = bss->dtim_period;
 
-		changed |= ieee80211_handle_bss_capability(sdata, bss);
+		changed |= ieee80211_handle_bss_capability(sdata,
+			bss->capability, bss->has_erp_value, bss->erp_value);
 
 		ieee80211_rx_bss_put(local, bss);
 	}
@@ -1657,6 +1658,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_conf *conf = &local->hw.conf;
 	u32 changed = 0;
+	bool erp_valid;
+	u8 erp_value = 0;
 
 	/* Process beacon from the current BSS */
 	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
@@ -1678,13 +1681,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param,
 				 elems.wmm_param_len);
 
-	if (elems.erp_info && elems.erp_info_len >= 1)
-		changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]);
-	else {
-		u16 capab = le16_to_cpu(mgmt->u.beacon.capab_info);
-		changed |= ieee80211_handle_protect_preamb(sdata, false,
-				(capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0);
+
+	if (elems.erp_info && elems.erp_info_len >= 1) {
+		erp_valid = true;
+		erp_value = elems.erp_info[0];
+	} else {
+		erp_valid = false;
 	}
+	changed |= ieee80211_handle_bss_capability(sdata,
+			le16_to_cpu(mgmt->u.beacon.capab_info),
+			erp_valid, erp_value);
 
 	if (elems.ht_cap_elem && elems.ht_info_elem &&
 	    elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
-- 
cgit v1.2.3


From d9fe60dea7779d412b34679f1177c5ca1940ea8d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 9 Oct 2008 12:13:49 +0200
Subject: 802.11: clean up/fix HT support

This patch cleans up a number of things:
 * the unusable definition of the HT capabilities/HT information
   information elements
 * variable names that are hard to understand
 * mac80211: move ieee80211_handle_ht to ht.c and remove the unused
             enable_ht parameter
 * mac80211: fix bug with MCS rate 32 in ieee80211_handle_ht
 * mac80211: fix bug with casting the result of ieee80211_bss_get_ie
             to an information element _contents_ rather than the
             whole element, add size checking (another out-of-bounds
             access bug fixed!)
 * mac80211: remove some unused return values in favour of BUG_ON
             checking
 * a few minor other things

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c         |  57 ++++++-----
 drivers/net/wireless/ath9k/rc.c           |  10 +-
 drivers/net/wireless/ath9k/rc.h           |   1 -
 drivers/net/wireless/ath9k/recv.c         |   2 +-
 drivers/net/wireless/ath9k/xmit.c         |   2 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c |  18 ++--
 drivers/net/wireless/iwlwifi/iwl-agn.c    |  20 ++--
 drivers/net/wireless/iwlwifi/iwl-core.c   |  71 +++++++-------
 drivers/net/wireless/iwlwifi/iwl-core.h   |   2 +-
 drivers/net/wireless/iwlwifi/iwl-dev.h    |   4 +-
 drivers/net/wireless/iwlwifi/iwl-scan.c   |  12 +--
 drivers/net/wireless/iwlwifi/iwl-sta.c    |   6 +-
 drivers/net/wireless/mac80211_hwsim.c     |  19 ++--
 include/linux/ieee80211.h                 | 133 ++++++++++++++++++--------
 include/net/mac80211.h                    |  12 +--
 include/net/wireless.h                    |  15 +--
 net/mac80211/cfg.c                        |   7 +-
 net/mac80211/ht.c                         | 151 +++++++++++++++++++++++++-----
 net/mac80211/ieee80211_i.h                |  16 ++--
 net/mac80211/main.c                       |  94 -------------------
 net/mac80211/mlme.c                       |  45 ++++-----
 net/mac80211/util.c                       |   4 +-
 net/mac80211/wext.c                       |   4 +-
 23 files changed, 386 insertions(+), 319 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 186d75acb326..5e087c92a6d9 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -61,24 +61,24 @@ static u32 ath_get_extchanmode(struct ath_softc *sc,
 
 	switch (chan->band) {
 	case IEEE80211_BAND_2GHZ:
-		if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_NONE) &&
+		if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE) &&
 		    (tx_chan_width == ATH9K_HT_MACMODE_20))
 			chanmode = CHANNEL_G_HT20;
-		if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_ABOVE) &&
+		if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) &&
 		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
 			chanmode = CHANNEL_G_HT40PLUS;
-		if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_BELOW) &&
+		if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) &&
 		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
 			chanmode = CHANNEL_G_HT40MINUS;
 		break;
 	case IEEE80211_BAND_5GHZ:
-		if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_NONE) &&
+		if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE) &&
 		    (tx_chan_width == ATH9K_HT_MACMODE_20))
 			chanmode = CHANNEL_A_HT20;
-		if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_ABOVE) &&
+		if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) &&
 		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
 			chanmode = CHANNEL_A_HT40PLUS;
-		if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_BELOW) &&
+		if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) &&
 		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
 			chanmode = CHANNEL_A_HT40MINUS;
 		break;
@@ -215,24 +215,24 @@ static void ath_key_delete(struct ath_softc *sc, struct ieee80211_key_conf *key)
 	ath_key_reset(sc, key->keyidx, freeslot);
 }
 
-static void setup_ht_cap(struct ieee80211_ht_info *ht_info)
+static void setup_ht_cap(struct ieee80211_sta_ht_cap *ht_info)
 {
 #define	ATH9K_HT_CAP_MAXRXAMPDU_65536 0x3	/* 2 ^ 16 */
 #define	ATH9K_HT_CAP_MPDUDENSITY_8 0x6		/* 8 usec */
 
-	ht_info->ht_supported = 1;
-	ht_info->cap = (u16)IEEE80211_HT_CAP_SUP_WIDTH
-			|(u16)IEEE80211_HT_CAP_SM_PS
-			|(u16)IEEE80211_HT_CAP_SGI_40
-			|(u16)IEEE80211_HT_CAP_DSSSCCK40;
+	ht_info->ht_supported = true;
+	ht_info->cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		       IEEE80211_HT_CAP_SM_PS |
+		       IEEE80211_HT_CAP_SGI_40 |
+		       IEEE80211_HT_CAP_DSSSCCK40;
 
 	ht_info->ampdu_factor = ATH9K_HT_CAP_MAXRXAMPDU_65536;
 	ht_info->ampdu_density = ATH9K_HT_CAP_MPDUDENSITY_8;
-	/* setup supported mcs set */
-	memset(ht_info->supp_mcs_set, 0, 16);
-	ht_info->supp_mcs_set[0] = 0xff;
-	ht_info->supp_mcs_set[1] = 0xff;
-	ht_info->supp_mcs_set[12] = IEEE80211_HT_CAP_MCS_TX_DEFINED;
+	/* set up supported mcs set */
+	memset(&ht_info->mcs, 0, sizeof(ht_info->mcs));
+	ht_info->mcs.rx_mask[0] = 0xff;
+	ht_info->mcs.rx_mask[1] = 0xff;
+	ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 }
 
 static int ath_rate2idx(struct ath_softc *sc, int rate)
@@ -328,31 +328,28 @@ static u8 parse_mpdudensity(u8 mpdudensity)
 static void ath9k_ht_conf(struct ath_softc *sc,
 			  struct ieee80211_bss_conf *bss_conf)
 {
-#define IEEE80211_HT_CAP_40MHZ_INTOLERANT BIT(14)
 	struct ath_ht_info *ht_info = &sc->sc_ht_info;
 
 	if (bss_conf->assoc_ht) {
 		ht_info->ext_chan_offset =
 			bss_conf->ht_bss_conf->bss_cap &
-				IEEE80211_HT_IE_CHA_SEC_OFFSET;
+				IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
 
-		if (!(bss_conf->ht_conf->cap &
+		if (!(bss_conf->ht_cap->cap &
 			IEEE80211_HT_CAP_40MHZ_INTOLERANT) &&
 			    (bss_conf->ht_bss_conf->bss_cap &
-				IEEE80211_HT_IE_CHA_WIDTH))
+				IEEE80211_HT_PARAM_CHAN_WIDTH_ANY))
 			ht_info->tx_chan_width = ATH9K_HT_MACMODE_2040;
 		else
 			ht_info->tx_chan_width = ATH9K_HT_MACMODE_20;
 
 		ath9k_hw_set11nmac2040(sc->sc_ah, ht_info->tx_chan_width);
 		ht_info->maxampdu = 1 << (IEEE80211_HTCAP_MAXRXAMPDU_FACTOR +
-					bss_conf->ht_conf->ampdu_factor);
+					bss_conf->ht_cap->ampdu_factor);
 		ht_info->mpdudensity =
-			parse_mpdudensity(bss_conf->ht_conf->ampdu_density);
+			parse_mpdudensity(bss_conf->ht_cap->ampdu_density);
 
 	}
-
-#undef IEEE80211_HT_CAP_40MHZ_INTOLERANT
 }
 
 static void ath9k_bss_assoc_info(struct ath_softc *sc,
@@ -411,7 +408,7 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc,
 			return;
 		}
 
-		if (hw->conf.ht_conf.ht_supported)
+		if (hw->conf.ht_cap.ht_supported)
 			sc->sc_ah->ah_channels[pos].chanmode =
 				ath_get_extchanmode(sc, curchan);
 		else
@@ -534,7 +531,7 @@ int _ath_rx_indicate(struct ath_softc *sc,
 
 	if (an) {
 		ath_rx_input(sc, an,
-			     hw->conf.ht_conf.ht_supported,
+			     hw->conf.ht_cap.ht_supported,
 			     skb, status, &st);
 	}
 	if (!an || (st != ATH_RX_CONSUMED))
@@ -943,7 +940,7 @@ static int ath_attach(u16 devid,
 
 	if (sc->sc_ah->ah_caps.hw_caps & ATH9K_HW_CAP_HT)
 		/* Setup HT capabilities for 2.4Ghz*/
-		setup_ht_cap(&sc->sbands[IEEE80211_BAND_2GHZ].ht_info);
+		setup_ht_cap(&sc->sbands[IEEE80211_BAND_2GHZ].ht_cap);
 
 	hw->wiphy->bands[IEEE80211_BAND_2GHZ] =
 		&sc->sbands[IEEE80211_BAND_2GHZ];
@@ -958,7 +955,7 @@ static int ath_attach(u16 devid,
 
 		if (sc->sc_ah->ah_caps.hw_caps & ATH9K_HW_CAP_HT)
 			/* Setup HT capabilities for 5Ghz*/
-			setup_ht_cap(&sc->sbands[IEEE80211_BAND_5GHZ].ht_info);
+			setup_ht_cap(&sc->sbands[IEEE80211_BAND_5GHZ].ht_cap);
 
 		hw->wiphy->bands[IEEE80211_BAND_5GHZ] =
 			&sc->sbands[IEEE80211_BAND_5GHZ];
@@ -1254,7 +1251,7 @@ static int ath9k_config(struct ieee80211_hw *hw,
 		(curchan->band == IEEE80211_BAND_2GHZ) ?
 		CHANNEL_G : CHANNEL_A;
 
-	if (sc->sc_curaid && hw->conf.ht_conf.ht_supported)
+	if (sc->sc_curaid && hw->conf.ht_cap.ht_supported)
 		sc->sc_ah->ah_channels[pos].chanmode =
 			ath_get_extchanmode(sc, curchan);
 
diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index b1e535b8ec48..ee2dbce42b4d 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -1838,7 +1838,7 @@ void ath_rc_node_update(struct ieee80211_hw *hw, struct ath_rate_node *rc_priv)
 	struct ath_softc *sc = hw->priv;
 	u32 capflag = 0;
 
-	if (hw->conf.ht_conf.ht_supported) {
+	if (hw->conf.ht_cap.ht_supported) {
 		capflag |= ATH_RC_HT_FLAG | ATH_RC_DS_FLAG;
 		if (sc->sc_ht_info.tx_chan_width == ATH9K_HT_MACMODE_2040)
 			capflag |= ATH_RC_CW40_FLAG;
@@ -1910,7 +1910,7 @@ static void ath_tx_aggr_resp(struct ath_softc *sc,
 	 */
 	si = container_of(sta, struct sta_info, sta);
 	buffersize = IEEE80211_MIN_AMPDU_BUF <<
-		sband->ht_info.ampdu_factor; /* FIXME */
+		sband->ht_cap.ampdu_factor; /* FIXME */
 	state = si->ampdu_mlme.tid_state_tx[tidno];
 
 	if (state & HT_ADDBA_RECEIVED_MSK) {
@@ -1979,7 +1979,7 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
 
 	/* Check if aggregation has to be enabled for this tid */
 
-	if (hw->conf.ht_conf.ht_supported) {
+	if (hw->conf.ht_cap.ht_supported) {
 		if (ieee80211_is_data_qos(fc)) {
 			qc = ieee80211_get_qos_ctl(hdr);
 			tid = qc[0] & 0xf;
@@ -2027,8 +2027,8 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband,
 
 	ath_setup_rates(sc, sband, sta, ath_rc_priv);
 	if (sc->hw->conf.flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
-		for (i = 0; i < MCS_SET_SIZE; i++) {
-			if (sc->hw->conf.ht_conf.supp_mcs_set[i/8] & (1<<(i%8)))
+		for (i = 0; i < 77; i++) {
+			if (sc->hw->conf.ht_cap.mcs.rx_mask[i/8] & (1<<(i%8)))
 				ath_rc_priv->neg_ht_rates.rs_rates[j++] = i;
 			if (j == ATH_RATE_MAX)
 				break;
diff --git a/drivers/net/wireless/ath9k/rc.h b/drivers/net/wireless/ath9k/rc.h
index b95b41508b98..6671097fad72 100644
--- a/drivers/net/wireless/ath9k/rc.h
+++ b/drivers/net/wireless/ath9k/rc.h
@@ -59,7 +59,6 @@ struct ath_softc;
 #define FALSE 0
 
 #define ATH_RATE_MAX	30
-#define MCS_SET_SIZE	128
 
 enum ieee80211_fixed_rate_mode {
 	IEEE80211_FIXED_RATE_NONE  = 0,
diff --git a/drivers/net/wireless/ath9k/recv.c b/drivers/net/wireless/ath9k/recv.c
index 4983402af559..010fcdfec3f6 100644
--- a/drivers/net/wireless/ath9k/recv.c
+++ b/drivers/net/wireless/ath9k/recv.c
@@ -1119,7 +1119,7 @@ int ath_rx_aggr_start(struct ath_softc *sc,
 
 	sband = hw->wiphy->bands[hw->conf.channel->band];
 	buffersize = IEEE80211_MIN_AMPDU_BUF <<
-		sband->ht_info.ampdu_factor; /* FIXME */
+		sband->ht_cap.ampdu_factor; /* FIXME */
 
 	rxtid = &an->an_aggr.rx.tid[tid];
 
diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c
index 13866043dec9..3770fbe84fce 100644
--- a/drivers/net/wireless/ath9k/xmit.c
+++ b/drivers/net/wireless/ath9k/xmit.c
@@ -300,7 +300,7 @@ static int ath_tx_prepare(struct ath_softc *sc,
 	if (ieee80211_is_data(fc) && !txctl->use_minrate) {
 
 		/* Enable HT only for DATA frames and not for EAPOL */
-		txctl->ht = (hw->conf.ht_conf.ht_supported &&
+		txctl->ht = (hw->conf.ht_cap.ht_supported &&
 			    (tx_info->flags & IEEE80211_TX_CTL_AMPDU));
 
 		if (is_multicast_ether_addr(hdr->addr1)) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index b497d40dc396..cd1bff590491 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -1134,10 +1134,10 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv,
 	s8 is_green = lq_sta->is_green;
 
 	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->ht_info.ht_supported)
+	    !sta->ht_cap.ht_supported)
 		return -1;
 
-	if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
+	if (((sta->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
 						== WLAN_HT_CAP_SM_PS_STATIC)
 		return -1;
 
@@ -1202,7 +1202,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv,
 	s32 rate;
 
 	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->ht_info.ht_supported)
+	    !sta->ht_cap.ht_supported)
 		return -1;
 
 	IWL_DEBUG_RATE("LQ: try to switch to SISO\n");
@@ -2238,19 +2238,19 @@ static void rs_rate_init(void *priv_r, struct ieee80211_supported_band *sband,
 	 * active_siso_rate mask includes 9 MBits (bit 5), and CCK (bits 0-3),
 	 * supp_rates[] does not; shift to convert format, force 9 MBits off.
 	 */
-	lq_sta->active_siso_rate = conf->ht_conf.supp_mcs_set[0] << 1;
-	lq_sta->active_siso_rate |= conf->ht_conf.supp_mcs_set[0] & 0x1;
+	lq_sta->active_siso_rate = conf->ht_cap.mcs.rx_mask[0] << 1;
+	lq_sta->active_siso_rate |= conf->ht_cap.mcs.rx_mask[0] & 0x1;
 	lq_sta->active_siso_rate &= ~((u16)0x2);
 	lq_sta->active_siso_rate <<= IWL_FIRST_OFDM_RATE;
 
 	/* Same here */
-	lq_sta->active_mimo2_rate = conf->ht_conf.supp_mcs_set[1] << 1;
-	lq_sta->active_mimo2_rate |= conf->ht_conf.supp_mcs_set[1] & 0x1;
+	lq_sta->active_mimo2_rate = conf->ht_cap.mcs.rx_mask[1] << 1;
+	lq_sta->active_mimo2_rate |= conf->ht_cap.mcs.rx_mask[1] & 0x1;
 	lq_sta->active_mimo2_rate &= ~((u16)0x2);
 	lq_sta->active_mimo2_rate <<= IWL_FIRST_OFDM_RATE;
 
-	lq_sta->active_mimo3_rate = conf->ht_conf.supp_mcs_set[2] << 1;
-	lq_sta->active_mimo3_rate |= conf->ht_conf.supp_mcs_set[2] & 0x1;
+	lq_sta->active_mimo3_rate = conf->ht_cap.mcs.rx_mask[2] << 1;
+	lq_sta->active_mimo3_rate |= conf->ht_cap.mcs.rx_mask[2] & 0x1;
 	lq_sta->active_mimo3_rate &= ~((u16)0x2);
 	lq_sta->active_mimo3_rate <<= IWL_FIRST_OFDM_RATE;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 2cac09405afe..e6695e80fb53 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -552,7 +552,7 @@ static int iwl4965_send_beacon_cmd(struct iwl_priv *priv)
 static void iwl4965_ht_conf(struct iwl_priv *priv,
 			    struct ieee80211_bss_conf *bss_conf)
 {
-	struct ieee80211_ht_info *ht_conf = bss_conf->ht_conf;
+	struct ieee80211_sta_ht_cap *ht_conf = bss_conf->ht_cap;
 	struct ieee80211_ht_bss_info *ht_bss_conf = bss_conf->ht_bss_conf;
 	struct iwl_ht_info *iwl_conf = &priv->current_ht_config;
 
@@ -573,27 +573,27 @@ static void iwl4965_ht_conf(struct iwl_priv *priv,
 		!!(ht_conf->cap & IEEE80211_HT_CAP_MAX_AMSDU);
 
 	iwl_conf->supported_chan_width =
-		!!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH);
+		!!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40);
 	iwl_conf->extension_chan_offset =
-		ht_bss_conf->bss_cap & IEEE80211_HT_IE_CHA_SEC_OFFSET;
+		ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
 	/* If no above or below channel supplied disable FAT channel */
-	if (iwl_conf->extension_chan_offset != IEEE80211_HT_IE_CHA_SEC_ABOVE &&
-	    iwl_conf->extension_chan_offset != IEEE80211_HT_IE_CHA_SEC_BELOW) {
-		iwl_conf->extension_chan_offset = IEEE80211_HT_IE_CHA_SEC_NONE;
+	if (iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_ABOVE &&
+	    iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) {
+		iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
 		iwl_conf->supported_chan_width = 0;
 	}
 
 	iwl_conf->sm_ps = (u8)((ht_conf->cap & IEEE80211_HT_CAP_SM_PS) >> 2);
 
-	memcpy(iwl_conf->supp_mcs_set, ht_conf->supp_mcs_set, 16);
+	memcpy(&iwl_conf->mcs, &ht_conf->mcs, 16);
 
 	iwl_conf->control_channel = ht_bss_conf->primary_channel;
 	iwl_conf->tx_chan_width =
-		!!(ht_bss_conf->bss_cap & IEEE80211_HT_IE_CHA_WIDTH);
+		!!(ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY);
 	iwl_conf->ht_protection =
-		ht_bss_conf->bss_op_mode & IEEE80211_HT_IE_HT_PROTECTION;
+		ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_PROTECTION;
 	iwl_conf->non_GF_STA_present =
-		!!(ht_bss_conf->bss_op_mode & IEEE80211_HT_IE_NON_GF_STA_PRSNT);
+		!!(ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT);
 
 	IWL_DEBUG_MAC80211("control channel %d\n", iwl_conf->control_channel);
 	IWL_DEBUG_MAC80211("leave\n");
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 4c312c55f90c..4678da447ff6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -382,10 +382,10 @@ void iwl_reset_qos(struct iwl_priv *priv)
 }
 EXPORT_SYMBOL(iwl_reset_qos);
 
-#define MAX_BIT_RATE_40_MHZ 0x96 /* 150 Mbps */
-#define MAX_BIT_RATE_20_MHZ 0x48 /* 72 Mbps */
+#define MAX_BIT_RATE_40_MHZ 150 /* Mbps */
+#define MAX_BIT_RATE_20_MHZ 72 /* Mbps */
 static void iwlcore_init_ht_hw_capab(const struct iwl_priv *priv,
-			      struct ieee80211_ht_info *ht_info,
+			      struct ieee80211_sta_ht_cap *ht_info,
 			      enum ieee80211_band band)
 {
 	u16 max_bit_rate = 0;
@@ -393,45 +393,46 @@ static void iwlcore_init_ht_hw_capab(const struct iwl_priv *priv,
 	u8 tx_chains_num = priv->hw_params.tx_chains_num;
 
 	ht_info->cap = 0;
-	memset(ht_info->supp_mcs_set, 0, 16);
+	memset(&ht_info->mcs, 0, sizeof(ht_info->mcs));
 
-	ht_info->ht_supported = 1;
+	ht_info->ht_supported = true;
 
-	ht_info->cap |= (u16)IEEE80211_HT_CAP_GRN_FLD;
-	ht_info->cap |= (u16)IEEE80211_HT_CAP_SGI_20;
-	ht_info->cap |= (u16)(IEEE80211_HT_CAP_SM_PS &
+	ht_info->cap |= IEEE80211_HT_CAP_GRN_FLD;
+	ht_info->cap |= IEEE80211_HT_CAP_SGI_20;
+	ht_info->cap |= (IEEE80211_HT_CAP_SM_PS &
 			     (WLAN_HT_CAP_SM_PS_DISABLED << 2));
 
 	max_bit_rate = MAX_BIT_RATE_20_MHZ;
 	if (priv->hw_params.fat_channel & BIT(band)) {
-		ht_info->cap |= (u16)IEEE80211_HT_CAP_SUP_WIDTH;
-		ht_info->cap |= (u16)IEEE80211_HT_CAP_SGI_40;
-		ht_info->supp_mcs_set[4] = 0x01;
+		ht_info->cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+		ht_info->cap |= IEEE80211_HT_CAP_SGI_40;
+		ht_info->mcs.rx_mask[4] = 0x01;
 		max_bit_rate = MAX_BIT_RATE_40_MHZ;
 	}
 
 	if (priv->cfg->mod_params->amsdu_size_8K)
-		ht_info->cap |= (u16)IEEE80211_HT_CAP_MAX_AMSDU;
+		ht_info->cap |= IEEE80211_HT_CAP_MAX_AMSDU;
 
 	ht_info->ampdu_factor = CFG_HT_RX_AMPDU_FACTOR_DEF;
 	ht_info->ampdu_density = CFG_HT_MPDU_DENSITY_DEF;
 
-	ht_info->supp_mcs_set[0] = 0xFF;
+	ht_info->mcs.rx_mask[0] = 0xFF;
 	if (rx_chains_num >= 2)
-		ht_info->supp_mcs_set[1] = 0xFF;
+		ht_info->mcs.rx_mask[1] = 0xFF;
 	if (rx_chains_num >= 3)
-		ht_info->supp_mcs_set[2] = 0xFF;
+		ht_info->mcs.rx_mask[2] = 0xFF;
 
 	/* Highest supported Rx data rate */
 	max_bit_rate *= rx_chains_num;
-	ht_info->supp_mcs_set[10] = (u8)(max_bit_rate & 0x00FF);
-	ht_info->supp_mcs_set[11] = (u8)((max_bit_rate & 0xFF00) >> 8);
+	WARN_ON(max_bit_rate & ~IEEE80211_HT_MCS_RX_HIGHEST_MASK);
+	ht_info->mcs.rx_highest = cpu_to_le16(max_bit_rate);
 
 	/* Tx MCS capabilities */
-	ht_info->supp_mcs_set[12] = IEEE80211_HT_CAP_MCS_TX_DEFINED;
+	ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 	if (tx_chains_num != rx_chains_num) {
-		ht_info->supp_mcs_set[12] |= IEEE80211_HT_CAP_MCS_TX_RX_DIFF;
-		ht_info->supp_mcs_set[12] |= ((tx_chains_num - 1) << 2);
+		ht_info->mcs.tx_params |= IEEE80211_HT_MCS_TX_RX_DIFF;
+		ht_info->mcs.tx_params |= ((tx_chains_num - 1) <<
+				IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT);
 	}
 }
 
@@ -495,7 +496,7 @@ static int iwlcore_init_geos(struct iwl_priv *priv)
 	sband->n_bitrates = IWL_RATE_COUNT - IWL_FIRST_OFDM_RATE;
 
 	if (priv->cfg->sku & IWL_SKU_N)
-		iwlcore_init_ht_hw_capab(priv, &sband->ht_info,
+		iwlcore_init_ht_hw_capab(priv, &sband->ht_cap,
 					 IEEE80211_BAND_5GHZ);
 
 	sband = &priv->bands[IEEE80211_BAND_2GHZ];
@@ -505,7 +506,7 @@ static int iwlcore_init_geos(struct iwl_priv *priv)
 	sband->n_bitrates = IWL_RATE_COUNT;
 
 	if (priv->cfg->sku & IWL_SKU_N)
-		iwlcore_init_ht_hw_capab(priv, &sband->ht_info,
+		iwlcore_init_ht_hw_capab(priv, &sband->ht_cap,
 					 IEEE80211_BAND_2GHZ);
 
 	priv->ieee_channels = channels;
@@ -595,8 +596,8 @@ static void iwlcore_free_geos(struct iwl_priv *priv)
 static bool is_single_rx_stream(struct iwl_priv *priv)
 {
 	return !priv->current_ht_config.is_ht ||
-	       ((priv->current_ht_config.supp_mcs_set[1] == 0) &&
-		(priv->current_ht_config.supp_mcs_set[2] == 0));
+	       ((priv->current_ht_config.mcs.rx_mask[1] == 0) &&
+		(priv->current_ht_config.mcs.rx_mask[2] == 0));
 }
 
 static u8 iwl_is_channel_extension(struct iwl_priv *priv,
@@ -609,10 +610,10 @@ static u8 iwl_is_channel_extension(struct iwl_priv *priv,
 	if (!is_channel_valid(ch_info))
 		return 0;
 
-	if (extension_chan_offset == IEEE80211_HT_IE_CHA_SEC_ABOVE)
+	if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE)
 		return !(ch_info->fat_extension_channel &
 					IEEE80211_CHAN_NO_FAT_ABOVE);
-	else if (extension_chan_offset == IEEE80211_HT_IE_CHA_SEC_BELOW)
+	else if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW)
 		return !(ch_info->fat_extension_channel &
 					IEEE80211_CHAN_NO_FAT_BELOW);
 
@@ -620,18 +621,18 @@ static u8 iwl_is_channel_extension(struct iwl_priv *priv,
 }
 
 u8 iwl_is_fat_tx_allowed(struct iwl_priv *priv,
-			     struct ieee80211_ht_info *sta_ht_inf)
+			 struct ieee80211_sta_ht_cap *sta_ht_inf)
 {
 	struct iwl_ht_info *iwl_ht_conf = &priv->current_ht_config;
 
 	if ((!iwl_ht_conf->is_ht) ||
 	   (iwl_ht_conf->supported_chan_width != IWL_CHANNEL_WIDTH_40MHZ) ||
-	   (iwl_ht_conf->extension_chan_offset == IEEE80211_HT_IE_CHA_SEC_NONE))
+	   (iwl_ht_conf->extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE))
 		return 0;
 
 	if (sta_ht_inf) {
 		if ((!sta_ht_inf->ht_supported) ||
-		   (!(sta_ht_inf->cap & IEEE80211_HT_CAP_SUP_WIDTH)))
+		   (!(sta_ht_inf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)))
 			return 0;
 	}
 
@@ -671,13 +672,13 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info)
 
 	/* Note: control channel is opposite of extension channel */
 	switch (ht_info->extension_chan_offset) {
-	case IEEE80211_HT_IE_CHA_SEC_ABOVE:
+	case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
 		rxon->flags &= ~(RXON_FLG_CTRL_CHANNEL_LOC_HI_MSK);
 		break;
-	case IEEE80211_HT_IE_CHA_SEC_BELOW:
+	case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
 		rxon->flags |= RXON_FLG_CTRL_CHANNEL_LOC_HI_MSK;
 		break;
-	case IEEE80211_HT_IE_CHA_SEC_NONE:
+	case IEEE80211_HT_PARAM_CHA_SEC_NONE:
 	default:
 		rxon->flags &= ~RXON_FLG_CHANNEL_MODE_MIXED_MSK;
 		break;
@@ -693,9 +694,9 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info)
 			"rxon flags 0x%X operation mode :0x%X "
 			"extension channel offset 0x%x "
 			"control chan %d\n",
-			ht_info->supp_mcs_set[0],
-			ht_info->supp_mcs_set[1],
-			ht_info->supp_mcs_set[2],
+			ht_info->mcs.rx_mask[0],
+			ht_info->mcs.rx_mask[1],
+			ht_info->mcs.rx_mask[2],
 			le32_to_cpu(rxon->flags), ht_info->ht_protection,
 			ht_info->extension_chan_offset,
 			ht_info->control_channel);
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index 288b6a800e03..1a3ad8b1ebdb 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -190,7 +190,7 @@ void iwl_set_rxon_chain(struct iwl_priv *priv);
 int iwl_set_rxon_channel(struct iwl_priv *priv, struct ieee80211_channel *ch);
 void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info);
 u8 iwl_is_fat_tx_allowed(struct iwl_priv *priv,
-			 struct ieee80211_ht_info *sta_ht_inf);
+			 struct ieee80211_sta_ht_cap *sta_ht_inf);
 int iwl_hw_nic_init(struct iwl_priv *priv);
 int iwl_setup_mac(struct iwl_priv *priv);
 int iwl_set_hw_params(struct iwl_priv *priv);
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h
index 34306b6798e2..572250ee9d58 100644
--- a/drivers/net/wireless/iwlwifi/iwl-dev.h
+++ b/drivers/net/wireless/iwlwifi/iwl-dev.h
@@ -411,7 +411,7 @@ struct iwl_ht_info {
 	u8 max_amsdu_size;
 	u8 ampdu_factor;
 	u8 mpdu_density;
-	u8 supp_mcs_set[16];
+	struct ieee80211_mcs_info mcs;
 	/* BSS related data */
 	u8 control_channel;
 	u8 extension_chan_offset;
@@ -585,7 +585,7 @@ struct iwl_addsta_cmd;
 extern int iwl_send_add_sta(struct iwl_priv *priv,
 			    struct iwl_addsta_cmd *sta, u8 flags);
 extern u8 iwl_add_station_flags(struct iwl_priv *priv, const u8 *addr,
-			int is_ap, u8 flags, struct ieee80211_ht_info *ht_info);
+			int is_ap, u8 flags, struct ieee80211_sta_ht_cap *ht_info);
 extern void iwl4965_update_chain_flags(struct iwl_priv *priv);
 extern int iwl4965_set_pwr_src(struct iwl_priv *priv, enum iwl_pwr_src src);
 extern const u8 iwl_bcast_addr[ETH_ALEN];
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index 3b0bee331a33..78d16bd7b745 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -550,7 +550,7 @@ static void iwl_ht_cap_to_ie(const struct ieee80211_supported_band *sband,
 {
 	struct ieee80211_ht_cap *ht_cap;
 
-	if (!sband || !sband->ht_info.ht_supported)
+	if (!sband || !sband->ht_cap.ht_supported)
 		return;
 
 	if (*left < sizeof(struct ieee80211_ht_cap))
@@ -559,12 +559,12 @@ static void iwl_ht_cap_to_ie(const struct ieee80211_supported_band *sband,
 	*pos++ = sizeof(struct ieee80211_ht_cap);
 	ht_cap = (struct ieee80211_ht_cap *) pos;
 
-	ht_cap->cap_info = cpu_to_le16(sband->ht_info.cap);
-	memcpy(ht_cap->supp_mcs_set, sband->ht_info.supp_mcs_set, 16);
+	ht_cap->cap_info = cpu_to_le16(sband->ht_cap.cap);
+	memcpy(&ht_cap->mcs, &sband->ht_cap.mcs, 16);
 	ht_cap->ampdu_params_info =
-		(sband->ht_info.ampdu_factor & IEEE80211_HT_CAP_AMPDU_FACTOR) |
-		((sband->ht_info.ampdu_density << 2) &
-			IEEE80211_HT_CAP_AMPDU_DENSITY);
+		(sband->ht_cap.ampdu_factor & IEEE80211_HT_AMPDU_PARM_FACTOR) |
+		((sband->ht_cap.ampdu_density << 2) &
+			IEEE80211_HT_AMPDU_PARM_DENSITY);
 	*left -= sizeof(struct ieee80211_ht_cap);
 }
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c
index a28a8decc79c..b9b8554433a6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-sta.c
+++ b/drivers/net/wireless/iwlwifi/iwl-sta.c
@@ -181,7 +181,7 @@ int iwl_send_add_sta(struct iwl_priv *priv,
 EXPORT_SYMBOL(iwl_send_add_sta);
 
 static void iwl_set_ht_add_station(struct iwl_priv *priv, u8 index,
-				   struct ieee80211_ht_info *sta_ht_inf)
+				   struct ieee80211_sta_ht_cap *sta_ht_inf)
 {
 	__le32 sta_flags;
 	u8 mimo_ps_mode;
@@ -229,7 +229,7 @@ static void iwl_set_ht_add_station(struct iwl_priv *priv, u8 index,
  * iwl_add_station_flags - Add station to tables in driver and device
  */
 u8 iwl_add_station_flags(struct iwl_priv *priv, const u8 *addr, int is_ap,
-			 u8 flags, struct ieee80211_ht_info *ht_info)
+			 u8 flags, struct ieee80211_sta_ht_cap *ht_info)
 {
 	int i;
 	int sta_id = IWL_INVALID_STATION;
@@ -894,7 +894,7 @@ int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, int is_ap)
 
 	/* Add station to device's station table */
 	struct ieee80211_conf *conf = &priv->hw->conf;
-	struct ieee80211_ht_info *cur_ht_config = &conf->ht_conf;
+	struct ieee80211_sta_ht_cap *cur_ht_config = &conf->ht_cap;
 
 	if ((is_ap) &&
 	    (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) &&
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index e23d9a52d083..3f236b546683 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -563,19 +563,18 @@ static int __init init_mac80211_hwsim(void)
 		data->band.n_channels = ARRAY_SIZE(hwsim_channels);
 		data->band.bitrates = data->rates;
 		data->band.n_bitrates = ARRAY_SIZE(hwsim_rates);
-		data->band.ht_info.ht_supported = 1;
-		data->band.ht_info.cap = IEEE80211_HT_CAP_SUP_WIDTH |
+		data->band.ht_cap.ht_supported = true;
+		data->band.ht_cap.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
 			IEEE80211_HT_CAP_GRN_FLD |
 			IEEE80211_HT_CAP_SGI_40 |
 			IEEE80211_HT_CAP_DSSSCCK40;
-		data->band.ht_info.ampdu_factor = 0x3;
-		data->band.ht_info.ampdu_density = 0x6;
-		memset(data->band.ht_info.supp_mcs_set, 0,
-		       sizeof(data->band.ht_info.supp_mcs_set));
-		data->band.ht_info.supp_mcs_set[0] = 0xff;
-		data->band.ht_info.supp_mcs_set[1] = 0xff;
-		data->band.ht_info.supp_mcs_set[12] =
-			IEEE80211_HT_CAP_MCS_TX_DEFINED;
+		data->band.ht_cap.ampdu_factor = 0x3;
+		data->band.ht_cap.ampdu_density = 0x6;
+		memset(&data->band.ht_cap.mcs, 0,
+		       sizeof(data->band.ht_cap.mcs));
+		data->band.ht_cap.mcs.rx_mask[0] = 0xff;
+		data->band.ht_cap.mcs.rx_mask[1] = 0xff;
+		data->band.ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 		hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &data->band;
 
 		err = ieee80211_register_hw(hw);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 14126bc36641..64a4abce6d91 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -685,28 +685,88 @@ struct ieee80211_bar {
 #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL     0x0000
 #define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA  0x0004
 
+
+#define IEEE80211_HT_MCS_MASK_LEN		10
+
+/**
+ * struct ieee80211_mcs_info - MCS information
+ * @rx_mask: RX mask
+ * @rx_highest: highest supported RX rate
+ * @tx_params: TX parameters
+ */
+struct ieee80211_mcs_info {
+	u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN];
+	__le16 rx_highest;
+	u8 tx_params;
+	u8 reserved[3];
+} __attribute__((packed));
+
+/* 802.11n HT capability MSC set */
+#define IEEE80211_HT_MCS_RX_HIGHEST_MASK	0x3ff
+#define IEEE80211_HT_MCS_TX_DEFINED		0x01
+#define IEEE80211_HT_MCS_TX_RX_DIFF		0x02
+/* value 0 == 1 stream etc */
+#define IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK	0x0C
+#define IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT	2
+#define		IEEE80211_HT_MCS_TX_MAX_STREAMS	4
+#define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION	0x10
+
+/*
+ * 802.11n D5.0 20.3.5 / 20.6 says:
+ * - indices 0 to 7 and 32 are single spatial stream
+ * - 8 to 31 are multiple spatial streams using equal modulation
+ *   [8..15 for two streams, 16..23 for three and 24..31 for four]
+ * - remainder are multiple spatial streams using unequal modulation
+ */
+#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START 33
+#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE \
+	(IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8)
+
 /**
  * struct ieee80211_ht_cap - HT capabilities
  *
- * This structure refers to "HT capabilities element" as
- * described in 802.11n draft section 7.3.2.52
+ * This structure is the "HT capabilities element" as
+ * described in 802.11n D5.0 7.3.2.57
  */
 struct ieee80211_ht_cap {
 	__le16 cap_info;
 	u8 ampdu_params_info;
-	u8 supp_mcs_set[16];
+
+	/* 16 bytes MCS information */
+	struct ieee80211_mcs_info mcs;
+
 	__le16 extended_ht_cap_info;
 	__le32 tx_BF_cap_info;
 	u8 antenna_selection_info;
 } __attribute__ ((packed));
 
+/* 802.11n HT capabilities masks (for cap_info) */
+#define IEEE80211_HT_CAP_LDPC_CODING		0x0001
+#define IEEE80211_HT_CAP_SUP_WIDTH_20_40	0x0002
+#define IEEE80211_HT_CAP_SM_PS			0x000C
+#define IEEE80211_HT_CAP_GRN_FLD		0x0010
+#define IEEE80211_HT_CAP_SGI_20			0x0020
+#define IEEE80211_HT_CAP_SGI_40			0x0040
+#define IEEE80211_HT_CAP_TX_STBC		0x0080
+#define IEEE80211_HT_CAP_RX_STBC		0x0300
+#define IEEE80211_HT_CAP_DELAY_BA		0x0400
+#define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
+#define IEEE80211_HT_CAP_DSSSCCK40		0x1000
+#define IEEE80211_HT_CAP_PSMP_SUPPORT		0x2000
+#define IEEE80211_HT_CAP_40MHZ_INTOLERANT	0x4000
+#define IEEE80211_HT_CAP_LSIG_TXOP_PROT		0x8000
+
+/* 802.11n HT capability AMPDU settings (for ampdu_params_info) */
+#define IEEE80211_HT_AMPDU_PARM_FACTOR		0x03
+#define IEEE80211_HT_AMPDU_PARM_DENSITY		0x1C
+
 /**
- * struct ieee80211_ht_cap - HT additional information
+ * struct ieee80211_ht_info - HT information
  *
- * This structure refers to "HT information element" as
- * described in 802.11n draft section 7.3.2.53
+ * This structure is the "HT information element" as
+ * described in 802.11n D5.0 7.3.2.58
  */
-struct ieee80211_ht_addt_info {
+struct ieee80211_ht_info {
 	u8 control_chan;
 	u8 ht_param;
 	__le16 operation_mode;
@@ -714,36 +774,33 @@ struct ieee80211_ht_addt_info {
 	u8 basic_set[16];
 } __attribute__ ((packed));
 
-/* 802.11n HT capabilities masks */
-#define IEEE80211_HT_CAP_SUP_WIDTH		0x0002
-#define IEEE80211_HT_CAP_SM_PS			0x000C
-#define IEEE80211_HT_CAP_GRN_FLD		0x0010
-#define IEEE80211_HT_CAP_SGI_20			0x0020
-#define IEEE80211_HT_CAP_SGI_40			0x0040
-#define IEEE80211_HT_CAP_DELAY_BA		0x0400
-#define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
-#define IEEE80211_HT_CAP_DSSSCCK40		0x1000
-/* 802.11n HT capability AMPDU settings */
-#define IEEE80211_HT_CAP_AMPDU_FACTOR		0x03
-#define IEEE80211_HT_CAP_AMPDU_DENSITY		0x1C
-/* 802.11n HT capability MSC set */
-#define IEEE80211_SUPP_MCS_SET_UEQM		4
-#define IEEE80211_HT_CAP_MAX_STREAMS		4
-#define IEEE80211_SUPP_MCS_SET_LEN		10
-/* maximum streams the spec allows */
-#define IEEE80211_HT_CAP_MCS_TX_DEFINED		0x01
-#define IEEE80211_HT_CAP_MCS_TX_RX_DIFF		0x02
-#define IEEE80211_HT_CAP_MCS_TX_STREAMS		0x0C
-#define IEEE80211_HT_CAP_MCS_TX_UEQM		0x10
-/* 802.11n HT IE masks */
-#define IEEE80211_HT_IE_CHA_SEC_OFFSET		0x03
-#define IEEE80211_HT_IE_CHA_SEC_NONE	 	0x00
-#define IEEE80211_HT_IE_CHA_SEC_ABOVE 		0x01
-#define IEEE80211_HT_IE_CHA_SEC_BELOW 		0x03
-#define IEEE80211_HT_IE_CHA_WIDTH		0x04
-#define IEEE80211_HT_IE_HT_PROTECTION		0x0003
-#define IEEE80211_HT_IE_NON_GF_STA_PRSNT	0x0004
-#define IEEE80211_HT_IE_NON_HT_STA_PRSNT	0x0010
+/* for ht_param */
+#define IEEE80211_HT_PARAM_CHA_SEC_OFFSET		0x03
+#define		IEEE80211_HT_PARAM_CHA_SEC_NONE		0x00
+#define		IEEE80211_HT_PARAM_CHA_SEC_ABOVE	0x01
+#define		IEEE80211_HT_PARAM_CHA_SEC_BELOW	0x03
+#define IEEE80211_HT_PARAM_CHAN_WIDTH_ANY		0x04
+#define IEEE80211_HT_PARAM_RIFS_MODE			0x08
+#define IEEE80211_HT_PARAM_SPSMP_SUPPORT		0x10
+#define IEEE80211_HT_PARAM_SERV_INTERVAL_GRAN		0xE0
+
+/* for operation_mode */
+#define IEEE80211_HT_OP_MODE_PROTECTION			0x0003
+#define		IEEE80211_HT_OP_MODE_PROTECTION_NONE		0
+#define		IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER	1
+#define		IEEE80211_HT_OP_MODE_PROTECTION_20MHZ		2
+#define		IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED	3
+#define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT		0x0004
+#define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT		0x0010
+
+/* for stbc_param */
+#define IEEE80211_HT_STBC_PARAM_DUAL_BEACON		0x0040
+#define IEEE80211_HT_STBC_PARAM_DUAL_CTS_PROT		0x0080
+#define IEEE80211_HT_STBC_PARAM_STBC_BEACON		0x0100
+#define IEEE80211_HT_STBC_PARAM_LSIG_TXOP_FULLPROT	0x0200
+#define IEEE80211_HT_STBC_PARAM_PCO_ACTIVE		0x0400
+#define IEEE80211_HT_STBC_PARAM_PCO_PHASE		0x0800
+
 
 /* block-ack parameters */
 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
@@ -949,7 +1006,7 @@ enum ieee80211_eid {
 	WLAN_EID_EXT_SUPP_RATES = 50,
 	/* 802.11n */
 	WLAN_EID_HT_CAPABILITY = 45,
-	WLAN_EID_HT_EXTRA_INFO = 61,
+	WLAN_EID_HT_INFORMATION = 61,
 	/* 802.11i */
 	WLAN_EID_RSN = 48,
 	WLAN_EID_WPA = 221,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d1466e7a47b8..2870f3973f1a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -191,7 +191,7 @@ enum ieee80211_bss_change {
  * @beacon_int: beacon interval
  * @assoc_capability: capabilities taken from assoc resp
  * @assoc_ht: association in HT mode
- * @ht_conf: ht capabilities
+ * @ht_cap: ht capabilities
  * @ht_bss_conf: ht extended capabilities
  * @basic_rates: bitmap of basic rates, each bit stands for an
  *	index into the rate table configured by the driver in
@@ -212,7 +212,7 @@ struct ieee80211_bss_conf {
 	u64 basic_rates;
 	/* ht related data */
 	bool assoc_ht;
-	struct ieee80211_ht_info *ht_conf;
+	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_ht_bss_info *ht_bss_conf;
 };
 
@@ -477,7 +477,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
  * @antenna_sel_tx: transmit antenna selection, 0: default/diversity,
  *	1/2: antenna 0/1
  * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx
- * @ht_conf: describes current self configuration of 802.11n HT capabilies
+ * @ht_cap: describes current self configuration of 802.11n HT capabilities
  * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters
  * @channel: the channel to tune to
  */
@@ -493,7 +493,7 @@ struct ieee80211_conf {
 
 	struct ieee80211_channel *channel;
 
-	struct ieee80211_ht_info ht_conf;
+	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_ht_bss_info ht_bss_conf;
 };
 
@@ -687,7 +687,7 @@ enum set_key_cmd {
  * @addr: MAC address
  * @aid: AID we assigned to the station if we're an AP
  * @supp_rates: Bitmap of supported rates (per band)
- * @ht_info: HT capabilities of this STA
+ * @ht_cap: HT capabilities of this STA
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *), size is determined in hw information.
  */
@@ -695,7 +695,7 @@ struct ieee80211_sta {
 	u64 supp_rates[IEEE80211_NUM_BANDS];
 	u8 addr[ETH_ALEN];
 	u16 aid;
-	struct ieee80211_ht_info ht_info;
+	struct ieee80211_sta_ht_cap ht_cap;
 
 	/* must be last */
 	u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *))));
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 721efb363db7..c0aa0fb8db5e 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -10,6 +10,7 @@
 #include <linux/netdevice.h>
 #include <linux/debugfs.h>
 #include <linux/list.h>
+#include <linux/ieee80211.h>
 #include <net/cfg80211.h>
 
 /**
@@ -133,23 +134,23 @@ struct ieee80211_rate {
 };
 
 /**
- * struct ieee80211_ht_info - describing STA's HT capabilities
+ * struct ieee80211_sta_ht_cap - STA's HT capabilities
  *
  * This structure describes most essential parameters needed
  * to describe 802.11n HT capabilities for an STA.
  *
- * @ht_supported: is HT supported by STA, 0: no, 1: yes
+ * @ht_supported: is HT supported by the STA
  * @cap: HT capabilities map as described in 802.11n spec
  * @ampdu_factor: Maximum A-MPDU length factor
  * @ampdu_density: Minimum A-MPDU spacing
- * @supp_mcs_set: Supported MCS set as described in 802.11n spec
+ * @mcs: Supported MCS rates
  */
-struct ieee80211_ht_info {
+struct ieee80211_sta_ht_cap {
 	u16 cap; /* use IEEE80211_HT_CAP_ */
-	u8 ht_supported;
+	bool ht_supported;
 	u8 ampdu_factor;
 	u8 ampdu_density;
-	u8 supp_mcs_set[16];
+	struct ieee80211_mcs_info mcs;
 };
 
 /**
@@ -173,7 +174,7 @@ struct ieee80211_supported_band {
 	enum ieee80211_band band;
 	int n_channels;
 	int n_bitrates;
-	struct ieee80211_ht_info ht_info;
+	struct ieee80211_sta_ht_cap ht_cap;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index cf3fd5d60665..a5dea617aab3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -633,10 +633,9 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 		sta->sta.supp_rates[local->oper_channel->band] = rates;
 	}
 
-	if (params->ht_capa) {
-		ieee80211_ht_cap_ie_to_ht_info(params->ht_capa,
-					       &sta->sta.ht_info);
-	}
+	if (params->ht_capa)
+		ieee80211_ht_cap_ie_to_sta_ht_cap(params->ht_capa,
+						  &sta->sta.ht_cap);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) {
 		switch (params->plink_action) {
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index b854483cf23f..e2d121bf2745 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -20,37 +20,33 @@
 #include "sta_info.h"
 #include "wme.h"
 
-int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
-				   struct ieee80211_ht_info *ht_info)
+void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie,
+				       struct ieee80211_sta_ht_cap *ht_cap)
 {
 
-	if (ht_info == NULL)
-		return -EINVAL;
+	BUG_ON(!ht_cap);
 
-	memset(ht_info, 0, sizeof(*ht_info));
+	memset(ht_cap, 0, sizeof(*ht_cap));
 
 	if (ht_cap_ie) {
 		u8 ampdu_info = ht_cap_ie->ampdu_params_info;
 
-		ht_info->ht_supported = 1;
-		ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info);
-		ht_info->ampdu_factor =
-			ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR;
-		ht_info->ampdu_density =
-			(ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2;
-		memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16);
+		ht_cap->ht_supported = true;
+		ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info);
+		ht_cap->ampdu_factor =
+			ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR;
+		ht_cap->ampdu_density =
+			(ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;
+		memcpy(&ht_cap->mcs, &ht_cap_ie->mcs, sizeof(ht_cap->mcs));
 	} else
-		ht_info->ht_supported = 0;
-
-	return 0;
+		ht_cap->ht_supported = false;
 }
 
-int ieee80211_ht_addt_info_ie_to_ht_bss_info(
-			struct ieee80211_ht_addt_info *ht_add_info_ie,
+void ieee80211_ht_info_ie_to_ht_bss_info(
+			struct ieee80211_ht_info *ht_add_info_ie,
 			struct ieee80211_ht_bss_info *bss_info)
 {
-	if (bss_info == NULL)
-		return -EINVAL;
+	BUG_ON(!bss_info);
 
 	memset(bss_info, 0, sizeof(*bss_info));
 
@@ -62,8 +58,119 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info(
 		bss_info->bss_cap = ht_add_info_ie->ht_param;
 		bss_info->bss_op_mode = (u8)(op_mode & 0xff);
 	}
+}
+
+/*
+ * ieee80211_handle_ht should be called only after the operating band
+ * has been determined as ht configuration depends on the hw's
+ * HT abilities for a specific band.
+ */
+u32 ieee80211_handle_ht(struct ieee80211_local *local,
+			struct ieee80211_sta_ht_cap *req_ht_cap,
+			struct ieee80211_ht_bss_info *req_bss_cap)
+{
+	struct ieee80211_conf *conf = &local->hw.conf;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_sta_ht_cap ht_cap;
+	struct ieee80211_ht_bss_info ht_bss_conf;
+	u32 changed = 0;
+	int i;
+	u8 max_tx_streams;
+	u8 tx_mcs_set_cap;
+	bool enable_ht = true;
+
+	sband = local->hw.wiphy->bands[conf->channel->band];
+
+	memset(&ht_cap, 0, sizeof(ht_cap));
+	memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info));
+
+	/* HT is not supported */
+	if (!sband->ht_cap.ht_supported)
+		enable_ht = false;
+
+	/* disable HT */
+	if (!enable_ht) {
+		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)
+			changed |= BSS_CHANGED_HT;
+		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
+		conf->ht_cap.ht_supported = false;
+		return changed;
+	}
+
+
+	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE))
+		changed |= BSS_CHANGED_HT;
+
+	conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
+	ht_cap.ht_supported = true;
+
+	ht_cap.cap = req_ht_cap->cap & sband->ht_cap.cap;
+	ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS;
+	ht_cap.cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
+
+	ht_bss_conf.primary_channel = req_bss_cap->primary_channel;
+	ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
+	ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
+
+	ht_cap.ampdu_factor = req_ht_cap->ampdu_factor;
+	ht_cap.ampdu_density = req_ht_cap->ampdu_density;
+
+	/* own MCS TX capabilities */
+	tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
+
+	/*
+	 * configure supported Tx MCS according to requested MCS
+	 * (based in most cases on Rx capabilities of peer) and self
+	 * Tx MCS capabilities (as defined by low level driver HW
+	 * Tx capabilities)
+	 */
+
+	/* can we TX with MCS rates? */
+	if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED))
+		goto check_changed;
+
+	/* Counting from 0, therefore +1 */
+	if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF)
+		max_tx_streams =
+			((tx_mcs_set_cap & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK)
+				>> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
+	else
+		max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS;
+
+	/*
+	 * 802.11n D5.0 20.3.5 / 20.6 says:
+	 * - indices 0 to 7 and 32 are single spatial stream
+	 * - 8 to 31 are multiple spatial streams using equal modulation
+	 *   [8..15 for two streams, 16..23 for three and 24..31 for four]
+	 * - remainder are multiple spatial streams using unequal modulation
+	 */
+	for (i = 0; i < max_tx_streams; i++)
+		ht_cap.mcs.rx_mask[i] =
+			sband->ht_cap.mcs.rx_mask[i] &
+					req_ht_cap->mcs.rx_mask[i];
+
+	if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION)
+		for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE;
+		     i < IEEE80211_HT_MCS_MASK_LEN; i++)
+			ht_cap.mcs.rx_mask[i] =
+				sband->ht_cap.mcs.rx_mask[i] &
+					req_ht_cap->mcs.rx_mask[i];
+
+	/* handle MCS rate 32 too */
+	if (sband->ht_cap.mcs.rx_mask[32/8] &
+	    req_ht_cap->mcs.rx_mask[32/8] & 1)
+		ht_cap.mcs.rx_mask[32/8] |= 1;
+
+ check_changed:
+	/* if bss configuration changed store the new one */
+	if (memcmp(&conf->ht_cap, &ht_cap, sizeof(ht_cap)) ||
+	    memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) {
+		changed |= BSS_CHANGED_HT;
+		memcpy(&conf->ht_cap, &ht_cap, sizeof(ht_cap));
+		memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf));
+	}
 
-	return 0;
+	return changed;
 }
 
 static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
@@ -794,7 +901,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	 * check if configuration can support the BA policy
 	 * and if buffer size does not exceeds max value */
 	if (((ba_policy != 1)
-		&& (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA)))
+		&& (!(conf->ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
 		|| (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
 		status = WLAN_STATUS_INVALID_QOS_PARAM;
 #ifdef CONFIG_MAC80211_HT_DEBUG
@@ -812,7 +919,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 
 		sband = local->hw.wiphy->bands[conf->channel->band];
 		buf_size = IEEE80211_MIN_AMPDU_BUF;
-		buf_size = buf_size << sband->ht_info.ampdu_factor;
+		buf_size = buf_size << sband->ht_cap.ampdu_factor;
 	}
 
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f49cb9e8bb42..ae4ca3e8b443 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -817,7 +817,7 @@ struct ieee802_11_elems {
 	u8 *wmm_info;
 	u8 *wmm_param;
 	struct ieee80211_ht_cap *ht_cap_elem;
-	struct ieee80211_ht_addt_info *ht_info_elem;
+	struct ieee80211_ht_info *ht_info_elem;
 	u8 *mesh_config;
 	u8 *mesh_id;
 	u8 *peer_link;
@@ -880,9 +880,6 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 int ieee80211_hw_config(struct ieee80211_local *local);
 int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed);
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
-u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
-			struct ieee80211_ht_info *req_ht_cap,
-			struct ieee80211_ht_bss_info *req_bss_cap);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed);
 void ieee80211_configure_filter(struct ieee80211_local *local);
@@ -963,11 +960,14 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
 /* HT */
-int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie,
-				   struct ieee80211_ht_info *ht_info);
-int ieee80211_ht_addt_info_ie_to_ht_bss_info(
-			struct ieee80211_ht_addt_info *ht_add_info_ie,
+void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie,
+				       struct ieee80211_sta_ht_cap *ht_cap);
+void ieee80211_ht_info_ie_to_ht_bss_info(
+			struct ieee80211_ht_info *ht_add_info_ie,
 			struct ieee80211_ht_bss_info *bss_info);
+u32 ieee80211_handle_ht(struct ieee80211_local *local,
+			struct ieee80211_sta_ht_cap *req_ht_cap,
+			struct ieee80211_ht_bss_info *req_bss_cap);
 void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
 
 void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c427954fe8e8..07f812755e55 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -232,100 +232,6 @@ int ieee80211_hw_config(struct ieee80211_local *local)
 	return ret;
 }
 
-/**
- * ieee80211_handle_ht should be used only after legacy configuration
- * has been determined namely band, as ht configuration depends upon
- * the hardware's HT abilities for a _specific_ band.
- */
-u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht,
-			   struct ieee80211_ht_info *req_ht_cap,
-			   struct ieee80211_ht_bss_info *req_bss_cap)
-{
-	struct ieee80211_conf *conf = &local->hw.conf;
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_ht_info ht_conf;
-	struct ieee80211_ht_bss_info ht_bss_conf;
-	u32 changed = 0;
-	int i;
-	u8 max_tx_streams = IEEE80211_HT_CAP_MAX_STREAMS;
-	u8 tx_mcs_set_cap;
-
-	sband = local->hw.wiphy->bands[conf->channel->band];
-
-	memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info));
-	memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info));
-
-	/* HT is not supported */
-	if (!sband->ht_info.ht_supported) {
-		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
-		goto out;
-	}
-
-	/* disable HT */
-	if (!enable_ht) {
-		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)
-			changed |= BSS_CHANGED_HT;
-		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
-		conf->ht_conf.ht_supported = 0;
-		goto out;
-	}
-
-
-	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE))
-		changed |= BSS_CHANGED_HT;
-
-	conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
-	ht_conf.ht_supported = 1;
-
-	ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap;
-	ht_conf.cap &= ~(IEEE80211_HT_CAP_SM_PS);
-	ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_SM_PS;
-	ht_bss_conf.primary_channel = req_bss_cap->primary_channel;
-	ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
-	ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
-
-	ht_conf.ampdu_factor = req_ht_cap->ampdu_factor;
-	ht_conf.ampdu_density = req_ht_cap->ampdu_density;
-
-	/* Bits 96-100 */
-	tx_mcs_set_cap = sband->ht_info.supp_mcs_set[12];
-
-	/* configure suppoerted Tx MCS according to requested MCS
-	 * (based in most cases on Rx capabilities of peer) and self
-	 * Tx MCS capabilities (as defined by low level driver HW
-	 * Tx capabilities) */
-	if (!(tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_DEFINED))
-		goto check_changed;
-
-	/* Counting from 0 therfore + 1 */
-	if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_RX_DIFF)
-		max_tx_streams = ((tx_mcs_set_cap &
-				IEEE80211_HT_CAP_MCS_TX_STREAMS) >> 2) + 1;
-
-	for (i = 0; i < max_tx_streams; i++)
-		ht_conf.supp_mcs_set[i] =
-			sband->ht_info.supp_mcs_set[i] &
-					req_ht_cap->supp_mcs_set[i];
-
-	if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_UEQM)
-		for (i = IEEE80211_SUPP_MCS_SET_UEQM;
-		     i < IEEE80211_SUPP_MCS_SET_LEN; i++)
-			ht_conf.supp_mcs_set[i] =
-				sband->ht_info.supp_mcs_set[i] &
-					req_ht_cap->supp_mcs_set[i];
-
-check_changed:
-	/* if bss configuration changed store the new one */
-	if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) ||
-	    memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) {
-		changed |= BSS_CHANGED_HT;
-		memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf));
-		memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf));
-	}
-out:
-	return changed;
-}
-
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed)
 {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 829995e740a7..196dd39f6286 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -236,7 +236,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *ies, *ht_add_ie;
+	u8 *pos, *ies, *ht_ie;
 	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
 	struct ieee80211_bss *bss;
@@ -393,24 +393,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 
 	/* wmm support is a must to HT */
 	if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) &&
-	    sband->ht_info.ht_supported &&
-	    (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) {
-		struct ieee80211_ht_addt_info *ht_add_info =
-			(struct ieee80211_ht_addt_info *)ht_add_ie;
-		u16 cap = sband->ht_info.cap;
+	    sband->ht_cap.ht_supported &&
+	    (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) &&
+	    ht_ie[1] >= sizeof(struct ieee80211_ht_info)) {
+		struct ieee80211_ht_info *ht_info =
+			(struct ieee80211_ht_info *)(ht_ie + 2);
+		u16 cap = sband->ht_cap.cap;
 		__le16 tmp;
 		u32 flags = local->hw.conf.channel->flags;
 
-		switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) {
-		case IEEE80211_HT_IE_CHA_SEC_ABOVE:
+		switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
+		case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
 			if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) {
-				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
+				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
 				cap &= ~IEEE80211_HT_CAP_SGI_40;
 			}
 			break;
-		case IEEE80211_HT_IE_CHA_SEC_BELOW:
+		case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
 			if (flags & IEEE80211_CHAN_NO_FAT_BELOW) {
-				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH;
+				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
 				cap &= ~IEEE80211_HT_CAP_SGI_40;
 			}
 			break;
@@ -424,9 +425,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		memcpy(pos, &tmp, sizeof(u16));
 		pos += sizeof(u16);
 		/* TODO: needs a define here for << 2 */
-		*pos++ = sband->ht_info.ampdu_factor |
-			 (sband->ht_info.ampdu_density << 2);
-		memcpy(pos, sband->ht_info.supp_mcs_set, 16);
+		*pos++ = sband->ht_cap.ampdu_factor |
+			 (sband->ht_cap.ampdu_density << 2);
+		memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
 	}
 
 	kfree(ifsta->assocreq_ies);
@@ -730,7 +731,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
 		changed |= BSS_CHANGED_HT;
 		sdata->bss_conf.assoc_ht = 1;
-		sdata->bss_conf.ht_conf = &conf->ht_conf;
+		sdata->bss_conf.ht_cap = &conf->ht_cap;
 		sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
 	}
 
@@ -850,7 +851,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 		changed |= BSS_CHANGED_HT;
 
 	sdata->bss_conf.assoc_ht = 0;
-	sdata->bss_conf.ht_conf = NULL;
+	sdata->bss_conf.ht_cap = NULL;
 	sdata->bss_conf.ht_bss_conf = NULL;
 
 	ieee80211_led_assoc(local, 0);
@@ -1335,11 +1336,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
 	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
 		struct ieee80211_ht_bss_info bss_info;
-		ieee80211_ht_cap_ie_to_ht_info(
-				elems.ht_cap_elem, &sta->sta.ht_info);
-		ieee80211_ht_addt_info_ie_to_ht_bss_info(
+		ieee80211_ht_cap_ie_to_sta_ht_cap(
+				elems.ht_cap_elem, &sta->sta.ht_cap);
+		ieee80211_ht_info_ie_to_ht_bss_info(
 				elems.ht_info_elem, &bss_info);
-		ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info);
+		ieee80211_handle_ht(local, &sta->sta.ht_cap, &bss_info);
 	}
 
 	rate_control_rate_init(sta);
@@ -1696,9 +1697,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	    elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
 		struct ieee80211_ht_bss_info bss_info;
 
-		ieee80211_ht_addt_info_ie_to_ht_bss_info(
+		ieee80211_ht_info_ie_to_ht_bss_info(
 				elems.ht_info_elem, &bss_info);
-		changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf,
+		changed |= ieee80211_handle_ht(local, &conf->ht_cap,
 					       &bss_info);
 	}
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1b605457017e..9941a60a2327 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -532,8 +532,8 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
 			if (elen >= sizeof(struct ieee80211_ht_cap))
 				elems->ht_cap_elem = (void *)pos;
 			break;
-		case WLAN_EID_HT_EXTRA_INFO:
-			if (elen >= sizeof(struct ieee80211_ht_addt_info))
+		case WLAN_EID_HT_INFORMATION:
+			if (elen >= sizeof(struct ieee80211_ht_info))
 				elems->ht_info_elem = (void *)pos;
 			break;
 		case WLAN_EID_MESH_ID:
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 29c41040c8c9..a3af15141244 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -147,7 +147,7 @@ static int ieee80211_ioctl_giwname(struct net_device *dev,
 	sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ];
 	if (sband) {
 		is_a = 1;
-		is_ht |= sband->ht_info.ht_supported;
+		is_ht |= sband->ht_cap.ht_supported;
 	}
 
 	sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ];
@@ -160,7 +160,7 @@ static int ieee80211_ioctl_giwname(struct net_device *dev,
 			if (sband->bitrates[i].bitrate == 60)
 				is_g = 1;
 		}
-		is_ht |= sband->ht_info.ht_supported;
+		is_ht |= sband->ht_cap.ht_supported;
 	}
 
 	strcpy(name, "IEEE 802.11");
-- 
cgit v1.2.3


From 0f4ac38b5999c3d51adad52d61c56c1b99c247ec Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 9 Oct 2008 12:18:04 +0200
Subject: mac80211: kill hw.conf.antenna_sel_{rx,tx}

Never actually used.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/b43/main.c            | 25 +++----------------------
 drivers/net/wireless/b43legacy/main.c      | 18 ++----------------
 drivers/net/wireless/p54/p54common.c       |  3 +--
 drivers/net/wireless/rt2x00/rt2x00config.c | 20 ++++----------------
 drivers/net/wireless/rt2x00/rt2x00dev.c    |  6 ++----
 include/net/mac80211.h                     |  7 +------
 net/mac80211/debugfs.c                     |  8 --------
 net/mac80211/ieee80211_i.h                 |  2 --
 net/mac80211/tx.c                          |  1 -
 9 files changed, 13 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 6e773018cba0..6b85428b0e1d 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -1339,25 +1339,6 @@ u8 b43_ieee80211_antenna_sanitize(struct b43_wldev *dev,
 	return antenna_nr;
 }
 
-static int b43_antenna_from_ieee80211(struct b43_wldev *dev, u8 antenna)
-{
-	antenna = b43_ieee80211_antenna_sanitize(dev, antenna);
-	switch (antenna) {
-	case 0:		/* default/diversity */
-		return B43_ANTENNA_DEFAULT;
-	case 1:		/* Antenna 0 */
-		return B43_ANTENNA0;
-	case 2:		/* Antenna 1 */
-		return B43_ANTENNA1;
-	case 3:		/* Antenna 2 */
-		return B43_ANTENNA2;
-	case 4:		/* Antenna 3 */
-		return B43_ANTENNA3;
-	default:
-		return B43_ANTENNA_DEFAULT;
-	}
-}
-
 /* Convert a b43 antenna number value to the PHY TX control value. */
 static u16 b43_antenna_to_phyctl(int antenna)
 {
@@ -1399,7 +1380,7 @@ static void b43_write_beacon_template(struct b43_wldev *dev,
 				  len, ram_offset, shm_size_offset, rate);
 
 	/* Write the PHY TX control parameters. */
-	antenna = b43_antenna_from_ieee80211(dev, info->antenna_sel_tx);
+	antenna = B43_ANTENNA_DEFAULT;
 	antenna = b43_antenna_to_phyctl(antenna);
 	ctl = b43_shm_read16(dev, B43_SHM_SHARED, B43_SHM_SH_BEACPHYCTL);
 	/* We can't send beacons with short preamble. Would get PHY errors. */
@@ -3399,9 +3380,9 @@ static int b43_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
 	}
 
 	/* Antennas for RX and management frame TX. */
-	antenna = b43_antenna_from_ieee80211(dev, conf->antenna_sel_tx);
+	antenna = B43_ANTENNA_DEFAULT;
 	b43_mgmtframe_txantenna(dev, antenna);
-	antenna = b43_antenna_from_ieee80211(dev, conf->antenna_sel_rx);
+	antenna = B43_ANTENNA_DEFAULT;
 	if (phy->ops->set_rx_antenna)
 		phy->ops->set_rx_antenna(dev, antenna);
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index c66d57560e7c..867f01ce45c7 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2556,20 +2556,6 @@ init_failure:
 	return err;
 }
 
-static int b43legacy_antenna_from_ieee80211(u8 antenna)
-{
-	switch (antenna) {
-	case 0: /* default/diversity */
-		return B43legacy_ANTENNA_DEFAULT;
-	case 1: /* Antenna 0 */
-		return B43legacy_ANTENNA0;
-	case 2: /* Antenna 1 */
-		return B43legacy_ANTENNA1;
-	default:
-		return B43legacy_ANTENNA_DEFAULT;
-	}
-}
-
 static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
 				   struct ieee80211_conf *conf)
 {
@@ -2583,8 +2569,8 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
 	int err = 0;
 	u32 savedirqs;
 
-	antenna_tx = b43legacy_antenna_from_ieee80211(conf->antenna_sel_tx);
-	antenna_rx = b43legacy_antenna_from_ieee80211(conf->antenna_sel_rx);
+	antenna_tx = B43legacy_ANTENNA_DEFAULT;
+	antenna_rx = B43legacy_ANTENNA_DEFAULT;
 
 	mutex_lock(&wl->mutex);
 	dev = wl->current_dev;
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index b1dc10ccd32c..b3e75eb4d5ba 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -1198,8 +1198,7 @@ static int p54_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf)
 	struct p54_common *priv = dev->priv;
 
 	mutex_lock(&priv->conf_mutex);
-	priv->rx_antenna = (conf->antenna_sel_rx == 0) ?
-		2 : conf->antenna_sel_tx - 1;
+	priv->rx_antenna = 2; /* automatic */
 	priv->output_power = conf->power_level << 2;
 	ret = p54_set_freq(dev, cpu_to_le16(conf->channel->center_freq));
 	p54_set_vdcf(dev);
diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c
index 4d5e87b015a0..7910147157b5 100644
--- a/drivers/net/wireless/rt2x00/rt2x00config.c
+++ b/drivers/net/wireless/rt2x00/rt2x00config.c
@@ -199,23 +199,15 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev,
 	 * to work with untill the link tuner decides that an antenna
 	 * switch should be performed.
 	 */
-	if (!conf->antenna_sel_rx &&
-	    default_ant->rx != ANTENNA_SW_DIVERSITY &&
+	if (default_ant->rx != ANTENNA_SW_DIVERSITY &&
 	    default_ant->rx != active_ant->rx)
 		flags |= CONFIG_UPDATE_ANTENNA;
-	else if (conf->antenna_sel_rx &&
-		 conf->antenna_sel_rx != active_ant->rx)
-		flags |= CONFIG_UPDATE_ANTENNA;
 	else if (active_ant->rx == ANTENNA_SW_DIVERSITY)
 		flags |= CONFIG_UPDATE_ANTENNA;
 
-	if (!conf->antenna_sel_tx &&
-	    default_ant->tx != ANTENNA_SW_DIVERSITY &&
+	if (default_ant->tx != ANTENNA_SW_DIVERSITY &&
 	    default_ant->tx != active_ant->tx)
 		flags |= CONFIG_UPDATE_ANTENNA;
-	else if (conf->antenna_sel_tx &&
-		 conf->antenna_sel_tx != active_ant->tx)
-		flags |= CONFIG_UPDATE_ANTENNA;
 	else if (active_ant->tx == ANTENNA_SW_DIVERSITY)
 		flags |= CONFIG_UPDATE_ANTENNA;
 
@@ -252,18 +244,14 @@ config:
 	}
 
 	if (flags & CONFIG_UPDATE_ANTENNA) {
-		if (conf->antenna_sel_rx)
-			libconf.ant.rx = conf->antenna_sel_rx;
-		else if (default_ant->rx != ANTENNA_SW_DIVERSITY)
+		if (default_ant->rx != ANTENNA_SW_DIVERSITY)
 			libconf.ant.rx = default_ant->rx;
 		else if (active_ant->rx == ANTENNA_SW_DIVERSITY)
 			libconf.ant.rx = ANTENNA_B;
 		else
 			libconf.ant.rx = active_ant->rx;
 
-		if (conf->antenna_sel_tx)
-			libconf.ant.tx = conf->antenna_sel_tx;
-		else if (default_ant->tx != ANTENNA_SW_DIVERSITY)
+		if (default_ant->tx != ANTENNA_SW_DIVERSITY)
 			libconf.ant.tx = default_ant->tx;
 		else if (active_ant->tx == ANTENNA_SW_DIVERSITY)
 			libconf.ant.tx = ANTENNA_B;
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 86840e3585e8..9e0472bd1edf 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -249,11 +249,9 @@ static void rt2x00lib_evaluate_antenna(struct rt2x00_dev *rt2x00dev)
 	rt2x00dev->link.ant.flags &= ~ANTENNA_RX_DIVERSITY;
 	rt2x00dev->link.ant.flags &= ~ANTENNA_TX_DIVERSITY;
 
-	if (rt2x00dev->hw->conf.antenna_sel_rx == 0 &&
-	    rt2x00dev->default_ant.rx == ANTENNA_SW_DIVERSITY)
+	if (rt2x00dev->default_ant.rx == ANTENNA_SW_DIVERSITY)
 		rt2x00dev->link.ant.flags |= ANTENNA_RX_DIVERSITY;
-	if (rt2x00dev->hw->conf.antenna_sel_tx == 0 &&
-	    rt2x00dev->default_ant.tx == ANTENNA_SW_DIVERSITY)
+	if (rt2x00dev->default_ant.tx == ANTENNA_SW_DIVERSITY)
 		rt2x00dev->link.ant.flags |= ANTENNA_TX_DIVERSITY;
 
 	if (!(rt2x00dev->link.ant.flags & ANTENNA_RX_DIVERSITY) &&
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2870f3973f1a..5f28b7f89887 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -324,7 +324,7 @@ struct ieee80211_tx_altrate {
  * @flags: transmit info flags, defined above
  * @band: TBD
  * @tx_rate_idx: TBD
- * @antenna_sel_tx: TBD
+ * @antenna_sel_tx: antenna to use, 0 for automatic diversity
  * @control: union for control data
  * @status: union for status data
  * @driver_data: array of driver_data pointers
@@ -474,9 +474,6 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
  * @listen_interval: listen interval in units of beacon interval
  * @flags: configuration flags defined above
  * @power_level: requested transmit power (in dBm)
- * @antenna_sel_tx: transmit antenna selection, 0: default/diversity,
- *	1/2: antenna 0/1
- * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx
  * @ht_cap: describes current self configuration of 802.11n HT capabilities
  * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters
  * @channel: the channel to tune to
@@ -488,8 +485,6 @@ struct ieee80211_conf {
 	u16 listen_interval;
 	u32 flags;
 	int power_level;
-	u8 antenna_sel_tx;
-	u8 antenna_sel_rx;
 
 	struct ieee80211_channel *channel;
 
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 24ce54463310..767dbca50adb 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -47,10 +47,6 @@ static const struct file_operations name## _ops = {			\
 
 DEBUGFS_READONLY_FILE(frequency, 20, "%d",
 		      local->hw.conf.channel->center_freq);
-DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d",
-		      local->hw.conf.antenna_sel_tx);
-DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d",
-		      local->hw.conf.antenna_sel_rx);
 DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
 		      local->rts_threshold);
 DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d",
@@ -202,8 +198,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	local->debugfs.keys = debugfs_create_dir("keys", phyd);
 
 	DEBUGFS_ADD(frequency);
-	DEBUGFS_ADD(antenna_sel_tx);
-	DEBUGFS_ADD(antenna_sel_rx);
 	DEBUGFS_ADD(rts_threshold);
 	DEBUGFS_ADD(fragmentation_threshold);
 	DEBUGFS_ADD(short_retry_limit);
@@ -258,8 +252,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
 void debugfs_hw_del(struct ieee80211_local *local)
 {
 	DEBUGFS_DEL(frequency);
-	DEBUGFS_DEL(antenna_sel_tx);
-	DEBUGFS_DEL(antenna_sel_rx);
 	DEBUGFS_DEL(rts_threshold);
 	DEBUGFS_DEL(fragmentation_threshold);
 	DEBUGFS_DEL(short_retry_limit);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ae4ca3e8b443..88015838a63c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -727,8 +727,6 @@ struct ieee80211_local {
 		struct dentry *rcdir;
 		struct dentry *rcname;
 		struct dentry *frequency;
-		struct dentry *antenna_sel_tx;
-		struct dentry *antenna_sel_rx;
 		struct dentry *rts_threshold;
 		struct dentry *fragmentation_threshold;
 		struct dentry *short_retry_limit;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9191b510bff8..22494bcf488a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1971,7 +1971,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
 		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
 
-	info->antenna_sel_tx = local->hw.conf.antenna_sel_tx;
 	info->control.retry_limit = 1;
 
 out:
-- 
cgit v1.2.3


From e8975581f63870be42ff4662b293d1b0c8c21350 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 9 Oct 2008 12:18:51 +0200
Subject: mac80211: introduce hw config change flags

This makes mac80211 notify the driver which configuration
actually changed, e.g. channel etc.

No driver changes, this is just plumbing, driver authors are
expected to act on this if they want to.

Also remove the HW CONFIG debug printk, it's incorrect, often
we configure something else.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  3 ++-
 drivers/net/wireless/ath5k/base.c           |  7 +++----
 drivers/net/wireless/ath9k/main.c           |  4 ++--
 drivers/net/wireless/b43/main.c             |  3 ++-
 drivers/net/wireless/b43legacy/main.c       |  3 ++-
 drivers/net/wireless/iwlwifi/iwl-agn.c      |  3 ++-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  7 ++++---
 drivers/net/wireless/libertas_tf/main.c     |  4 +++-
 drivers/net/wireless/mac80211_hwsim.c       |  4 ++--
 drivers/net/wireless/p54/p54common.c        |  3 ++-
 drivers/net/wireless/rt2x00/rt2x00.h        |  2 +-
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  2 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  3 ++-
 drivers/net/wireless/rtl8180_dev.c          |  3 ++-
 drivers/net/wireless/rtl8187_dev.c          |  3 ++-
 drivers/net/wireless/zd1211rw/zd_mac.c      |  4 +++-
 include/net/mac80211.h                      | 30 ++++++++++++++++++++++++-----
 net/mac80211/cfg.c                          |  3 ++-
 net/mac80211/ieee80211_i.h                  |  2 +-
 net/mac80211/iface.c                        | 25 ++++++++++++++++++------
 net/mac80211/main.c                         | 29 +++++++++++++++-------------
 net/mac80211/scan.c                         | 12 +++++++++---
 net/mac80211/util.c                         |  3 ++-
 net/mac80211/wext.c                         | 14 +++++++-------
 24 files changed, 116 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 6e18c9d36787..9a1e0c514c08 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1314,9 +1314,10 @@ static int adm8211_set_ssid(struct ieee80211_hw *dev, u8 *ssid, size_t ssid_len)
 	return 0;
 }
 
-static int adm8211_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf)
+static int adm8211_config(struct ieee80211_hw *dev, u32 changed)
 {
 	struct adm8211_priv *priv = dev->priv;
+	struct ieee80211_conf *conf = &dev->conf;
 	int channel = ieee80211_frequency_to_channel(conf->channel->center_freq);
 
 	if (channel != priv->channel) {
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 9e47d727e220..fcd688765d04 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -219,8 +219,7 @@ static int ath5k_add_interface(struct ieee80211_hw *hw,
 		struct ieee80211_if_init_conf *conf);
 static void ath5k_remove_interface(struct ieee80211_hw *hw,
 		struct ieee80211_if_init_conf *conf);
-static int ath5k_config(struct ieee80211_hw *hw,
-		struct ieee80211_conf *conf);
+static int ath5k_config(struct ieee80211_hw *hw, u32 changed);
 static int ath5k_config_interface(struct ieee80211_hw *hw,
 		struct ieee80211_vif *vif,
 		struct ieee80211_if_conf *conf);
@@ -2780,10 +2779,10 @@ end:
  * TODO: Phy disable/diversity etc
  */
 static int
-ath5k_config(struct ieee80211_hw *hw,
-			struct ieee80211_conf *conf)
+ath5k_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct ath5k_softc *sc = hw->priv;
+	struct ieee80211_conf *conf = &hw->conf;
 
 	sc->bintval = conf->beacon_int;
 	sc->power_level = conf->power_level;
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 5e087c92a6d9..f49910799ede 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1230,11 +1230,11 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw,
 			__func__, error);
 }
 
-static int ath9k_config(struct ieee80211_hw *hw,
-			struct ieee80211_conf *conf)
+static int ath9k_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ieee80211_channel *curchan = hw->conf.channel;
+	struct ieee80211_conf *conf = &hw->conf;
 	int pos;
 
 	DPRINTF(sc, ATH_DBG_CONFIG, "%s: Set channel: %d MHz\n",
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 6b85428b0e1d..2e81af1022e4 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3320,11 +3320,12 @@ init_failure:
 	return err;
 }
 
-static int b43_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
+static int b43_op_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	struct b43_wldev *dev;
 	struct b43_phy *phy;
+	struct ieee80211_conf *conf = &hw->conf;
 	unsigned long flags;
 	int antenna;
 	int err = 0;
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 867f01ce45c7..793cc396562f 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2557,11 +2557,12 @@ init_failure:
 }
 
 static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
-				   struct ieee80211_conf *conf)
+				   u32 changed)
 {
 	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
 	struct b43legacy_wldev *dev;
 	struct b43legacy_phy *phy;
+	struct ieee80211_conf *conf = &hw->conf;
 	unsigned long flags;
 	unsigned int new_phymode = 0xFFFF;
 	int antenna_tx;
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index e6695e80fb53..79a24410dd0a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2734,10 +2734,11 @@ static int iwl4965_mac_add_interface(struct ieee80211_hw *hw,
  * be set inappropriately and the driver currently sets the hardware up to
  * use it whenever needed.
  */
-static int iwl4965_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
+static int iwl4965_mac_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct iwl_priv *priv = hw->priv;
 	const struct iwl_channel_info *ch_info;
+	struct ieee80211_conf *conf = &hw->conf;
 	unsigned long flags;
 	int ret = 0;
 	u16 channel;
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index d3a2966d9181..b1464c71ea0a 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -6415,7 +6415,7 @@ static void iwl3945_bg_abort_scan(struct work_struct *work)
 	mutex_unlock(&priv->mutex);
 }
 
-static int iwl3945_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf);
+static int iwl3945_mac_config(struct ieee80211_hw *hw, u32 changed);
 
 static void iwl3945_bg_scan_completed(struct work_struct *work)
 {
@@ -6428,7 +6428,7 @@ static void iwl3945_bg_scan_completed(struct work_struct *work)
 		return;
 
 	if (test_bit(STATUS_CONF_PENDING, &priv->status))
-		iwl3945_mac_config(priv->hw, ieee80211_get_hw_conf(priv->hw));
+		iwl3945_mac_config(priv->hw, 0);
 
 	ieee80211_scan_completed(priv->hw);
 
@@ -6616,10 +6616,11 @@ static int iwl3945_mac_add_interface(struct ieee80211_hw *hw,
  * be set inappropriately and the driver currently sets the hardware up to
  * use it whenever needed.
  */
-static int iwl3945_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
+static int iwl3945_mac_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct iwl3945_priv *priv = hw->priv;
 	const struct iwl3945_channel_info *ch_info;
+	struct ieee80211_conf *conf = &hw->conf;
 	unsigned long flags;
 	int ret = 0;
 
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index feff945ad856..241ddcfa352e 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -354,9 +354,11 @@ static void lbtf_op_remove_interface(struct ieee80211_hw *hw,
 	priv->vif = NULL;
 }
 
-static int lbtf_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
+static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct lbtf_private *priv = hw->priv;
+	struct ieee80211_conf *conf = &hw->conf;
+
 	if (conf->channel->center_freq != priv->cur_freq) {
 		priv->cur_freq = conf->channel->center_freq;
 		lbtf_set_channel(priv, conf->channel->hw_value);
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 3f236b546683..e2aeef8de707 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -359,10 +359,10 @@ static void mac80211_hwsim_beacon(unsigned long arg)
 }
 
 
-static int mac80211_hwsim_config(struct ieee80211_hw *hw,
-				 struct ieee80211_conf *conf)
+static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mac80211_hwsim_data *data = hw->priv;
+	struct ieee80211_conf *conf = &hw->conf;
 
 	printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d beacon_int=%d)\n",
 	       wiphy_name(hw->wiphy), __func__,
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index b3e75eb4d5ba..b3c297ed00cd 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -1192,10 +1192,11 @@ static void p54_remove_interface(struct ieee80211_hw *dev,
 	p54_set_filter(dev, 0, NULL);
 }
 
-static int p54_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf)
+static int p54_config(struct ieee80211_hw *dev, u32 changed)
 {
 	int ret;
 	struct p54_common *priv = dev->priv;
+	struct ieee80211_conf *conf = &dev->conf;
 
 	mutex_lock(&priv->conf_mutex);
 	priv->rx_antenna = 2; /* automatic */
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 1359a3768404..8ec8f7e9ec64 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -997,7 +997,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw,
 			    struct ieee80211_if_init_conf *conf);
 void rt2x00mac_remove_interface(struct ieee80211_hw *hw,
 				struct ieee80211_if_init_conf *conf);
-int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf);
+int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed);
 int rt2x00mac_config_interface(struct ieee80211_hw *hw,
 			       struct ieee80211_vif *vif,
 			       struct ieee80211_if_conf *conf);
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 9e0472bd1edf..697806cf94e2 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -1245,7 +1245,7 @@ int rt2x00lib_resume(struct rt2x00_dev *rt2x00dev)
 	/*
 	 * Reconfigure device.
 	 */
-	retval = rt2x00mac_config(rt2x00dev->hw, &rt2x00dev->hw->conf);
+	retval = rt2x00mac_config(rt2x00dev->hw, ~0);
 	if (retval)
 		goto exit;
 
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 2c6cc5c374ff..da7b49a364ff 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -335,9 +335,10 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface);
 
-int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
+int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
+	struct ieee80211_conf *conf = &hw->conf;
 	int radio_on;
 	int status;
 
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index 9de8f57486df..e8d22393797f 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -692,9 +692,10 @@ static void rtl8180_remove_interface(struct ieee80211_hw *dev,
 	priv->vif = NULL;
 }
 
-static int rtl8180_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf)
+static int rtl8180_config(struct ieee80211_hw *dev, u32 changed)
 {
 	struct rtl8180_priv *priv = dev->priv;
+	struct ieee80211_conf *conf = &dev->conf;
 
 	priv->rf->set_chan(dev, conf);
 
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index 9ceae9017f86..2c69ab37c650 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -873,9 +873,10 @@ static void rtl8187_remove_interface(struct ieee80211_hw *dev,
 	mutex_unlock(&priv->conf_mutex);
 }
 
-static int rtl8187_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf)
+static int rtl8187_config(struct ieee80211_hw *dev, u32 changed)
 {
 	struct rtl8187_priv *priv = dev->priv;
+	struct ieee80211_conf *conf = &dev->conf;
 	u32 reg;
 
 	mutex_lock(&priv->conf_mutex);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index de45509d757e..6c3e21887fc8 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -743,9 +743,11 @@ static void zd_op_remove_interface(struct ieee80211_hw *hw,
 	zd_write_mac_addr(&mac->chip, NULL);
 }
 
-static int zd_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf)
+static int zd_op_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
+	struct ieee80211_conf *conf = &hw->conf;
+
 	return zd_chip_set_channel(&mac->chip, conf->channel->hw_value);
 }
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5f28b7f89887..34e8569b59bb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -463,13 +463,33 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
 }
 #define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME())
 
+/**
+ * enum ieee80211_conf_changed - denotes which configuration changed
+ *
+ * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed
+ * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed
+ * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed
+ * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed
+ * @IEEE80211_CONF_CHANGE_PS: the PS flag changed
+ * @IEEE80211_CONF_CHANGE_POWER: the TX power changed
+ * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed
+ */
+enum ieee80211_conf_changed {
+	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
+	IEEE80211_CONF_CHANGE_BEACON_INTERVAL	= BIT(1),
+	IEEE80211_CONF_CHANGE_LISTEN_INTERVAL	= BIT(2),
+	IEEE80211_CONF_CHANGE_RADIOTAP		= BIT(3),
+	IEEE80211_CONF_CHANGE_PS		= BIT(4),
+	IEEE80211_CONF_CHANGE_POWER		= BIT(5),
+	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
+};
+
 /**
  * struct ieee80211_conf - configuration of the device
  *
  * This struct indicates how the driver shall configure the hardware.
  *
  * @radio_enabled: when zero, driver is required to switch off the radio.
- *	TODO make a flag
  * @beacon_int: beacon interval (TODO make interface config)
  * @listen_interval: listen interval in units of beacon interval
  * @flags: configuration flags defined above
@@ -479,13 +499,13 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
  * @channel: the channel to tune to
  */
 struct ieee80211_conf {
-	int radio_enabled;
-
 	int beacon_int;
-	u16 listen_interval;
 	u32 flags;
 	int power_level;
 
+	u16 listen_interval;
+	bool radio_enabled;
+
 	struct ieee80211_channel *channel;
 
 	struct ieee80211_sta_ht_cap ht_cap;
@@ -1214,7 +1234,7 @@ struct ieee80211_ops {
 			     struct ieee80211_if_init_conf *conf);
 	void (*remove_interface)(struct ieee80211_hw *hw,
 				 struct ieee80211_if_init_conf *conf);
-	int (*config)(struct ieee80211_hw *hw, struct ieee80211_conf *conf);
+	int (*config)(struct ieee80211_hw *hw, u32 changed);
 	int (*config_interface)(struct ieee80211_hw *hw,
 				struct ieee80211_vif *vif,
 				struct ieee80211_if_conf *conf);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a5dea617aab3..8ea30902d5db 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -394,7 +394,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
 	 */
 	if (params->interval) {
 		sdata->local->hw.conf.beacon_int = params->interval;
-		ieee80211_hw_config(sdata->local);
+		ieee80211_hw_config(sdata->local,
+				    IEEE80211_CONF_CHANGE_BEACON_INTERVAL);
 		/*
 		 * We updated some parameter so if below bails out
 		 * it's not an error.
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 88015838a63c..1deb787ff8dc 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -875,7 +875,7 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 }
 
 
-int ieee80211_hw_config(struct ieee80211_local *local);
+int ieee80211_hw_config(struct ieee80211_local *local, u32 changed);
 int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed);
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8336fee68d3e..df28c5f7c9c0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -65,7 +65,7 @@ static int ieee80211_open(struct net_device *dev)
 	struct ieee80211_if_init_conf conf;
 	u32 changed = 0;
 	int res;
-	bool need_hw_reconfig = 0;
+	u32 hw_reconf_flags = 0;
 	u8 null_addr[ETH_ALEN] = {0};
 
 	/* fail early if user set an invalid address */
@@ -152,7 +152,8 @@ static int ieee80211_open(struct net_device *dev)
 			res = local->ops->start(local_to_hw(local));
 		if (res)
 			goto err_del_bss;
-		need_hw_reconfig = 1;
+		/* we're brought up, everything changes */
+		hw_reconf_flags = ~0;
 		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
@@ -198,8 +199,10 @@ static int ieee80211_open(struct net_device *dev)
 
 		/* must be before the call to ieee80211_configure_filter */
 		local->monitors++;
-		if (local->monitors == 1)
+		if (local->monitors == 1) {
 			local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP;
+			hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP;
+		}
 
 		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
 			local->fif_fcsfail++;
@@ -279,8 +282,8 @@ static int ieee80211_open(struct net_device *dev)
 		atomic_inc(&local->iff_promiscs);
 
 	local->open_count++;
-	if (need_hw_reconfig) {
-		ieee80211_hw_config(local);
+	if (hw_reconf_flags) {
+		ieee80211_hw_config(local, hw_reconf_flags);
 		/*
 		 * set default queue parameters so drivers don't
 		 * need to initialise the hardware if the hardware
@@ -322,6 +325,7 @@ static int ieee80211_stop(struct net_device *dev)
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_init_conf conf;
 	struct sta_info *sta;
+	u32 hw_reconf_flags = 0;
 
 	/*
 	 * Stop TX on this interface first.
@@ -405,8 +409,10 @@ static int ieee80211_stop(struct net_device *dev)
 		}
 
 		local->monitors--;
-		if (local->monitors == 0)
+		if (local->monitors == 0) {
 			local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP;
+			hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP;
+		}
 
 		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
 			local->fif_fcsfail--;
@@ -504,8 +510,15 @@ static int ieee80211_stop(struct net_device *dev)
 
 		tasklet_disable(&local->tx_pending_tasklet);
 		tasklet_disable(&local->tasklet);
+
+		/* no reconfiguring after stop! */
+		hw_reconf_flags = 0;
 	}
 
+	/* do after stop to avoid reconfiguring when we stop anyway */
+	if (hw_reconf_flags)
+		ieee80211_hw_config(local, hw_reconf_flags);
+
 	return 0;
 }
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 07f812755e55..c936017f6d48 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -197,31 +197,34 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
 					    &sdata->vif, &conf);
 }
 
-int ieee80211_hw_config(struct ieee80211_local *local)
+int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 {
 	struct ieee80211_channel *chan;
 	int ret = 0;
+	int power;
 
 	if (local->sw_scanning)
 		chan = local->scan_channel;
 	else
 		chan = local->oper_channel;
 
-	local->hw.conf.channel = chan;
+	if (chan != local->hw.conf.channel) {
+		local->hw.conf.channel = chan;
+		changed |= IEEE80211_CONF_CHANGE_CHANNEL;
+	}
+
 
 	if (!local->hw.conf.power_level)
-		local->hw.conf.power_level = chan->max_power;
+		power = chan->max_power;
 	else
-		local->hw.conf.power_level = min(chan->max_power,
-					       local->hw.conf.power_level);
-
-#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: HW CONFIG: freq=%d\n",
-	       wiphy_name(local->hw.wiphy), chan->center_freq);
-#endif
+		power = min(chan->max_power, local->hw.conf.power_level);
+	if (local->hw.conf.power_level != power) {
+		changed |= IEEE80211_CONF_CHANGE_POWER;
+		local->hw.conf.power_level = power;
+	}
 
-	if (local->open_count) {
-		ret = local->ops->config(local_to_hw(local), &local->hw.conf);
+	if (changed && local->open_count) {
+		ret = local->ops->config(local_to_hw(local), changed);
 		/*
 		 * HW reconfiguration should never fail, the driver has told
 		 * us what it can support so it should live up to that promise.
@@ -672,7 +675,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
 	local->short_retry_limit = 7;
 	local->long_retry_limit = 4;
-	local->hw.conf.radio_enabled = 1;
+	local->hw.conf.radio_enabled = true;
 
 	INIT_LIST_HEAD(&local->interfaces);
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 0989b1c062e3..7372d7abb8c0 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -448,12 +448,17 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
 
 	if (local->hw_scanning) {
 		local->hw_scanning = false;
-		ieee80211_hw_config(local);
+		/*
+		 * Somebody might have requested channel change during scan
+		 * that we won't have acted upon, try now. ieee80211_hw_config
+		 * will set the flag based on actual changes.
+		 */
+		ieee80211_hw_config(local, 0);
 		goto done;
 	}
 
 	local->sw_scanning = false;
-	ieee80211_hw_config(local);
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 
 	netif_tx_lock_bh(local->mdev);
 	netif_addr_lock(local->mdev);
@@ -540,7 +545,8 @@ void ieee80211_scan_work(struct work_struct *work)
 
 		if (!skip) {
 			local->scan_channel = chan;
-			if (ieee80211_hw_config(local))
+			if (ieee80211_hw_config(local,
+						IEEE80211_CONF_CHANGE_CHANNEL))
 				skip = 1;
 		}
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9941a60a2327..3288c3de67ca 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -645,7 +645,8 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 		if (local->sw_scanning || local->hw_scanning)
 			ret = 0;
 		else
-			ret = ieee80211_hw_config(local);
+			ret = ieee80211_hw_config(
+				local, IEEE80211_CONF_CHANGE_CHANNEL);
 
 		rate_control_clear(local);
 	}
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index a3af15141244..94c4b35eeb14 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -656,7 +656,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
 				      union iwreq_data *data, char *extra)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	bool need_reconfig = 0;
+	u32 reconf_flags = 0;
 	int new_power_level;
 
 	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
@@ -680,17 +680,17 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
 
 	if (local->hw.conf.power_level != new_power_level) {
 		local->hw.conf.power_level = new_power_level;
-		need_reconfig = 1;
+		reconf_flags |= IEEE80211_CONF_CHANGE_POWER;
 	}
 
 	if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
 		local->hw.conf.radio_enabled = !(data->txpower.disabled);
-		need_reconfig = 1;
+		reconf_flags |= IEEE80211_CONF_CHANGE_RADIO_ENABLED;
 		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
 	}
 
-	if (need_reconfig)
-		ieee80211_hw_config(local);
+	if (reconf_flags)
+		ieee80211_hw_config(local, reconf_flags);
 
 	return 0;
 }
@@ -976,7 +976,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 
 	if (wrq->disabled) {
 		conf->flags &= ~IEEE80211_CONF_PS;
-		return ieee80211_hw_config(local);
+		return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	}
 
 	switch (wrq->flags & IW_POWER_MODE) {
@@ -989,7 +989,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	return ieee80211_hw_config(local);
+	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 }
 
 static int ieee80211_ioctl_giwpower(struct net_device *dev,
-- 
cgit v1.2.3


From d51626df5747efaa8d2c00678f64cb503845effe Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 9 Oct 2008 12:20:13 +0200
Subject: nl80211: export HT capabilities

This exports the local HT capabilities in nl80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 12 ++++++++++++
 net/wireless/nl80211.c  | 13 +++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 9bad65400fba..41720d47d618 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -452,17 +452,29 @@ enum nl80211_mpath_info {
  *	an array of nested frequency attributes
  * @NL80211_BAND_ATTR_RATES: supported bitrates in this band,
  *	an array of nested bitrate attributes
+ * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as
+ *	defined in 802.11n
+ * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n
+ * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n
  */
 enum nl80211_band_attr {
 	__NL80211_BAND_ATTR_INVALID,
 	NL80211_BAND_ATTR_FREQS,
 	NL80211_BAND_ATTR_RATES,
 
+	NL80211_BAND_ATTR_HT_MCS_SET,
+	NL80211_BAND_ATTR_HT_CAPA,
+	NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
+	NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
+
 	/* keep last */
 	__NL80211_BAND_ATTR_AFTER_LAST,
 	NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1
 };
 
+#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA
+
 /**
  * enum nl80211_frequency_attr - frequency attributes
  * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 572793c8c7ab..4d12e885170e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -157,6 +157,19 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		if (!nl_band)
 			goto nla_put_failure;
 
+		/* add HT info */
+		if (dev->wiphy.bands[band]->ht_cap.ht_supported) {
+			NLA_PUT(msg, NL80211_BAND_ATTR_HT_MCS_SET,
+				sizeof(dev->wiphy.bands[band]->ht_cap.mcs),
+				&dev->wiphy.bands[band]->ht_cap.mcs);
+			NLA_PUT_U16(msg, NL80211_BAND_ATTR_HT_CAPA,
+				dev->wiphy.bands[band]->ht_cap.cap);
+			NLA_PUT_U8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
+				dev->wiphy.bands[band]->ht_cap.ampdu_factor);
+			NLA_PUT_U8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
+				dev->wiphy.bands[band]->ht_cap.ampdu_density);
+		}
+
 		/* add frequencies */
 		nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS);
 		if (!nl_freqs)
-- 
cgit v1.2.3


From 9124b07740c51cbc6e358dd0c4abc6ee8ded084d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 14 Oct 2008 19:17:54 +0200
Subject: mac80211: make retry limits part of hw config

Instead of having a separate callback, use the HW config callback
with a new flag to change retry limits.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c       |  1 -
 drivers/net/wireless/b43/main.c         | 60 +++++++++++++--------------------
 drivers/net/wireless/b43legacy/main.c   | 59 ++++++++++++--------------------
 drivers/net/wireless/rt2x00/rt2400pci.c |  2 +-
 drivers/net/wireless/rt2x00/rt2500pci.c |  2 +-
 drivers/net/wireless/rt2x00/rt2x00.h    |  3 ++
 drivers/net/wireless/rt2x00/rt2x00mac.c |  9 +++++
 drivers/net/wireless/rt2x00/rt61pci.c   |  2 +-
 drivers/net/wireless/rt2x00/rt73usb.c   |  2 +-
 include/net/mac80211.h                  | 14 +++++---
 net/mac80211/debugfs.c                  |  4 +--
 net/mac80211/ieee80211_i.h              |  2 --
 net/mac80211/main.c                     |  4 +--
 net/mac80211/tx.c                       |  4 +--
 net/mac80211/wext.c                     | 28 +++++++--------
 15 files changed, 89 insertions(+), 107 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index f49910799ede..41cd114c438c 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1659,7 +1659,6 @@ static struct ieee80211_ops ath9k_ops = {
 	.get_tkip_seq       = NULL,
 	.set_rts_threshold  = NULL,
 	.set_frag_threshold = NULL,
-	.set_retry_limit    = NULL,
 	.get_tsf 	    = ath9k_get_tsf,
 	.reset_tsf 	    = ath9k_reset_tsf,
 	.tx_last_beacon     = NULL,
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 2e81af1022e4..9aeeb6553a91 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3320,6 +3320,22 @@ init_failure:
 	return err;
 }
 
+/* Write the short and long frame retry limit values. */
+static void b43_set_retry_limits(struct b43_wldev *dev,
+				 unsigned int short_retry,
+				 unsigned int long_retry)
+{
+	/* The retry limit is a 4-bit counter. Enforce this to avoid overflowing
+	 * the chip-internal counter. */
+	short_retry = min(short_retry, (unsigned int)0xF);
+	long_retry = min(long_retry, (unsigned int)0xF);
+
+	b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_SRLIMIT,
+			short_retry);
+	b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_LRLIMIT,
+			long_retry);
+}
+
 static int b43_op_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
@@ -3340,6 +3356,13 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed)
 	dev = wl->current_dev;
 	phy = &dev->phy;
 
+	if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS)
+		b43_set_retry_limits(dev, conf->short_frame_max_tx_count,
+					  conf->long_frame_max_tx_count);
+	changed &= ~IEEE80211_CONF_CHANGE_RETRY_LIMITS;
+	if (!changed)
+		goto out_unlock_mutex;
+
 	/* Disable IRQs while reconfiguring the device.
 	 * This makes it possible to drop the spinlock throughout
 	 * the reconfiguration process. */
@@ -3859,22 +3882,6 @@ static void b43_imcfglo_timeouts_workaround(struct b43_wldev *dev)
 #endif /* CONFIG_SSB_DRIVER_PCICORE */
 }
 
-/* Write the short and long frame retry limit values. */
-static void b43_set_retry_limits(struct b43_wldev *dev,
-				 unsigned int short_retry,
-				 unsigned int long_retry)
-{
-	/* The retry limit is a 4-bit counter. Enforce this to avoid overflowing
-	 * the chip-internal counter. */
-	short_retry = min(short_retry, (unsigned int)0xF);
-	long_retry = min(long_retry, (unsigned int)0xF);
-
-	b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_SRLIMIT,
-			short_retry);
-	b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_LRLIMIT,
-			long_retry);
-}
-
 static void b43_set_synth_pu_delay(struct b43_wldev *dev, bool idle)
 {
 	u16 pu_delay;
@@ -4195,26 +4202,6 @@ static void b43_op_stop(struct ieee80211_hw *hw)
 	cancel_work_sync(&(wl->txpower_adjust_work));
 }
 
-static int b43_op_set_retry_limit(struct ieee80211_hw *hw,
-				  u32 short_retry_limit, u32 long_retry_limit)
-{
-	struct b43_wl *wl = hw_to_b43_wl(hw);
-	struct b43_wldev *dev;
-	int err = 0;
-
-	mutex_lock(&wl->mutex);
-	dev = wl->current_dev;
-	if (unlikely(!dev || (b43_status(dev) < B43_STAT_INITIALIZED))) {
-		err = -ENODEV;
-		goto out_unlock;
-	}
-	b43_set_retry_limits(dev, short_retry_limit, long_retry_limit);
-out_unlock:
-	mutex_unlock(&wl->mutex);
-
-	return err;
-}
-
 static int b43_op_beacon_set_tim(struct ieee80211_hw *hw,
 				 struct ieee80211_sta *sta, bool set)
 {
@@ -4251,7 +4238,6 @@ static const struct ieee80211_ops b43_hw_ops = {
 	.get_tx_stats		= b43_op_get_tx_stats,
 	.start			= b43_op_start,
 	.stop			= b43_op_stop,
-	.set_retry_limit	= b43_op_set_retry_limit,
 	.set_tim		= b43_op_beacon_set_tim,
 	.sta_notify		= b43_op_sta_notify,
 };
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 793cc396562f..78e46365f69e 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2556,6 +2556,20 @@ init_failure:
 	return err;
 }
 
+/* Write the short and long frame retry limit values. */
+static void b43legacy_set_retry_limits(struct b43legacy_wldev *dev,
+				       unsigned int short_retry,
+				       unsigned int long_retry)
+{
+	/* The retry limit is a 4-bit counter. Enforce this to avoid overflowing
+	 * the chip-internal counter. */
+	short_retry = min(short_retry, (unsigned int)0xF);
+	long_retry = min(long_retry, (unsigned int)0xF);
+
+	b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0006, short_retry);
+	b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0007, long_retry);
+}
+
 static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
 				   u32 changed)
 {
@@ -2577,6 +2591,14 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
 	dev = wl->current_dev;
 	phy = &dev->phy;
 
+	if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS)
+		b43legacy_set_retry_limits(dev,
+					   conf->short_frame_max_tx_count,
+					   conf->long_frame_max_tx_count);
+	changed &= ~IEEE80211_CONF_CHANGE_RETRY_LIMITS;
+	if (!changed)
+		goto out_unlock_mutex;
+
 	/* Switch the PHY mode (if necessary). */
 	switch (conf->channel->band) {
 	case IEEE80211_BAND_2GHZ:
@@ -2989,20 +3011,6 @@ static void b43legacy_imcfglo_timeouts_workaround(struct b43legacy_wldev *dev)
 #endif /* CONFIG_SSB_DRIVER_PCICORE */
 }
 
-/* Write the short and long frame retry limit values. */
-static void b43legacy_set_retry_limits(struct b43legacy_wldev *dev,
-				       unsigned int short_retry,
-				       unsigned int long_retry)
-{
-	/* The retry limit is a 4-bit counter. Enforce this to avoid overflowing
-	 * the chip-internal counter. */
-	short_retry = min(short_retry, (unsigned int)0xF);
-	long_retry = min(long_retry, (unsigned int)0xF);
-
-	b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0006, short_retry);
-	b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0007, long_retry);
-}
-
 static void b43legacy_set_synth_pu_delay(struct b43legacy_wldev *dev,
 					  bool idle) {
 	u16 pu_delay = 1050;
@@ -3367,28 +3375,6 @@ static void b43legacy_op_stop(struct ieee80211_hw *hw)
 	mutex_unlock(&wl->mutex);
 }
 
-static int b43legacy_op_set_retry_limit(struct ieee80211_hw *hw,
-					u32 short_retry_limit,
-					u32 long_retry_limit)
-{
-	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
-	struct b43legacy_wldev *dev;
-	int err = 0;
-
-	mutex_lock(&wl->mutex);
-	dev = wl->current_dev;
-	if (unlikely(!dev ||
-		     (b43legacy_status(dev) < B43legacy_STAT_INITIALIZED))) {
-		err = -ENODEV;
-		goto out_unlock;
-	}
-	b43legacy_set_retry_limits(dev, short_retry_limit, long_retry_limit);
-out_unlock:
-	mutex_unlock(&wl->mutex);
-
-	return err;
-}
-
 static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw,
 				       struct ieee80211_sta *sta, bool set)
 {
@@ -3414,7 +3400,6 @@ static const struct ieee80211_ops b43legacy_hw_ops = {
 	.get_tx_stats		= b43legacy_op_get_tx_stats,
 	.start			= b43legacy_op_start,
 	.stop			= b43legacy_op_stop,
-	.set_retry_limit	= b43legacy_op_set_retry_limit,
 	.set_tim		= b43legacy_op_beacon_set_tim,
 };
 
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index 96eaf5f4dc68..1adca7a1b9dc 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -1574,7 +1574,6 @@ static const struct ieee80211_ops rt2400pci_mac80211_ops = {
 	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.get_stats		= rt2x00mac_get_stats,
-	.set_retry_limit	= rt2400pci_set_retry_limit,
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
 	.conf_tx		= rt2400pci_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
@@ -1603,6 +1602,7 @@ static const struct rt2x00lib_ops rt2400pci_rt2x00_ops = {
 	.config_intf		= rt2400pci_config_intf,
 	.config_erp		= rt2400pci_config_erp,
 	.config			= rt2400pci_config,
+	.set_retry_limit	= rt2400pci_set_retry_limit,
 };
 
 static const struct data_queue_desc rt2400pci_queue_rx = {
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index 8b772ab613e4..85b0387f46eb 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -1874,7 +1874,6 @@ static const struct ieee80211_ops rt2500pci_mac80211_ops = {
 	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.get_stats		= rt2x00mac_get_stats,
-	.set_retry_limit	= rt2500pci_set_retry_limit,
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
 	.conf_tx		= rt2x00mac_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
@@ -1903,6 +1902,7 @@ static const struct rt2x00lib_ops rt2500pci_rt2x00_ops = {
 	.config_intf		= rt2500pci_config_intf,
 	.config_erp		= rt2500pci_config_erp,
 	.config			= rt2500pci_config,
+	.set_retry_limit	= rt2500pci_set_retry_limit,
 };
 
 static const struct data_queue_desc rt2500pci_queue_rx = {
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 8ec8f7e9ec64..0887e895d5c1 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -599,6 +599,9 @@ struct rt2x00lib_ops {
 #define CONFIG_UPDATE_SLOT_TIME 	( 1 << 5 )
 #define CONFIG_UPDATE_BEACON_INT	( 1 << 6 )
 #define CONFIG_UPDATE_ALL		0xffff
+
+	int (*set_retry_limit) (struct ieee80211_hw *hw,
+				u32 short_limit, u32 long_limit);
 };
 
 /*
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index da7b49a364ff..931183369f07 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -349,6 +349,15 @@ int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed)
 	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
 		return 0;
 
+	if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS) {
+		rt2x00dev->ops->lib->set_retry_limit(hw,
+			conf->short_frame_max_tx_count,
+			conf->long_frame_max_tx_count);
+	}
+	changed &= ~IEEE80211_CONF_CHANGE_RETRY_LIMITS;
+	if (!changed)
+		return 0;
+
 	/*
 	 * Only change device state when the radio is enabled. It does not
 	 * matter what parameters we have configured when the radio is disabled
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 45f69f83552c..08eb896ae36c 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -2724,7 +2724,6 @@ static const struct ieee80211_ops rt61pci_mac80211_ops = {
 	.configure_filter	= rt2x00mac_configure_filter,
 	.set_key		= rt2x00mac_set_key,
 	.get_stats		= rt2x00mac_get_stats,
-	.set_retry_limit	= rt61pci_set_retry_limit,
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
 	.conf_tx		= rt61pci_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
@@ -2757,6 +2756,7 @@ static const struct rt2x00lib_ops rt61pci_rt2x00_ops = {
 	.config_intf		= rt61pci_config_intf,
 	.config_erp		= rt61pci_config_erp,
 	.config			= rt61pci_config,
+	.set_retry_limit	= rt61pci_set_retry_limit,
 };
 
 static const struct data_queue_desc rt61pci_queue_rx = {
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index 336fecb04c46..7f89782a34e4 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -2315,7 +2315,6 @@ static const struct ieee80211_ops rt73usb_mac80211_ops = {
 	.configure_filter	= rt2x00mac_configure_filter,
 	.set_key		= rt2x00mac_set_key,
 	.get_stats		= rt2x00mac_get_stats,
-	.set_retry_limit	= rt73usb_set_retry_limit,
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
 	.conf_tx		= rt73usb_conf_tx,
 	.get_tx_stats		= rt2x00mac_get_tx_stats,
@@ -2347,6 +2346,7 @@ static const struct rt2x00lib_ops rt73usb_rt2x00_ops = {
 	.config_intf		= rt73usb_config_intf,
 	.config_erp		= rt73usb_config_erp,
 	.config			= rt73usb_config,
+	.set_retry_limit	= rt73usb_set_retry_limit,
 };
 
 static const struct data_queue_desc rt73usb_queue_rx = {
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 34e8569b59bb..fd52300b96d0 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -473,6 +473,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
  * @IEEE80211_CONF_CHANGE_PS: the PS flag changed
  * @IEEE80211_CONF_CHANGE_POWER: the TX power changed
  * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed
+ * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed
  */
 enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
@@ -482,6 +483,7 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_PS		= BIT(4),
 	IEEE80211_CONF_CHANGE_POWER		= BIT(5),
 	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
+	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
 };
 
 /**
@@ -497,6 +499,12 @@ enum ieee80211_conf_changed {
  * @ht_cap: describes current self configuration of 802.11n HT capabilities
  * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters
  * @channel: the channel to tune to
+ * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame
+ *    (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11,
+ *    but actually means the number of transmissions not the number of retries
+ * @short_frame_max_tx_count: Maximum number of transmissions for a "short"
+ *    frame, called "dot11ShortRetryLimit" in 802.11, but actually means the
+ *    number of transmissions not the number of retries
  */
 struct ieee80211_conf {
 	int beacon_int;
@@ -506,6 +514,8 @@ struct ieee80211_conf {
 	u16 listen_interval;
 	bool radio_enabled;
 
+	u8 long_frame_max_tx_count, short_frame_max_tx_count;
+
 	struct ieee80211_channel *channel;
 
 	struct ieee80211_sta_ht_cap ht_cap;
@@ -1190,8 +1200,6 @@ enum ieee80211_ampdu_mlme_action {
  *	the device does fragmentation by itself; if this method is assigned then
  *	the stack will not do fragmentation.
  *
- * @set_retry_limit: Configuration of retry limits (if device needs it)
- *
  * @sta_notify: Notifies low level driver about addition or removal
  *	of assocaited station or AP.
  *
@@ -1261,8 +1269,6 @@ struct ieee80211_ops {
 			     u32 *iv32, u16 *iv16);
 	int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value);
 	int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value);
-	int (*set_retry_limit)(struct ieee80211_hw *hw,
-			       u32 short_retry, u32 long_retr);
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			enum sta_notify_cmd, struct ieee80211_sta *sta);
 	int (*conf_tx)(struct ieee80211_hw *hw, u16 queue,
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 767dbca50adb..2697a2fe608f 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -52,9 +52,9 @@ DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
 DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d",
 		      local->fragmentation_threshold);
 DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d",
-		      local->short_retry_limit);
+		      local->hw.conf.short_frame_max_tx_count);
 DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d",
-		      local->long_retry_limit);
+		      local->hw.conf.long_frame_max_tx_count);
 DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d",
 		      local->total_ps_buffered);
 DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x",
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 819844726dd4..f1ef522bfe07 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -635,8 +635,6 @@ struct ieee80211_local {
 
 	int rts_threshold;
 	int fragmentation_threshold;
-	int short_retry_limit; /* dot11ShortRetryLimit */
-	int long_retry_limit; /* dot11LongRetryLimit */
 
 	struct crypto_blkcipher *wep_tx_tfm;
 	struct crypto_blkcipher *wep_rx_tfm;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c936017f6d48..59be9e782699 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -673,8 +673,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
 	local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
-	local->short_retry_limit = 7;
-	local->long_retry_limit = 4;
+	local->hw.conf.long_frame_max_tx_count = 4;
+	local->hw.conf.short_frame_max_tx_count = 7;
 	local->hw.conf.radio_enabled = true;
 
 	INIT_LIST_HEAD(&local->interfaces);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8bcabefa86e0..dd440a07634e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -505,10 +505,10 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 				info->flags |=
 					IEEE80211_TX_CTL_LONG_RETRY_LIMIT;
 				info->control.retry_limit =
-					tx->local->long_retry_limit;
+					tx->local->hw.conf.long_frame_max_tx_count - 1;
 			} else {
 				info->control.retry_limit =
-					tx->local->short_retry_limit;
+					tx->local->hw.conf.short_frame_max_tx_count - 1;
 			}
 		} else {
 			info->control.retry_limit = 1;
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 94c4b35eeb14..f7e442f80a17 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -802,21 +802,16 @@ static int ieee80211_ioctl_siwretry(struct net_device *dev,
 	    (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT)
 		return -EINVAL;
 
-	if (retry->flags & IW_RETRY_MAX)
-		local->long_retry_limit = retry->value;
-	else if (retry->flags & IW_RETRY_MIN)
-		local->short_retry_limit = retry->value;
-	else {
-		local->long_retry_limit = retry->value;
-		local->short_retry_limit = retry->value;
+	if (retry->flags & IW_RETRY_MAX) {
+		local->hw.conf.long_frame_max_tx_count = retry->value;
+	} else if (retry->flags & IW_RETRY_MIN) {
+		local->hw.conf.short_frame_max_tx_count = retry->value;
+	} else {
+		local->hw.conf.long_frame_max_tx_count = retry->value;
+		local->hw.conf.short_frame_max_tx_count = retry->value;
 	}
 
-	if (local->ops->set_retry_limit) {
-		return local->ops->set_retry_limit(
-			local_to_hw(local),
-			local->short_retry_limit,
-			local->long_retry_limit);
-	}
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS);
 
 	return 0;
 }
@@ -833,14 +828,15 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev,
 		/* first return min value, iwconfig will ask max value
 		 * later if needed */
 		retry->flags |= IW_RETRY_LIMIT;
-		retry->value = local->short_retry_limit;
-		if (local->long_retry_limit != local->short_retry_limit)
+		retry->value = local->hw.conf.short_frame_max_tx_count;
+		if (local->hw.conf.long_frame_max_tx_count !=
+		    local->hw.conf.short_frame_max_tx_count)
 			retry->flags |= IW_RETRY_MIN;
 		return 0;
 	}
 	if (retry->flags & IW_RETRY_MAX) {
 		retry->flags = IW_RETRY_LIMIT | IW_RETRY_MAX;
-		retry->value = local->long_retry_limit;
+		retry->value = local->hw.conf.long_frame_max_tx_count;
 	}
 
 	return 0;
-- 
cgit v1.2.3


From bda3933a8aceedd03e0dd410844bd310033ca756 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 11 Oct 2008 01:51:51 +0200
Subject: mac80211: move bss_conf into vif

Move bss_conf into the vif struct so that drivers can
access it during ->tx without having to store it in
the private data or similar. No driver updates because
this is only for when they want to start using it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h        |  3 +++
 net/mac80211/cfg.c            |  6 +++---
 net/mac80211/debugfs_netdev.c |  2 +-
 net/mac80211/ieee80211_i.h    |  3 ---
 net/mac80211/iface.c          |  2 +-
 net/mac80211/main.c           |  8 ++++----
 net/mac80211/mlme.c           | 30 +++++++++++++++---------------
 net/mac80211/tx.c             | 16 ++++++++--------
 net/mac80211/util.c           |  6 +++---
 9 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index fd52300b96d0..94ff3eface49 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -529,11 +529,14 @@ struct ieee80211_conf {
  * use during the life of a virtual interface.
  *
  * @type: type of this virtual interface
+ * @bss_conf: BSS configuration for this interface, either our own
+ *	or the BSS we're associated to
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *).
  */
 struct ieee80211_vif {
 	enum nl80211_iftype type;
+	struct ieee80211_bss_conf bss_conf;
 	/* must be last */
 	u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *))));
 };
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8ea30902d5db..28382b5c7c25 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -964,16 +964,16 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 		return -EINVAL;
 
 	if (params->use_cts_prot >= 0) {
-		sdata->bss_conf.use_cts_prot = params->use_cts_prot;
+		sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot;
 		changed |= BSS_CHANGED_ERP_CTS_PROT;
 	}
 	if (params->use_short_preamble >= 0) {
-		sdata->bss_conf.use_short_preamble =
+		sdata->vif.bss_conf.use_short_preamble =
 			params->use_short_preamble;
 		changed |= BSS_CHANGED_ERP_PREAMBLE;
 	}
 	if (params->use_short_slot_time >= 0) {
-		sdata->bss_conf.use_short_slot =
+		sdata->vif.bss_conf.use_short_slot =
 			params->use_short_slot_time;
 		changed |= BSS_CHANGED_ERP_SLOT;
 	}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 56f3f29385cb..c54219301724 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -119,7 +119,7 @@ static ssize_t ieee80211_if_fmt_flags(
 		 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "",
 		 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "",
 		 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "",
-		 sdata->bss_conf.use_cts_prot ? "CTS prot\n" : "");
+		 sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : "");
 }
 __IEEE80211_IF_FILE(flags);
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f1ef522bfe07..859b5b001f22 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -440,9 +440,6 @@ struct ieee80211_sub_if_data {
 
 	u16 sequence_number;
 
-	/* BSS configuration for this interface. */
-	struct ieee80211_bss_conf bss_conf;
-
 	/*
 	 * AP this belongs to: self in AP mode and
 	 * corresponding AP in VLAN mode, NULL for
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index df28c5f7c9c0..cde145221b61 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -695,7 +695,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	ieee80211_setup_sdata(sdata, type);
 
 	/* reset some values that shouldn't be kept across type changes */
-	sdata->bss_conf.basic_rates =
+	sdata->vif.bss_conf.basic_rates =
 		ieee80211_mandatory_rates(sdata->local,
 			sdata->local->hw.conf.channel->band);
 	sdata->drop_unencrypted = 0;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 59be9e782699..ab8a860444af 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -249,15 +249,15 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 	if (local->ops->bss_info_changed)
 		local->ops->bss_info_changed(local_to_hw(local),
 					     &sdata->vif,
-					     &sdata->bss_conf,
+					     &sdata->vif.bss_conf,
 					     changed);
 }
 
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
 {
-	sdata->bss_conf.use_cts_prot = false;
-	sdata->bss_conf.use_short_preamble = false;
-	sdata->bss_conf.use_short_slot = false;
+	sdata->vif.bss_conf.use_cts_prot = false;
+	sdata->vif.bss_conf.use_short_preamble = false;
+	sdata->vif.bss_conf.use_short_slot = false;
 	return BSS_CHANGED_ERP_CTS_PROT |
 	       BSS_CHANGED_ERP_PREAMBLE |
 	       BSS_CHANGED_ERP_SLOT;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 196dd39f6286..9c5f5c37a49e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -572,7 +572,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
 					   u16 capab, bool erp_valid, u8 erp)
 {
-	struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	struct ieee80211_if_sta *ifsta = &sdata->u.sta;
 #endif
@@ -718,9 +718,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 				   ifsta->ssid, ifsta->ssid_len);
 	if (bss) {
 		/* set timing information */
-		sdata->bss_conf.beacon_int = bss->beacon_int;
-		sdata->bss_conf.timestamp = bss->timestamp;
-		sdata->bss_conf.dtim_period = bss->dtim_period;
+		sdata->vif.bss_conf.beacon_int = bss->beacon_int;
+		sdata->vif.bss_conf.timestamp = bss->timestamp;
+		sdata->vif.bss_conf.dtim_period = bss->dtim_period;
 
 		changed |= ieee80211_handle_bss_capability(sdata,
 			bss->capability, bss->has_erp_value, bss->erp_value);
@@ -730,9 +730,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
 		changed |= BSS_CHANGED_HT;
-		sdata->bss_conf.assoc_ht = 1;
-		sdata->bss_conf.ht_cap = &conf->ht_cap;
-		sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf;
+		sdata->vif.bss_conf.assoc_ht = 1;
+		sdata->vif.bss_conf.ht_cap = &conf->ht_cap;
+		sdata->vif.bss_conf.ht_bss_conf = &conf->ht_bss_conf;
 	}
 
 	ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
@@ -742,7 +742,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	ifsta->last_probe = jiffies;
 	ieee80211_led_assoc(local, 1);
 
-	sdata->bss_conf.assoc = 1;
+	sdata->vif.bss_conf.assoc = 1;
 	/*
 	 * For now just always ask the driver to update the basic rateset
 	 * when we have associated, we aren't checking whether it actually
@@ -847,15 +847,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 	changed |= ieee80211_reset_erp_info(sdata);
 
-	if (sdata->bss_conf.assoc_ht)
+	if (sdata->vif.bss_conf.assoc_ht)
 		changed |= BSS_CHANGED_HT;
 
-	sdata->bss_conf.assoc_ht = 0;
-	sdata->bss_conf.ht_cap = NULL;
-	sdata->bss_conf.ht_bss_conf = NULL;
+	sdata->vif.bss_conf.assoc_ht = 0;
+	sdata->vif.bss_conf.ht_cap = NULL;
+	sdata->vif.bss_conf.ht_bss_conf = NULL;
 
 	ieee80211_led_assoc(local, 0);
-	sdata->bss_conf.assoc = 0;
+	sdata->vif.bss_conf.assoc = 0;
 
 	ieee80211_sta_send_apinfo(sdata, ifsta);
 
@@ -1182,7 +1182,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	u64 rates, basic_rates;
 	u16 capab_info, status_code, aid;
 	struct ieee802_11_elems elems;
-	struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	u8 *pos;
 	int i, j;
 	bool have_higher_than_11mbit = false;
@@ -1324,7 +1324,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	}
 
 	sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
-	sdata->bss_conf.basic_rates = basic_rates;
+	sdata->vif.bss_conf.basic_rates = basic_rates;
 
 	/* cf. IEEE 802.11 9.2.12 */
 	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index dd440a07634e..6f3e4be97631 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -116,7 +116,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 		if (r->bitrate > txrate->bitrate)
 			break;
 
-		if (tx->sdata->bss_conf.basic_rates & BIT(i))
+		if (tx->sdata->vif.bss_conf.basic_rates & BIT(i))
 			rate = r->bitrate;
 
 		switch (sband->band) {
@@ -150,7 +150,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	 * to closest integer */
 
 	dur = ieee80211_frame_duration(local, 10, rate, erp,
-				tx->sdata->bss_conf.use_short_preamble);
+				tx->sdata->vif.bss_conf.use_short_preamble);
 
 	if (next_frag_len) {
 		/* Frame is fragmented: duration increases with time needed to
@@ -159,7 +159,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 		/* next fragment */
 		dur += ieee80211_frame_duration(local, next_frag_len,
 				txrate->bitrate, erp,
-				tx->sdata->bss_conf.use_short_preamble);
+				tx->sdata->vif.bss_conf.use_short_preamble);
 	}
 
 	return cpu_to_le16(dur);
@@ -463,7 +463,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	} else
 		info->control.retries[0].rate_idx = -1;
 
-	if (tx->sdata->bss_conf.use_cts_prot &&
+	if (tx->sdata->vif.bss_conf.use_cts_prot &&
 	    (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) {
 		tx->last_frag_rate_idx = tx->rate_idx;
 		if (rsel.probe_idx >= 0)
@@ -529,7 +529,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) &&
 	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) &&
 	    (tx->flags & IEEE80211_TX_UNICAST) &&
-	    tx->sdata->bss_conf.use_cts_prot &&
+	    tx->sdata->vif.bss_conf.use_cts_prot &&
 	    !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS))
 		info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT;
 
@@ -538,7 +538,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 	 * available on the network at the current point in time. */
 	if (ieee80211_is_data(hdr->frame_control) &&
 	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
-	    tx->sdata->bss_conf.use_short_preamble &&
+	    tx->sdata->vif.bss_conf.use_short_preamble &&
 	    (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) {
 		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
 	}
@@ -558,7 +558,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
 		for (idx = 0; idx < sband->n_bitrates; idx++) {
 			if (sband->bitrates[idx].bitrate > rate->bitrate)
 				continue;
-			if (tx->sdata->bss_conf.basic_rates & BIT(idx) &&
+			if (tx->sdata->vif.bss_conf.basic_rates & BIT(idx) &&
 			    (baserate < 0 ||
 			     (sband->bitrates[baserate].bitrate
 			      < sband->bitrates[idx].bitrate)))
@@ -1977,7 +1977,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	info->flags |= IEEE80211_TX_CTL_NO_ACK;
 	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 	info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
-	if (sdata->bss_conf.use_short_preamble &&
+	if (sdata->vif.bss_conf.use_short_preamble &&
 	    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
 		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3288c3de67ca..ec8b6335f0c1 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -239,7 +239,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
 	erp = 0;
 	if (vif) {
 		sdata = vif_to_sdata(vif);
-		short_preamble = sdata->bss_conf.use_short_preamble;
+		short_preamble = sdata->vif.bss_conf.use_short_preamble;
 		if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
 			erp = rate->flags & IEEE80211_RATE_ERP_G;
 	}
@@ -272,7 +272,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
 	erp = 0;
 	if (vif) {
 		sdata = vif_to_sdata(vif);
-		short_preamble = sdata->bss_conf.use_short_preamble;
+		short_preamble = sdata->vif.bss_conf.use_short_preamble;
 		if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
 			erp = rate->flags & IEEE80211_RATE_ERP_G;
 	}
@@ -312,7 +312,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
 	erp = 0;
 	if (vif) {
 		sdata = vif_to_sdata(vif);
-		short_preamble = sdata->bss_conf.use_short_preamble;
+		short_preamble = sdata->vif.bss_conf.use_short_preamble;
 		if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
 			erp = rate->flags & IEEE80211_RATE_ERP_G;
 	}
-- 
cgit v1.2.3


From ae5eb02641233a4e9d1b92d22090f1b1afa14466 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 14 Oct 2008 16:58:37 +0200
Subject: mac80211: rewrite HT handling

The HT handling has the following deficiencies, which I've
(partially) fixed:
 * it always uses the AP info even if there is no AP,
   hence has no chance of working as an AP
 * it pretends to be HW config, but really is per-BSS
 * channel sanity checking is left to the drivers
 * it generally lets the driver control too much

HT enabling is still wrong with this patch if you have more than
one virtual STA mode interface, but that never happens currently.
Once WDS, IBSS or AP/VLAN gets HT capabilities, it will also be
wrong, see the comment in ieee80211_enable_ht().

Additionally, this fixes a number of bugs:
 * mac80211: ieee80211_set_disassoc doesn't notify the driver any
             more since the refactoring
 * iwl-agn-rs: always uses the HT capabilities from the wrong stuff
               mac80211 gives it rather than the actual peer STA
 * ath9k: a number of bugs resulting from the broken HT API

I'm not entirely happy with putting the HT capabilities into
struct ieee80211_sta as restricted to our own HT TX capabilities,
but I see no cleaner solution for now.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/core.h         |   6 +-
 drivers/net/wireless/ath9k/main.c         |  41 ++++---
 drivers/net/wireless/ath9k/rc.c           |   8 +-
 drivers/net/wireless/ath9k/recv.c         |   3 +-
 drivers/net/wireless/ath9k/xmit.c         |  17 +--
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c |  23 ++--
 drivers/net/wireless/iwlwifi/iwl-agn.c    |  39 ++++---
 drivers/net/wireless/iwlwifi/iwl-core.c   |  17 +--
 drivers/net/wireless/iwlwifi/iwl-dev.h    |   1 -
 drivers/net/wireless/iwlwifi/iwl-sta.c    |  33 ++++--
 include/net/mac80211.h                    |  59 +++++-----
 net/mac80211/cfg.c                        |   6 +-
 net/mac80211/ht.c                         | 181 +++++++++++++-----------------
 net/mac80211/ieee80211_i.h                |  12 +-
 net/mac80211/mlme.c                       |  88 ++++++++-------
 15 files changed, 260 insertions(+), 274 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/core.h b/drivers/net/wireless/ath9k/core.h
index cb3e61e57c4d..59d835b72cd8 100644
--- a/drivers/net/wireless/ath9k/core.h
+++ b/drivers/net/wireless/ath9k/core.h
@@ -380,7 +380,6 @@ void ath_rx_cleanup(struct ath_softc *sc);
 int ath_rx_tasklet(struct ath_softc *sc, int flush);
 int ath_rx_input(struct ath_softc *sc,
 		 struct ath_node *node,
-		 int is_ampdu,
 		 struct sk_buff *skb,
 		 struct ath_recv_status *rx_status,
 		 enum ATH_RX_TYPE *status);
@@ -650,6 +649,9 @@ struct ath_node {
 	u8 an_smmode; /* SM Power save mode */
 	u8 an_flags;
 	u8 an_addr[ETH_ALEN];
+
+	u16 maxampdu;
+	u8 mpdudensity;
 };
 
 void ath_tx_resume_tid(struct ath_softc *sc,
@@ -919,8 +921,6 @@ enum RATE_TYPE {
 
 struct ath_ht_info {
 	enum ath9k_ht_macmode tx_chan_width;
-	u16 maxampdu;
-	u8 mpdudensity;
 	u8 ext_chan_offset;
 };
 
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 41cd114c438c..7555c3413384 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -330,25 +330,15 @@ static void ath9k_ht_conf(struct ath_softc *sc,
 {
 	struct ath_ht_info *ht_info = &sc->sc_ht_info;
 
-	if (bss_conf->assoc_ht) {
-		ht_info->ext_chan_offset =
-			bss_conf->ht_bss_conf->bss_cap &
-				IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
-
-		if (!(bss_conf->ht_cap->cap &
-			IEEE80211_HT_CAP_40MHZ_INTOLERANT) &&
-			    (bss_conf->ht_bss_conf->bss_cap &
-				IEEE80211_HT_PARAM_CHAN_WIDTH_ANY))
+	if (sc->hw->conf.ht.enabled) {
+		ht_info->ext_chan_offset = bss_conf->ht.secondary_channel_offset;
+
+		if (bss_conf->ht.width_40_ok)
 			ht_info->tx_chan_width = ATH9K_HT_MACMODE_2040;
 		else
 			ht_info->tx_chan_width = ATH9K_HT_MACMODE_20;
 
 		ath9k_hw_set11nmac2040(sc->sc_ah, ht_info->tx_chan_width);
-		ht_info->maxampdu = 1 << (IEEE80211_HTCAP_MAXRXAMPDU_FACTOR +
-					bss_conf->ht_cap->ampdu_factor);
-		ht_info->mpdudensity =
-			parse_mpdudensity(bss_conf->ht_cap->ampdu_density);
-
 	}
 }
 
@@ -390,7 +380,7 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc,
 		sc->sc_halstats.ns_avgtxrate = ATH_RATE_DUMMY_MARKER;
 
 		/* Update chainmask */
-		ath_update_chainmask(sc, bss_conf->assoc_ht);
+		ath_update_chainmask(sc, hw->conf.ht.enabled);
 
 		DPRINTF(sc, ATH_DBG_CONFIG,
 			"%s: bssid %pM aid 0x%x\n",
@@ -408,7 +398,7 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc,
 			return;
 		}
 
-		if (hw->conf.ht_cap.ht_supported)
+		if (hw->conf.ht.enabled)
 			sc->sc_ah->ah_channels[pos].chanmode =
 				ath_get_extchanmode(sc, curchan);
 		else
@@ -531,7 +521,6 @@ int _ath_rx_indicate(struct ath_softc *sc,
 
 	if (an) {
 		ath_rx_input(sc, an,
-			     hw->conf.ht_cap.ht_supported,
 			     skb, status, &st);
 	}
 	if (!an || (st != ATH_RX_CONSUMED))
@@ -1241,6 +1230,9 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed)
 		__func__,
 		curchan->center_freq);
 
+	/* Update chainmask */
+	ath_update_chainmask(sc, conf->ht.enabled);
+
 	pos = ath_get_channel(sc, curchan);
 	if (pos == -1) {
 		DPRINTF(sc, ATH_DBG_FATAL, "%s: Invalid channel\n", __func__);
@@ -1251,7 +1243,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed)
 		(curchan->band == IEEE80211_BAND_2GHZ) ?
 		CHANNEL_G : CHANNEL_A;
 
-	if (sc->sc_curaid && hw->conf.ht_cap.ht_supported)
+	if (sc->sc_curaid && hw->conf.ht.enabled)
 		sc->sc_ah->ah_channels[pos].chanmode =
 			ath_get_extchanmode(sc, curchan);
 
@@ -1434,6 +1426,14 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw,
 		} else {
 			ath_node_get(sc, sta->addr);
 		}
+
+		/* XXX: Is this right? Can the capabilities change? */
+		an = ath_node_find(sc, sta->addr);
+		an->maxampdu = 1 << (IEEE80211_HTCAP_MAXRXAMPDU_FACTOR +
+					sta->ht_cap.ampdu_factor);
+		an->mpdudensity =
+			parse_mpdudensity(sta->ht_cap.ampdu_density);
+
 		spin_unlock_irqrestore(&sc->node_lock, flags);
 		break;
 	case STA_NOTIFY_REMOVE:
@@ -1552,9 +1552,8 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw,
 	}
 
 	if (changed & BSS_CHANGED_HT) {
-		DPRINTF(sc, ATH_DBG_CONFIG, "%s: BSS Changed HT %d\n",
-			__func__,
-			bss_conf->assoc_ht);
+		DPRINTF(sc, ATH_DBG_CONFIG, "%s: BSS Changed HT\n",
+			__func__);
 		ath9k_ht_conf(sc, bss_conf);
 	}
 
diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index ee2dbce42b4d..9b2526030965 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -1838,7 +1838,7 @@ void ath_rc_node_update(struct ieee80211_hw *hw, struct ath_rate_node *rc_priv)
 	struct ath_softc *sc = hw->priv;
 	u32 capflag = 0;
 
-	if (hw->conf.ht_cap.ht_supported) {
+	if (hw->conf.ht.enabled) {
 		capflag |= ATH_RC_HT_FLAG | ATH_RC_DS_FLAG;
 		if (sc->sc_ht_info.tx_chan_width == ATH9K_HT_MACMODE_2040)
 			capflag |= ATH_RC_CW40_FLAG;
@@ -1979,7 +1979,7 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
 
 	/* Check if aggregation has to be enabled for this tid */
 
-	if (hw->conf.ht_cap.ht_supported) {
+	if (hw->conf.ht.enabled) {
 		if (ieee80211_is_data_qos(fc)) {
 			qc = ieee80211_get_qos_ctl(hdr);
 			tid = qc[0] & 0xf;
@@ -2026,9 +2026,9 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband,
 	DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__);
 
 	ath_setup_rates(sc, sband, sta, ath_rc_priv);
-	if (sc->hw->conf.flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
+	if (sc->hw->conf.ht.enabled) {
 		for (i = 0; i < 77; i++) {
-			if (sc->hw->conf.ht_cap.mcs.rx_mask[i/8] & (1<<(i%8)))
+			if (sta->ht_cap.mcs.rx_mask[i/8] & (1<<(i%8)))
 				ath_rc_priv->neg_ht_rates.rs_rates[j++] = i;
 			if (j == ATH_RATE_MAX)
 				break;
diff --git a/drivers/net/wireless/ath9k/recv.c b/drivers/net/wireless/ath9k/recv.c
index 010fcdfec3f6..828322840a86 100644
--- a/drivers/net/wireless/ath9k/recv.c
+++ b/drivers/net/wireless/ath9k/recv.c
@@ -720,12 +720,11 @@ void ath_flushrecv(struct ath_softc *sc)
 
 int ath_rx_input(struct ath_softc *sc,
 		 struct ath_node *an,
-		 int is_ampdu,
 		 struct sk_buff *skb,
 		 struct ath_recv_status *rx_status,
 		 enum ATH_RX_TYPE *status)
 {
-	if (is_ampdu && (sc->sc_flags & SC_OP_RXAGGR)) {
+	if (sc->sc_flags & SC_OP_RXAGGR) {
 		*status = ATH_RX_CONSUMED;
 		return ath_ampdu_input(sc, an, skb, rx_status);
 	} else {
diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c
index 3770fbe84fce..ba818cc2fb5c 100644
--- a/drivers/net/wireless/ath9k/xmit.c
+++ b/drivers/net/wireless/ath9k/xmit.c
@@ -300,7 +300,8 @@ static int ath_tx_prepare(struct ath_softc *sc,
 	if (ieee80211_is_data(fc) && !txctl->use_minrate) {
 
 		/* Enable HT only for DATA frames and not for EAPOL */
-		txctl->ht = (hw->conf.ht_cap.ht_supported &&
+		/* XXX why AMPDU only?? */
+		txctl->ht = (hw->conf.ht.enabled &&
 			    (tx_info->flags & IEEE80211_TX_CTL_AMPDU));
 
 		if (is_multicast_ether_addr(hdr->addr1)) {
@@ -1450,7 +1451,8 @@ static int ath_tx_send_ampdu(struct ath_softc *sc,
  */
 
 static u32 ath_lookup_rate(struct ath_softc *sc,
-				 struct ath_buf *bf)
+			   struct ath_buf *bf,
+			   struct ath_atx_tid *tid)
 {
 	const struct ath9k_rate_table *rt = sc->sc_currates;
 	struct sk_buff *skb;
@@ -1504,7 +1506,7 @@ static u32 ath_lookup_rate(struct ath_softc *sc,
 	 * The IE, however can hold upto 65536, which shows up here
 	 * as zero. Ignore 65536 since we  are constrained by hw.
 	 */
-	maxampdu = sc->sc_ht_info.maxampdu;
+	maxampdu = tid->an->maxampdu;
 	if (maxampdu)
 		aggr_limit = min(aggr_limit, maxampdu);
 
@@ -1518,6 +1520,7 @@ static u32 ath_lookup_rate(struct ath_softc *sc,
  */
 
 static int ath_compute_num_delims(struct ath_softc *sc,
+				  struct ath_atx_tid *tid,
 				  struct ath_buf *bf,
 				  u16 frmlen)
 {
@@ -1545,7 +1548,7 @@ static int ath_compute_num_delims(struct ath_softc *sc,
 	 * required minimum length for subframe. Take into account
 	 * whether high rate is 20 or 40Mhz and half or full GI.
 	 */
-	mpdudensity = sc->sc_ht_info.mpdudensity;
+	mpdudensity = tid->an->mpdudensity;
 
 	/*
 	 * If there is no mpdu density restriction, no further calculation
@@ -1619,7 +1622,7 @@ static enum ATH_AGGR_STATUS ath_tx_form_aggr(struct ath_softc *sc,
 		}
 
 		if (!rl) {
-			aggr_limit = ath_lookup_rate(sc, bf);
+			aggr_limit = ath_lookup_rate(sc, bf, tid);
 			rl = 1;
 			/*
 			 * Is rate dual stream
@@ -1657,7 +1660,7 @@ static enum ATH_AGGR_STATUS ath_tx_form_aggr(struct ath_softc *sc,
 		 * Get the delimiters needed to meet the MPDU
 		 * density for this node.
 		 */
-		ndelim = ath_compute_num_delims(sc, bf_first, bf->bf_frmlen);
+		ndelim = ath_compute_num_delims(sc, tid, bf_first, bf->bf_frmlen);
 
 		bpad = PADBYTES(al_delta) + (ndelim << 2);
 
@@ -2629,7 +2632,7 @@ void ath_tx_node_init(struct ath_softc *sc, struct ath_node *an)
 		struct ath_atx_ac *ac;
 		int tidno, acno;
 
-		sc->sc_ht_info.maxampdu = ATH_AMPDU_LIMIT_DEFAULT;
+		an->maxampdu = ATH_AMPDU_LIMIT_DEFAULT;
 
 		/*
 		 * Init per tid tx state
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index cd1bff590491..e10e0ca09ce9 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -1133,8 +1133,7 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv,
 	s32 rate;
 	s8 is_green = lq_sta->is_green;
 
-	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->ht_cap.ht_supported)
+	if (!conf->ht.enabled || !sta->ht_cap.ht_supported)
 		return -1;
 
 	if (((sta->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> 2)
@@ -1201,8 +1200,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv,
 	u8 is_green = lq_sta->is_green;
 	s32 rate;
 
-	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) ||
-	    !sta->ht_cap.ht_supported)
+	if (!conf->ht.enabled || !sta->ht_cap.ht_supported)
 		return -1;
 
 	IWL_DEBUG_RATE("LQ: try to switch to SISO\n");
@@ -2001,9 +1999,8 @@ lq_update:
 		 * stay with best antenna legacy modulation for a while
 		 * before next round of mode comparisons. */
 		tbl1 = &(lq_sta->lq_info[lq_sta->active_tbl]);
-		if (is_legacy(tbl1->lq_type) &&
-		   (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) &&
-		    (lq_sta->action_counter >= 1)) {
+		if (is_legacy(tbl1->lq_type) && !conf->ht.enabled &&
+		    lq_sta->action_counter >= 1) {
 			lq_sta->action_counter = 0;
 			IWL_DEBUG_RATE("LQ: STAY in legacy table\n");
 			rs_set_stay_in_table(priv, 1, lq_sta);
@@ -2238,19 +2235,19 @@ static void rs_rate_init(void *priv_r, struct ieee80211_supported_band *sband,
 	 * active_siso_rate mask includes 9 MBits (bit 5), and CCK (bits 0-3),
 	 * supp_rates[] does not; shift to convert format, force 9 MBits off.
 	 */
-	lq_sta->active_siso_rate = conf->ht_cap.mcs.rx_mask[0] << 1;
-	lq_sta->active_siso_rate |= conf->ht_cap.mcs.rx_mask[0] & 0x1;
+	lq_sta->active_siso_rate = sta->ht_cap.mcs.rx_mask[0] << 1;
+	lq_sta->active_siso_rate |= sta->ht_cap.mcs.rx_mask[0] & 0x1;
 	lq_sta->active_siso_rate &= ~((u16)0x2);
 	lq_sta->active_siso_rate <<= IWL_FIRST_OFDM_RATE;
 
 	/* Same here */
-	lq_sta->active_mimo2_rate = conf->ht_cap.mcs.rx_mask[1] << 1;
-	lq_sta->active_mimo2_rate |= conf->ht_cap.mcs.rx_mask[1] & 0x1;
+	lq_sta->active_mimo2_rate = sta->ht_cap.mcs.rx_mask[1] << 1;
+	lq_sta->active_mimo2_rate |= sta->ht_cap.mcs.rx_mask[1] & 0x1;
 	lq_sta->active_mimo2_rate &= ~((u16)0x2);
 	lq_sta->active_mimo2_rate <<= IWL_FIRST_OFDM_RATE;
 
-	lq_sta->active_mimo3_rate = conf->ht_cap.mcs.rx_mask[2] << 1;
-	lq_sta->active_mimo3_rate |= conf->ht_cap.mcs.rx_mask[2] & 0x1;
+	lq_sta->active_mimo3_rate = sta->ht_cap.mcs.rx_mask[2] << 1;
+	lq_sta->active_mimo3_rate |= sta->ht_cap.mcs.rx_mask[2] & 0x1;
 	lq_sta->active_mimo3_rate &= ~((u16)0x2);
 	lq_sta->active_mimo3_rate <<= IWL_FIRST_OFDM_RATE;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 79a24410dd0a..7c3eb3d8f7e7 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -552,17 +552,30 @@ static int iwl4965_send_beacon_cmd(struct iwl_priv *priv)
 static void iwl4965_ht_conf(struct iwl_priv *priv,
 			    struct ieee80211_bss_conf *bss_conf)
 {
-	struct ieee80211_sta_ht_cap *ht_conf = bss_conf->ht_cap;
-	struct ieee80211_ht_bss_info *ht_bss_conf = bss_conf->ht_bss_conf;
+	struct ieee80211_sta_ht_cap *ht_conf;
 	struct iwl_ht_info *iwl_conf = &priv->current_ht_config;
+	struct ieee80211_sta *sta;
 
 	IWL_DEBUG_MAC80211("enter: \n");
 
-	iwl_conf->is_ht = bss_conf->assoc_ht;
-
 	if (!iwl_conf->is_ht)
 		return;
 
+
+	/*
+	 * It is totally wrong to base global information on something
+	 * that is valid only when associated, alas, this driver works
+	 * that way and I don't know how to fix it.
+	 */
+
+	rcu_read_lock();
+	sta = ieee80211_find_sta(priv->hw, priv->bssid);
+	if (!sta) {
+		rcu_read_unlock();
+		return;
+	}
+	ht_conf = &sta->ht_cap;
+
 	if (ht_conf->cap & IEEE80211_HT_CAP_SGI_20)
 		iwl_conf->sgf |= HT_SHORT_GI_20MHZ;
 	if (ht_conf->cap & IEEE80211_HT_CAP_SGI_40)
@@ -574,8 +587,8 @@ static void iwl4965_ht_conf(struct iwl_priv *priv,
 
 	iwl_conf->supported_chan_width =
 		!!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40);
-	iwl_conf->extension_chan_offset =
-		ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
+
+	iwl_conf->extension_chan_offset = bss_conf->ht.secondary_channel_offset;
 	/* If no above or below channel supplied disable FAT channel */
 	if (iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_ABOVE &&
 	    iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) {
@@ -587,15 +600,14 @@ static void iwl4965_ht_conf(struct iwl_priv *priv,
 
 	memcpy(&iwl_conf->mcs, &ht_conf->mcs, 16);
 
-	iwl_conf->control_channel = ht_bss_conf->primary_channel;
-	iwl_conf->tx_chan_width =
-		!!(ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY);
+	iwl_conf->tx_chan_width = bss_conf->ht.width_40_ok;
 	iwl_conf->ht_protection =
-		ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_PROTECTION;
+		bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_PROTECTION;
 	iwl_conf->non_GF_STA_present =
-		!!(ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT);
+		!!(bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT);
+
+	rcu_read_unlock();
 
-	IWL_DEBUG_MAC80211("control channel %d\n", iwl_conf->control_channel);
 	IWL_DEBUG_MAC80211("leave\n");
 }
 
@@ -2746,6 +2758,8 @@ static int iwl4965_mac_config(struct ieee80211_hw *hw, u32 changed)
 	mutex_lock(&priv->mutex);
 	IWL_DEBUG_MAC80211("enter to channel %d\n", conf->channel->hw_value);
 
+	priv->current_ht_config.is_ht = conf->ht.enabled;
+
 	if (conf->radio_enabled && iwl_radio_kill_sw_enable_radio(priv)) {
 		IWL_DEBUG_MAC80211("leave - RF-KILL - waiting for uCode\n");
 		goto out;
@@ -3104,7 +3118,6 @@ static void iwl4965_bss_info_changed(struct ieee80211_hw *hw,
 	}
 
 	if (changes & BSS_CHANGED_HT) {
-		IWL_DEBUG_MAC80211("HT %d\n", bss_conf->assoc_ht);
 		iwl4965_ht_conf(priv, bss_conf);
 		iwl_set_rxon_chain(priv);
 	}
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 4678da447ff6..10f5a0a233fe 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -637,8 +637,8 @@ u8 iwl_is_fat_tx_allowed(struct iwl_priv *priv,
 	}
 
 	return iwl_is_channel_extension(priv, priv->band,
-					 iwl_ht_conf->control_channel,
-					 iwl_ht_conf->extension_chan_offset);
+					le16_to_cpu(priv->staging_rxon.channel),
+					iwl_ht_conf->extension_chan_offset);
 }
 EXPORT_SYMBOL(iwl_is_fat_tx_allowed);
 
@@ -663,13 +663,6 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info)
 		rxon->flags &= ~(RXON_FLG_CHANNEL_MODE_MIXED_MSK |
 				 RXON_FLG_CHANNEL_MODE_PURE_40_MSK);
 
-	if (le16_to_cpu(rxon->channel) != ht_info->control_channel) {
-		IWL_DEBUG_ASSOC("control diff than current %d %d\n",
-				le16_to_cpu(rxon->channel),
-				ht_info->control_channel);
-		return;
-	}
-
 	/* Note: control channel is opposite of extension channel */
 	switch (ht_info->extension_chan_offset) {
 	case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
@@ -692,14 +685,12 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info)
 
 	IWL_DEBUG_ASSOC("supported HT rate 0x%X 0x%X 0x%X "
 			"rxon flags 0x%X operation mode :0x%X "
-			"extension channel offset 0x%x "
-			"control chan %d\n",
+			"extension channel offset 0x%x\n",
 			ht_info->mcs.rx_mask[0],
 			ht_info->mcs.rx_mask[1],
 			ht_info->mcs.rx_mask[2],
 			le32_to_cpu(rxon->flags), ht_info->ht_protection,
-			ht_info->extension_chan_offset,
-			ht_info->control_channel);
+			ht_info->extension_chan_offset);
 	return;
 }
 EXPORT_SYMBOL(iwl_set_rxon_ht);
diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h
index 572250ee9d58..2e9514933970 100644
--- a/drivers/net/wireless/iwlwifi/iwl-dev.h
+++ b/drivers/net/wireless/iwlwifi/iwl-dev.h
@@ -413,7 +413,6 @@ struct iwl_ht_info {
 	u8 mpdu_density;
 	struct ieee80211_mcs_info mcs;
 	/* BSS related data */
-	u8 control_channel;
 	u8 extension_chan_offset;
 	u8 tx_chan_width;
 	u8 ht_protection;
diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c
index b9b8554433a6..218483d096cc 100644
--- a/drivers/net/wireless/iwlwifi/iwl-sta.c
+++ b/drivers/net/wireless/iwlwifi/iwl-sta.c
@@ -890,20 +890,31 @@ static void iwl_sta_init_lq(struct iwl_priv *priv, const u8 *addr, int is_ap)
  */
 int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, int is_ap)
 {
+	struct ieee80211_sta *sta;
+	struct ieee80211_sta_ht_cap ht_config;
+	struct ieee80211_sta_ht_cap *cur_ht_config = NULL;
 	u8 sta_id;
 
 	/* Add station to device's station table */
-	struct ieee80211_conf *conf = &priv->hw->conf;
-	struct ieee80211_sta_ht_cap *cur_ht_config = &conf->ht_cap;
-
-	if ((is_ap) &&
-	    (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) &&
-	    (priv->iw_mode == NL80211_IFTYPE_STATION))
-		sta_id = iwl_add_station_flags(priv, addr, is_ap,
-						   0, cur_ht_config);
-	else
-		sta_id = iwl_add_station_flags(priv, addr, is_ap,
-						   0, NULL);
+
+	/*
+	 * XXX: This check is definitely not correct, if we're an AP
+	 *	it'll always be false which is not what we want, but
+	 *	it doesn't look like iwlagn is prepared to be an HT
+	 *	AP anyway.
+	 */
+	if (priv->current_ht_config.is_ht) {
+		rcu_read_lock();
+		sta = ieee80211_find_sta(priv->hw, addr);
+		if (sta) {
+			memcpy(&ht_config, &sta->ht_cap, sizeof(ht_config));
+			cur_ht_config = &ht_config;
+		}
+		rcu_read_unlock();
+	}
+
+	sta_id = iwl_add_station_flags(priv, addr, is_ap,
+				       0, cur_ht_config);
 
 	/* Set up default rate scaling table in device's station table */
 	iwl_sta_init_lq(priv, addr, is_ap);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 94ff3eface49..9801afb62545 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3,7 +3,7 @@
  *
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
- * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2007-2008	Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -81,22 +81,6 @@ enum ieee80211_notification_types {
 	IEEE80211_NOTIFY_RE_ASSOC,
 };
 
-/**
- * struct ieee80211_ht_bss_info - describing BSS's HT characteristics
- *
- * This structure describes most essential parameters needed
- * to describe 802.11n HT characteristics in a BSS.
- *
- * @primary_channel: channel number of primery channel
- * @bss_cap: 802.11n's general BSS capabilities (e.g. channel width)
- * @bss_op_mode: 802.11n's BSS operation modes (e.g. HT protection)
- */
-struct ieee80211_ht_bss_info {
-	u8 primary_channel;
-	u8 bss_cap;  /* use IEEE80211_HT_IE_CHA_ */
-	u8 bss_op_mode; /* use IEEE80211_HT_IE_ */
-};
-
 /**
  * enum ieee80211_max_queues - maximum number of queues
  *
@@ -171,6 +155,19 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_BASIC_RATES		= 1<<5,
 };
 
+/**
+ * struct ieee80211_bss_ht_conf - BSS's changing HT configuration
+ * @secondary_channel_offset: secondary channel offset, uses
+ *	%IEEE80211_HT_PARAM_CHA_SEC_ values
+ * @width_40_ok: indicates that 40 MHz bandwidth may be used for TX
+ * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info)
+ */
+struct ieee80211_bss_ht_conf {
+	u8 secondary_channel_offset;
+	bool width_40_ok;
+	u16 operation_mode;
+};
+
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -190,9 +187,7 @@ enum ieee80211_bss_change {
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
  * @assoc_capability: capabilities taken from assoc resp
- * @assoc_ht: association in HT mode
- * @ht_cap: ht capabilities
- * @ht_bss_conf: ht extended capabilities
+ * @ht: BSS's HT configuration
  * @basic_rates: bitmap of basic rates, each bit stands for an
  *	index into the rate table configured by the driver in
  *	the current band.
@@ -210,10 +205,7 @@ struct ieee80211_bss_conf {
 	u16 assoc_capability;
 	u64 timestamp;
 	u64 basic_rates;
-	/* ht related data */
-	bool assoc_ht;
-	struct ieee80211_sta_ht_cap *ht_cap;
-	struct ieee80211_ht_bss_info *ht_bss_conf;
+	struct ieee80211_bss_ht_conf ht;
 };
 
 /**
@@ -447,13 +439,11 @@ struct ieee80211_rx_status {
  * Flags to define PHY configuration options
  *
  * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported)
- * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported)
  * @IEEE80211_CONF_PS: Enable 802.11 power save mode
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_RADIOTAP		= (1<<0),
-	IEEE80211_CONF_SUPPORT_HT_MODE	= (1<<1),
-	IEEE80211_CONF_PS		= (1<<2),
+	IEEE80211_CONF_PS		= (1<<1),
 };
 
 /* XXX: remove all this once drivers stop trying to use it */
@@ -463,6 +453,10 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
 }
 #define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME())
 
+struct ieee80211_ht_conf {
+	bool enabled;
+};
+
 /**
  * enum ieee80211_conf_changed - denotes which configuration changed
  *
@@ -474,6 +468,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
  * @IEEE80211_CONF_CHANGE_POWER: the TX power changed
  * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed
  * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed
+ * @IEEE80211_CONF_CHANGE_HT: HT configuration changed
  */
 enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
@@ -484,6 +479,7 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_POWER		= BIT(5),
 	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
 	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
+	IEEE80211_CONF_CHANGE_HT		= BIT(8),
 };
 
 /**
@@ -496,9 +492,8 @@ enum ieee80211_conf_changed {
  * @listen_interval: listen interval in units of beacon interval
  * @flags: configuration flags defined above
  * @power_level: requested transmit power (in dBm)
- * @ht_cap: describes current self configuration of 802.11n HT capabilities
- * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters
  * @channel: the channel to tune to
+ * @ht: the HT configuration for the device
  * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame
  *    (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11,
  *    but actually means the number of transmissions not the number of retries
@@ -517,9 +512,7 @@ struct ieee80211_conf {
 	u8 long_frame_max_tx_count, short_frame_max_tx_count;
 
 	struct ieee80211_channel *channel;
-
-	struct ieee80211_sta_ht_cap ht_cap;
-	struct ieee80211_ht_bss_info ht_bss_conf;
+	struct ieee80211_ht_conf ht;
 };
 
 /**
@@ -715,7 +708,7 @@ enum set_key_cmd {
  * @addr: MAC address
  * @aid: AID we assigned to the station if we're an AP
  * @supp_rates: Bitmap of supported rates (per band)
- * @ht_cap: HT capabilities of this STA
+ * @ht_cap: HT capabilities of this STA; restricted to our own TX capabilities
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void *), size is determined in hw information.
  */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 28382b5c7c25..55e3a26510ed 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -582,6 +582,8 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 
+	sband = local->hw.wiphy->bands[local->oper_channel->band];
+
 	/*
 	 * FIXME: updating the flags is racy when this function is
 	 *	  called from ieee80211_change_station(), this will
@@ -622,7 +624,6 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 
 	if (params->supported_rates) {
 		rates = 0;
-		sband = local->hw.wiphy->bands[local->oper_channel->band];
 
 		for (i = 0; i < params->supported_rates_len; i++) {
 			int rate = (params->supported_rates[i] & 0x7f) * 5;
@@ -635,7 +636,8 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 	}
 
 	if (params->ht_capa)
-		ieee80211_ht_cap_ie_to_sta_ht_cap(params->ht_capa,
+		ieee80211_ht_cap_ie_to_sta_ht_cap(sband,
+						  params->ht_capa,
 						  &sta->sta.ht_cap);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) {
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index e2d121bf2745..42c3e590df98 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -20,114 +20,38 @@
 #include "sta_info.h"
 #include "wme.h"
 
-void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie,
+void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
+				       struct ieee80211_ht_cap *ht_cap_ie,
 				       struct ieee80211_sta_ht_cap *ht_cap)
 {
+	u8 ampdu_info, tx_mcs_set_cap;
+	int i, max_tx_streams;
 
 	BUG_ON(!ht_cap);
 
 	memset(ht_cap, 0, sizeof(*ht_cap));
 
-	if (ht_cap_ie) {
-		u8 ampdu_info = ht_cap_ie->ampdu_params_info;
-
-		ht_cap->ht_supported = true;
-		ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info);
-		ht_cap->ampdu_factor =
-			ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR;
-		ht_cap->ampdu_density =
-			(ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;
-		memcpy(&ht_cap->mcs, &ht_cap_ie->mcs, sizeof(ht_cap->mcs));
-	} else
-		ht_cap->ht_supported = false;
-}
-
-void ieee80211_ht_info_ie_to_ht_bss_info(
-			struct ieee80211_ht_info *ht_add_info_ie,
-			struct ieee80211_ht_bss_info *bss_info)
-{
-	BUG_ON(!bss_info);
-
-	memset(bss_info, 0, sizeof(*bss_info));
-
-	if (ht_add_info_ie) {
-		u16 op_mode;
-		op_mode = le16_to_cpu(ht_add_info_ie->operation_mode);
-
-		bss_info->primary_channel = ht_add_info_ie->control_chan;
-		bss_info->bss_cap = ht_add_info_ie->ht_param;
-		bss_info->bss_op_mode = (u8)(op_mode & 0xff);
-	}
-}
-
-/*
- * ieee80211_handle_ht should be called only after the operating band
- * has been determined as ht configuration depends on the hw's
- * HT abilities for a specific band.
- */
-u32 ieee80211_handle_ht(struct ieee80211_local *local,
-			struct ieee80211_sta_ht_cap *req_ht_cap,
-			struct ieee80211_ht_bss_info *req_bss_cap)
-{
-	struct ieee80211_conf *conf = &local->hw.conf;
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_sta_ht_cap ht_cap;
-	struct ieee80211_ht_bss_info ht_bss_conf;
-	u32 changed = 0;
-	int i;
-	u8 max_tx_streams;
-	u8 tx_mcs_set_cap;
-	bool enable_ht = true;
-
-	sband = local->hw.wiphy->bands[conf->channel->band];
-
-	memset(&ht_cap, 0, sizeof(ht_cap));
-	memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info));
-
-	/* HT is not supported */
-	if (!sband->ht_cap.ht_supported)
-		enable_ht = false;
-
-	/* disable HT */
-	if (!enable_ht) {
-		if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)
-			changed |= BSS_CHANGED_HT;
-		conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE;
-		conf->ht_cap.ht_supported = false;
-		return changed;
-	}
-
-
-	if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE))
-		changed |= BSS_CHANGED_HT;
-
-	conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE;
-	ht_cap.ht_supported = true;
+	if (!ht_cap_ie)
+		return;
 
-	ht_cap.cap = req_ht_cap->cap & sband->ht_cap.cap;
-	ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS;
-	ht_cap.cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
+	ht_cap->ht_supported = true;
 
-	ht_bss_conf.primary_channel = req_bss_cap->primary_channel;
-	ht_bss_conf.bss_cap = req_bss_cap->bss_cap;
-	ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode;
+	ht_cap->cap = ht_cap->cap & sband->ht_cap.cap;
+	ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS;
+	ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
 
-	ht_cap.ampdu_factor = req_ht_cap->ampdu_factor;
-	ht_cap.ampdu_density = req_ht_cap->ampdu_density;
+	ampdu_info = ht_cap_ie->ampdu_params_info;
+	ht_cap->ampdu_factor =
+		ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR;
+	ht_cap->ampdu_density =
+		(ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;
 
 	/* own MCS TX capabilities */
 	tx_mcs_set_cap = sband->ht_cap.mcs.tx_params;
 
-	/*
-	 * configure supported Tx MCS according to requested MCS
-	 * (based in most cases on Rx capabilities of peer) and self
-	 * Tx MCS capabilities (as defined by low level driver HW
-	 * Tx capabilities)
-	 */
-
 	/* can we TX with MCS rates? */
 	if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED))
-		goto check_changed;
+		return;
 
 	/* Counting from 0, therefore +1 */
 	if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF)
@@ -145,29 +69,73 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local,
 	 * - remainder are multiple spatial streams using unequal modulation
 	 */
 	for (i = 0; i < max_tx_streams; i++)
-		ht_cap.mcs.rx_mask[i] =
-			sband->ht_cap.mcs.rx_mask[i] &
-					req_ht_cap->mcs.rx_mask[i];
+		ht_cap->mcs.rx_mask[i] =
+			sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];
 
 	if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION)
 		for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE;
 		     i < IEEE80211_HT_MCS_MASK_LEN; i++)
-			ht_cap.mcs.rx_mask[i] =
+			ht_cap->mcs.rx_mask[i] =
 				sband->ht_cap.mcs.rx_mask[i] &
-					req_ht_cap->mcs.rx_mask[i];
+					ht_cap_ie->mcs.rx_mask[i];
 
 	/* handle MCS rate 32 too */
-	if (sband->ht_cap.mcs.rx_mask[32/8] &
-	    req_ht_cap->mcs.rx_mask[32/8] & 1)
-		ht_cap.mcs.rx_mask[32/8] |= 1;
+	if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
+		ht_cap->mcs.rx_mask[32/8] |= 1;
+}
+
+/*
+ * ieee80211_enable_ht should be called only after the operating band
+ * has been determined as ht configuration depends on the hw's
+ * HT abilities for a specific band.
+ */
+u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
+			struct ieee80211_ht_info *hti,
+			u16 ap_ht_cap_flags)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_bss_ht_conf ht;
+	u32 changed = 0;
+	bool enable_ht = true, ht_changed;
+
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+
+	memset(&ht, 0, sizeof(ht));
+
+	/* HT is not supported */
+	if (!sband->ht_cap.ht_supported)
+		enable_ht = false;
+
+	/* check that channel matches the right operating channel */
+	if (local->hw.conf.channel->center_freq !=
+	    ieee80211_channel_to_frequency(hti->control_chan))
+		enable_ht = false;
+
+	/*
+	 * XXX: This is totally incorrect when there are multiple virtual
+	 *	interfaces, needs to be fixed later.
+	 */
+	ht_changed = local->hw.conf.ht.enabled != enable_ht;
+	local->hw.conf.ht.enabled = enable_ht;
+	if (ht_changed)
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT);
+
+	/* disable HT */
+	if (!enable_ht)
+		return 0;
+	ht.secondary_channel_offset =
+		hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
+	ht.width_40_ok =
+		!(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) &&
+		(sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) &&
+		(hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY);
+	ht.operation_mode = le16_to_cpu(hti->operation_mode);
 
- check_changed:
 	/* if bss configuration changed store the new one */
-	if (memcmp(&conf->ht_cap, &ht_cap, sizeof(ht_cap)) ||
-	    memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) {
+	if (memcmp(&sdata->vif.bss_conf.ht, &ht, sizeof(ht))) {
 		changed |= BSS_CHANGED_HT;
-		memcpy(&conf->ht_cap, &ht_cap, sizeof(ht_cap));
-		memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf));
+		sdata->vif.bss_conf.ht = ht;
 	}
 
 	return changed;
@@ -900,8 +868,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	/* sanity check for incoming parameters:
 	 * check if configuration can support the BA policy
 	 * and if buffer size does not exceeds max value */
+	/* XXX: check own ht delayed BA capability?? */
 	if (((ba_policy != 1)
-		&& (!(conf->ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
+		&& (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
 		|| (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
 		status = WLAN_STATUS_INVALID_QOS_PARAM;
 #ifdef CONFIG_MAC80211_HT_DEBUG
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 859b5b001f22..6f8756d26a93 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -955,14 +955,12 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev);
 
 /* HT */
-void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie,
+void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
+				       struct ieee80211_ht_cap *ht_cap_ie,
 				       struct ieee80211_sta_ht_cap *ht_cap);
-void ieee80211_ht_info_ie_to_ht_bss_info(
-			struct ieee80211_ht_info *ht_add_info_ie,
-			struct ieee80211_ht_bss_info *bss_info);
-u32 ieee80211_handle_ht(struct ieee80211_local *local,
-			struct ieee80211_sta_ht_cap *req_ht_cap,
-			struct ieee80211_ht_bss_info *req_bss_cap);
+u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
+			struct ieee80211_ht_info *hti,
+			u16 ap_ht_cap_flags);
 void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
 
 void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9c5f5c37a49e..39bc9c69893b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -700,14 +700,15 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
 
 
 static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_if_sta *ifsta)
+				     struct ieee80211_if_sta *ifsta,
+				     u32 bss_info_changed)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_conf *conf = &local_to_hw(local)->conf;
-	u32 changed = BSS_CHANGED_ASSOC;
 
 	struct ieee80211_bss *bss;
 
+	bss_info_changed |= BSS_CHANGED_ASSOC;
 	ifsta->flags |= IEEE80211_STA_ASSOCIATED;
 
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
@@ -722,19 +723,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		sdata->vif.bss_conf.timestamp = bss->timestamp;
 		sdata->vif.bss_conf.dtim_period = bss->dtim_period;
 
-		changed |= ieee80211_handle_bss_capability(sdata,
+		bss_info_changed |= ieee80211_handle_bss_capability(sdata,
 			bss->capability, bss->has_erp_value, bss->erp_value);
 
 		ieee80211_rx_bss_put(local, bss);
 	}
 
-	if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
-		changed |= BSS_CHANGED_HT;
-		sdata->vif.bss_conf.assoc_ht = 1;
-		sdata->vif.bss_conf.ht_cap = &conf->ht_cap;
-		sdata->vif.bss_conf.ht_bss_conf = &conf->ht_bss_conf;
-	}
-
 	ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET;
 	memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN);
 	ieee80211_sta_send_associnfo(sdata, ifsta);
@@ -748,8 +742,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	 * when we have associated, we aren't checking whether it actually
 	 * changed or not.
 	 */
-	changed |= BSS_CHANGED_BASIC_RATES;
-	ieee80211_bss_info_change_notify(sdata, changed);
+	bss_info_changed |= BSS_CHANGED_BASIC_RATES;
+	ieee80211_bss_info_change_notify(sdata, bss_info_changed);
 
 	netif_tx_start_all_queues(sdata->dev);
 	netif_carrier_on(sdata->dev);
@@ -813,7 +807,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
-	u32 changed = BSS_CHANGED_ASSOC;
+	u32 changed = 0;
 
 	rcu_read_lock();
 
@@ -847,15 +841,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	ifsta->flags &= ~IEEE80211_STA_ASSOCIATED;
 	changed |= ieee80211_reset_erp_info(sdata);
 
-	if (sdata->vif.bss_conf.assoc_ht)
-		changed |= BSS_CHANGED_HT;
-
-	sdata->vif.bss_conf.assoc_ht = 0;
-	sdata->vif.bss_conf.ht_cap = NULL;
-	sdata->vif.bss_conf.ht_bss_conf = NULL;
-
 	ieee80211_led_assoc(local, 0);
-	sdata->vif.bss_conf.assoc = 0;
+	changed |= BSS_CHANGED_ASSOC;
+	sdata->vif.bss_conf.assoc = false;
 
 	ieee80211_sta_send_apinfo(sdata, ifsta);
 
@@ -867,6 +855,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	sta_info_destroy(sta);
+
+	local->hw.conf.ht.enabled = false;
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT);
+
+	ieee80211_bss_info_change_notify(sdata, changed);
 }
 
 static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
@@ -1184,8 +1177,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	struct ieee802_11_elems elems;
 	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	u8 *pos;
+	u32 changed = 0;
 	int i, j;
 	bool have_higher_than_11mbit = false;
+	u16 ap_ht_cap_flags;
 
 	/* AssocResp and ReassocResp have identical structure, so process both
 	 * of them in this function. */
@@ -1333,15 +1328,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	else
 		sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
 
-	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
-	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) {
-		struct ieee80211_ht_bss_info bss_info;
-		ieee80211_ht_cap_ie_to_sta_ht_cap(
+	if (elems.ht_cap_elem)
+		ieee80211_ht_cap_ie_to_sta_ht_cap(sband,
 				elems.ht_cap_elem, &sta->sta.ht_cap);
-		ieee80211_ht_info_ie_to_ht_bss_info(
-				elems.ht_info_elem, &bss_info);
-		ieee80211_handle_ht(local, &sta->sta.ht_cap, &bss_info);
-	}
+
+	ap_ht_cap_flags = sta->sta.ht_cap.cap;
 
 	rate_control_rate_init(sta);
 
@@ -1353,11 +1344,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	} else
 		rcu_read_unlock();
 
+	if (elems.ht_info_elem && elems.wmm_param &&
+	    (ifsta->flags & IEEE80211_STA_WMM_ENABLED))
+		changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem,
+					       ap_ht_cap_flags);
+
 	/* set AID and assoc capability,
 	 * ieee80211_set_associated() will tell the driver */
 	bss_conf->aid = aid;
 	bss_conf->assoc_capability = capab_info;
-	ieee80211_set_associated(sdata, ifsta);
+	ieee80211_set_associated(sdata, ifsta, changed);
 
 	ieee80211_associated(sdata, ifsta);
 }
@@ -1657,7 +1653,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	size_t baselen;
 	struct ieee802_11_elems elems;
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_conf *conf = &local->hw.conf;
 	u32 changed = 0;
 	bool erp_valid;
 	u8 erp_value = 0;
@@ -1693,14 +1688,31 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 			le16_to_cpu(mgmt->u.beacon.capab_info),
 			erp_valid, erp_value);
 
-	if (elems.ht_cap_elem && elems.ht_info_elem &&
-	    elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) {
-		struct ieee80211_ht_bss_info bss_info;
 
-		ieee80211_ht_info_ie_to_ht_bss_info(
-				elems.ht_info_elem, &bss_info);
-		changed |= ieee80211_handle_ht(local, &conf->ht_cap,
-					       &bss_info);
+	if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) {
+		struct sta_info *sta;
+		struct ieee80211_supported_band *sband;
+		u16 ap_ht_cap_flags;
+
+		rcu_read_lock();
+
+		sta = sta_info_get(local, ifsta->bssid);
+		if (!sta) {
+			rcu_read_unlock();
+			return;
+		}
+
+		sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+
+		ieee80211_ht_cap_ie_to_sta_ht_cap(sband,
+				elems.ht_cap_elem, &sta->sta.ht_cap);
+
+		ap_ht_cap_flags = sta->sta.ht_cap.cap;
+
+		rcu_read_unlock();
+
+		changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem,
+					       ap_ht_cap_flags);
 	}
 
 	ieee80211_bss_info_change_notify(sdata, changed);
-- 
cgit v1.2.3


From e6a9854b05c1a6af1308fe2b8c68f35abf28a3ee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 21 Oct 2008 12:40:02 +0200
Subject: mac80211/drivers: rewrite the rate control API

So after the previous changes we were still unhappy with how
convoluted the API is and decided to make things simpler for
everybody. This completely changes the rate control API, now
taking into account 802.11n with MCS rates and more control,
most drivers don't support that though.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  21 +-
 drivers/net/wireless/ath5k/base.c           |  34 ++-
 drivers/net/wireless/ath9k/main.c           |  18 +-
 drivers/net/wireless/ath9k/rc.c             |  40 +--
 drivers/net/wireless/ath9k/xmit.c           |  28 ++-
 drivers/net/wireless/b43/dma.c              |   4 +-
 drivers/net/wireless/b43/main.c             |   2 +-
 drivers/net/wireless/b43/pio.c              |   3 +-
 drivers/net/wireless/b43/xmit.c             |  60 ++++-
 drivers/net/wireless/b43/xmit.h             |   5 +-
 drivers/net/wireless/b43legacy/dma.c        |  46 ++--
 drivers/net/wireless/b43legacy/main.c       |   2 +-
 drivers/net/wireless/b43legacy/pio.c        |  31 ++-
 drivers/net/wireless/b43legacy/xmit.c       |  26 +-
 drivers/net/wireless/b43legacy/xmit.h       |   2 +-
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c  |  57 ++---
 drivers/net/wireless/iwlwifi/iwl-3945.c     |  65 ++++-
 drivers/net/wireless/iwlwifi/iwl-3945.h     |   2 +
 drivers/net/wireless/iwlwifi/iwl-4965.c     |   8 +-
 drivers/net/wireless/iwlwifi/iwl-5000.c     |   8 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c   |  37 ++-
 drivers/net/wireless/iwlwifi/iwl-core.c     |  19 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |   5 +-
 drivers/net/wireless/libertas_tf/main.c     |   4 +-
 drivers/net/wireless/mac80211_hwsim.c       |  12 +-
 drivers/net/wireless/p54/p54common.c        |  26 +-
 drivers/net/wireless/rt2x00/rt2x00dev.c     |  14 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  14 +-
 drivers/net/wireless/rt2x00/rt2x00queue.c   |  22 +-
 drivers/net/wireless/rt2x00/rt2x00queue.h   |   9 +-
 drivers/net/wireless/rtl8180_dev.c          |  28 +--
 drivers/net/wireless/rtl8187_dev.c          |  18 +-
 drivers/net/wireless/zd1211rw/zd_mac.c      |  32 +--
 drivers/net/wireless/zd1211rw/zd_usb.c      |   2 +-
 include/net/mac80211.h                      | 254 +++++++++++--------
 net/mac80211/ieee80211_i.h                  |   8 -
 net/mac80211/main.c                         |  54 +++-
 net/mac80211/mesh_hwmp.c                    |   6 +-
 net/mac80211/rate.c                         |  52 ++--
 net/mac80211/rate.h                         |   5 +-
 net/mac80211/rc80211_minstrel.c             |  72 +++---
 net/mac80211/rc80211_pid.h                  |   1 +
 net/mac80211/rc80211_pid_algo.c             |  27 +-
 net/mac80211/rc80211_pid_debugfs.c          |   5 +-
 net/mac80211/sta_info.h                     |   4 +-
 net/mac80211/tx.c                           | 378 +++++++++++++---------------
 net/mac80211/wext.c                         |   4 +-
 47 files changed, 889 insertions(+), 685 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 9a1e0c514c08..b96ebfe4ef3e 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -341,15 +341,14 @@ static void adm8211_interrupt_tci(struct ieee80211_hw *dev)
 		pci_unmap_single(priv->pdev, info->mapping,
 				 info->skb->len, PCI_DMA_TODEVICE);
 
-		memset(&txi->status, 0, sizeof(txi->status));
+		ieee80211_tx_info_clear_status(txi);
+
 		skb_pull(skb, sizeof(struct adm8211_tx_hdr));
 		memcpy(skb_push(skb, info->hdrlen), skb->cb, info->hdrlen);
-		if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK)) {
-			if (status & TDES0_STATUS_ES)
-				txi->status.excessive_retries = 1;
-			else
-				txi->flags |= IEEE80211_TX_STAT_ACK;
-		}
+		if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK) &&
+		    !(status & TDES0_STATUS_ES))
+			txi->flags |= IEEE80211_TX_STAT_ACK;
+
 		ieee80211_tx_status_irqsafe(dev, skb);
 
 		info->skb = NULL;
@@ -1691,8 +1690,10 @@ static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_rate *txrate = ieee80211_get_tx_rate(dev, info);
+	u8 rc_flags;
 
-	short_preamble = !!(txrate->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE);
+	rc_flags = info->control.rates[0].flags;
+	short_preamble = !!(rc_flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
 	plcp_signal = txrate->bitrate;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
@@ -1724,10 +1725,10 @@ static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	if (short_preamble)
 		txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_SHORT_PREAMBLE);
 
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_ENABLE_RTS);
 
-	txhdr->retry_limit = info->control.retry_limit;
+	txhdr->retry_limit = info->control.rates[0].count;
 
 	adm8211_tx_raw(dev, skb, plcp_signal, hdrlen);
 
diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 44401f6f578d..3773d983ea66 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -541,8 +541,8 @@ ath5k_pci_probe(struct pci_dev *pdev,
 
 	/* set up multi-rate retry capabilities */
 	if (sc->ah->ah_version == AR5K_AR5212) {
-		hw->max_altrates = 3;
-		hw->max_altrate_tries = 11;
+		hw->max_rates = 4;
+		hw->max_rate_tries = 11;
 	}
 
 	/* Finish private driver data initialization */
@@ -1181,7 +1181,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 		ieee80211_get_hdrlen_from_skb(skb), AR5K_PKT_TYPE_NORMAL,
 		(sc->power_level * 2),
 		ieee80211_get_tx_rate(sc->hw, info)->hw_value,
-		info->control.retry_limit, keyidx, 0, flags, 0, 0);
+		info->control.rates[0].count, keyidx, 0, flags, 0, 0);
 	if (ret)
 		goto err_unmap;
 
@@ -1193,7 +1193,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 			break;
 
 		mrr_rate[i] = rate->hw_value;
-		mrr_tries[i] = info->control.retries[i].limit;
+		mrr_tries[i] = info->control.rates[i + 1].count;
 	}
 
 	ah->ah_setup_mrr_tx_desc(ah, ds,
@@ -1849,30 +1849,26 @@ ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq)
 		pci_unmap_single(sc->pdev, bf->skbaddr, skb->len,
 				PCI_DMA_TODEVICE);
 
-		memset(&info->status, 0, sizeof(info->status));
-		info->tx_rate_idx = ath5k_hw_to_driver_rix(sc,
-				ts.ts_rate[ts.ts_final_idx]);
-		info->status.retry_count = ts.ts_longretry;
-
+		ieee80211_tx_info_clear_status(info);
 		for (i = 0; i < 4; i++) {
-			struct ieee80211_tx_altrate *r =
-				&info->status.retries[i];
+			struct ieee80211_tx_rate *r =
+				&info->status.rates[i];
 
 			if (ts.ts_rate[i]) {
-				r->rate_idx = ath5k_hw_to_driver_rix(sc, ts.ts_rate[i]);
-				r->limit = ts.ts_retry[i];
+				r->idx = ath5k_hw_to_driver_rix(sc, ts.ts_rate[i]);
+				r->count = ts.ts_retry[i];
 			} else {
-				r->rate_idx = -1;
-				r->limit = 0;
+				r->idx = -1;
+				r->count = 0;
 			}
 		}
 
-		info->status.excessive_retries = 0;
+		/* count the successful attempt as well */
+		info->status.rates[ts.ts_final_idx].count++;
+
 		if (unlikely(ts.ts_status)) {
 			sc->ll_stats.dot11ACKFailureCount++;
-			if (ts.ts_status & AR5K_TXERR_XRETRY)
-				info->status.excessive_retries = 1;
-			else if (ts.ts_status & AR5K_TXERR_FILT)
+			if (ts.ts_status & AR5K_TXERR_FILT)
 				info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
 		} else {
 			info->flags |= IEEE80211_TX_STAT_ACK;
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 32acaf7ff622..a7656a3ea1b0 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -457,12 +457,13 @@ void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
 	DPRINTF(sc, ATH_DBG_XMIT,
 		"%s: TX complete: skb: %p\n", __func__, skb);
 
+	ieee80211_tx_info_clear_status(tx_info);
 	if (tx_info->flags & IEEE80211_TX_CTL_NO_ACK ||
 		tx_info->flags & IEEE80211_TX_STAT_TX_FILTERED) {
-		/* free driver's private data area of tx_info */
-		if (tx_info->driver_data[0] != NULL)
-			kfree(tx_info->driver_data[0]);
-			tx_info->driver_data[0] = NULL;
+		/* free driver's private data area of tx_info, XXX: HACK! */
+		if (tx_info->control.vif != NULL)
+			kfree(tx_info->control.vif);
+			tx_info->control.vif = NULL;
 	}
 
 	if (tx_status->flags & ATH_TX_BAR) {
@@ -470,17 +471,12 @@ void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
 		tx_status->flags &= ~ATH_TX_BAR;
 	}
 
-	if (tx_status->flags & (ATH_TX_ERROR | ATH_TX_XRETRY)) {
-		if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK)) {
-			/* Frame was not ACKed, but an ACK was expected */
-			tx_info->status.excessive_retries = 1;
-		}
-	} else {
+	if (!(tx_status->flags & (ATH_TX_ERROR | ATH_TX_XRETRY))) {
 		/* Frame was ACKed */
 		tx_info->flags |= IEEE80211_TX_STAT_ACK;
 	}
 
-	tx_info->status.retry_count = tx_status->retries;
+	tx_info->status.rates[0].count = tx_status->retries + 1;
 
 	ieee80211_tx_status(hw, skb);
 	if (an)
diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index 9b2526030965..6afafeddeda2 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -1864,24 +1864,21 @@ static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband,
 
 	hdr = (struct ieee80211_hdr *)skb->data;
 	fc = hdr->frame_control;
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+	/* XXX: UGLY HACK!! */
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
 
 	spin_lock_bh(&sc->node_lock);
 	an = ath_node_find(sc, hdr->addr1);
 	spin_unlock_bh(&sc->node_lock);
 
-	if (!an || !priv_sta || !ieee80211_is_data(fc)) {
-		if (tx_info->driver_data[0] != NULL) {
-			kfree(tx_info->driver_data[0]);
-			tx_info->driver_data[0] = NULL;
-		}
+	if (tx_info_priv == NULL)
 		return;
-	}
-	if (tx_info->driver_data[0] != NULL) {
+
+	if (an && priv_sta && ieee80211_is_data(fc))
 		ath_rate_tx_complete(sc, an, priv_sta, tx_info_priv);
-		kfree(tx_info->driver_data[0]);
-		tx_info->driver_data[0] = NULL;
-	}
+
+	kfree(tx_info_priv);
+	tx_info->control.vif = NULL;
 }
 
 static void ath_tx_aggr_resp(struct ath_softc *sc,
@@ -1927,10 +1924,11 @@ static void ath_tx_aggr_resp(struct ath_softc *sc,
 	}
 }
 
-static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
-			 struct ieee80211_sta *sta, void *priv_sta,
-			 struct sk_buff *skb, struct rate_selection *sel)
+static void ath_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
+			 struct ieee80211_tx_rate_control *txrc)
 {
+	struct ieee80211_supported_band *sband = txrc->sband;
+	struct sk_buff *skb = txrc->skb;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct ath_softc *sc = priv;
 	struct ieee80211_hw *hw = sc->hw;
@@ -1945,17 +1943,17 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
 
 	DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__);
 
-	/* allocate driver private area of tx_info */
-	tx_info->driver_data[0] = kzalloc(sizeof(*tx_info_priv), GFP_ATOMIC);
-	ASSERT(tx_info->driver_data[0] != NULL);
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+	/* allocate driver private area of tx_info, XXX: UGLY HACK! */
+	tx_info->control.vif = kzalloc(sizeof(*tx_info_priv), GFP_ATOMIC);
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
+	ASSERT(tx_info_priv != NULL);
 
 	lowest_idx = rate_lowest_index(sband, sta);
 	tx_info_priv->min_rate = (sband->bitrates[lowest_idx].bitrate * 2) / 10;
 	/* lowest rate for management and multicast/broadcast frames */
 	if (!ieee80211_is_data(fc) ||
 	    is_multicast_ether_addr(hdr->addr1) || !sta) {
-		sel->rate_idx = lowest_idx;
+		tx_info->control.rates[0].idx = lowest_idx;
 		return;
 	}
 
@@ -1966,8 +1964,10 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
 			  tx_info_priv->rcs,
 			  &is_probe,
 			  false);
+#if 0
 	if (is_probe)
 		sel->probe_idx = ath_rc_priv->tx_ratectrl.probe_rate;
+#endif
 
 	/* Ratecontrol sometimes returns invalid rate index */
 	if (tx_info_priv->rcs[0].rix != 0xff)
@@ -1975,7 +1975,7 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband,
 	else
 		tx_info_priv->rcs[0].rix = ath_rc_priv->prev_data_rix;
 
-	sel->rate_idx = tx_info_priv->rcs[0].rix;
+	tx_info->control.rates[0].idx = tx_info_priv->rcs[0].rix;
 
 	/* Check if aggregation has to be enabled for this tid */
 
diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c
index ba818cc2fb5c..9fa395418a6c 100644
--- a/drivers/net/wireless/ath9k/xmit.c
+++ b/drivers/net/wireless/ath9k/xmit.c
@@ -168,7 +168,9 @@ static void fill_min_rates(struct sk_buff *skb, struct ath_tx_control *txctl)
 
 	hdr = (struct ieee80211_hdr *)skb->data;
 	fc = hdr->frame_control;
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+
+	/* XXX: HACK! */
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
 
 	if (ieee80211_is_mgmt(fc) || ieee80211_is_ctl(fc)) {
 		txctl->use_minrate = 1;
@@ -288,13 +290,16 @@ static int ath_tx_prepare(struct ath_softc *sc,
 
 	if (tx_info->flags & IEEE80211_TX_CTL_NO_ACK)
 		txctl->flags |= ATH9K_TXDESC_NOACK;
-	if (tx_info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+
+	if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		txctl->flags |= ATH9K_TXDESC_RTSENA;
 
 	/*
 	 * Setup for rate calculations.
 	 */
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+
+	/* XXX: HACK! */
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
 	rcs = tx_info_priv->rcs;
 
 	if (ieee80211_is_data(fc) && !txctl->use_minrate) {
@@ -855,7 +860,9 @@ static int ath_tx_send_normal(struct ath_softc *sc,
 
 	skb = (struct sk_buff *)bf->bf_mpdu;
 	tx_info = IEEE80211_SKB_CB(skb);
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+
+	/* XXX: HACK! */
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
 	memcpy(bf->bf_rcs, tx_info_priv->rcs, 4 * sizeof(tx_info_priv->rcs[0]));
 
 	/* update starting sequence number for subsequent ADDBA request */
@@ -1249,8 +1256,9 @@ static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq)
 		}
 		skb = bf->bf_mpdu;
 		tx_info = IEEE80211_SKB_CB(skb);
-		tx_info_priv = (struct ath_tx_info_priv *)
-			tx_info->driver_data[0];
+
+		/* XXX: HACK! */
+		tx_info_priv = (struct ath_tx_info_priv *) tx_info->control.vif;
 		if (ds->ds_txstat.ts_status & ATH9K_TXERR_FILT)
 			tx_info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
 		if ((ds->ds_txstat.ts_status & ATH9K_TXERR_FILT) == 0 &&
@@ -1431,7 +1439,8 @@ static int ath_tx_send_ampdu(struct ath_softc *sc,
 
 	skb = (struct sk_buff *)bf->bf_mpdu;
 	tx_info = IEEE80211_SKB_CB(skb);
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+	/* XXX: HACK! */
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
 	memcpy(bf->bf_rcs, tx_info_priv->rcs, 4 * sizeof(tx_info_priv->rcs[0]));
 
 	/* Add sub-frame to BAW */
@@ -1466,7 +1475,7 @@ static u32 ath_lookup_rate(struct ath_softc *sc,
 	skb = (struct sk_buff *)bf->bf_mpdu;
 	tx_info = IEEE80211_SKB_CB(skb);
 	tx_info_priv = (struct ath_tx_info_priv *)
-		tx_info->driver_data[0];
+		tx_info->control.vif; /* XXX: HACK! */
 	memcpy(bf->bf_rcs,
 		tx_info_priv->rcs, 4 * sizeof(tx_info_priv->rcs[0]));
 
@@ -1927,7 +1936,8 @@ static int ath_tx_start_dma(struct ath_softc *sc,
 
 	bf->bf_flags = txctl->flags;
 	bf->bf_keytype = txctl->keytype;
-	tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0];
+	/* XXX: HACK! */
+	tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif;
 	rcs = tx_info_priv->rcs;
 	bf->bf_rcs[0] = rcs[0];
 	bf->bf_rcs[1] = rcs[1];
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index 098f886976f6..6d65a02b7052 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -1387,13 +1387,11 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
 
 			info = IEEE80211_SKB_CB(meta->skb);
 
-			memset(&info->status, 0, sizeof(info->status));
-
 			/*
 			 * Call back to inform the ieee80211 subsystem about
 			 * the status of the transmission.
 			 */
-			frame_succeed = b43_fill_txstatus_report(info, status);
+			frame_succeed = b43_fill_txstatus_report(dev, info, status);
 #ifdef CONFIG_B43_DEBUG
 			if (frame_succeed)
 				ring->nr_succeed_tx_packets++;
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 9aeeb6553a91..2a599fb772d9 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4555,7 +4555,7 @@ static int b43_wireless_init(struct ssb_device *dev)
 		BIT(NL80211_IFTYPE_ADHOC);
 
 	hw->queues = b43_modparam_qos ? 4 : 1;
-	hw->max_altrates = 1;
+	hw->max_rates = 2;
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
 		SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac);
diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c
index 401591267592..1036bef8c4cc 100644
--- a/drivers/net/wireless/b43/pio.c
+++ b/drivers/net/wireless/b43/pio.c
@@ -587,9 +587,8 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev,
 	spin_lock(&q->lock); /* IRQs are already disabled. */
 
 	info = IEEE80211_SKB_CB(pack->skb);
-	memset(&info->status, 0, sizeof(info->status));
 
-	b43_fill_txstatus_report(info, status);
+	b43_fill_txstatus_report(dev, info, status);
 
 	total_len = pack->skb->len + b43_txhdr_size(dev);
 	total_len = roundup(total_len, 4);
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 2fabcf8f0474..adba89b816d4 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -185,7 +185,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		       u8 *_txhdr,
 		       const unsigned char *fragment_data,
 		       unsigned int fragment_len,
-		       const struct ieee80211_tx_info *info,
+		       struct ieee80211_tx_info *info,
 		       u16 cookie)
 {
 	struct b43_txhdr *txhdr = (struct b43_txhdr *)_txhdr;
@@ -202,6 +202,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	u16 phy_ctl = 0;
 	u8 extra_ft = 0;
 	struct ieee80211_rate *txrate;
+	struct ieee80211_tx_rate *rates;
 
 	memset(txhdr, 0, sizeof(*txhdr));
 
@@ -291,7 +292,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		phy_ctl |= B43_TXH_PHY_ENC_OFDM;
 	else
 		phy_ctl |= B43_TXH_PHY_ENC_CCK;
-	if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE)
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
 		phy_ctl |= B43_TXH_PHY_SHORTPRMBL;
 
 	switch (b43_ieee80211_antenna_sanitize(dev, info->antenna_sel_tx)) {
@@ -314,6 +315,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		B43_WARN_ON(1);
 	}
 
+	rates = info->control.rates;
 	/* MAC control */
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		mac_ctl |= B43_TXH_MAC_ACK;
@@ -324,12 +326,22 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		mac_ctl |= B43_TXH_MAC_STMSDU;
 	if (phy->type == B43_PHYTYPE_A)
 		mac_ctl |= B43_TXH_MAC_5GHZ;
-	if (info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT)
+
+	/* Overwrite rates[0].count to make the retry calculation
+	 * in the tx status easier. need the actual retry limit to
+	 * detect whether the fallback rate was used.
+	 */
+	if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
+	    (rates[0].count <= dev->wl->hw->conf.long_frame_max_tx_count)) {
+		rates[0].count = dev->wl->hw->conf.long_frame_max_tx_count;
 		mac_ctl |= B43_TXH_MAC_LONGFRAME;
+	} else {
+		rates[0].count = dev->wl->hw->conf.short_frame_max_tx_count;
+	}
 
 	/* Generate the RTS or CTS-to-self frame */
-	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
-	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
+	if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
+	    (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) {
 		unsigned int len;
 		struct ieee80211_hdr *hdr;
 		int rts_rate, rts_rate_fb;
@@ -344,7 +356,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		rts_rate_fb = b43_calc_fallback_rate(rts_rate);
 		rts_rate_fb_ofdm = b43_is_ofdm_rate(rts_rate_fb);
 
-		if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+		if (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 			struct ieee80211_cts *cts;
 
 			if (b43_is_old_txhdr_format(dev)) {
@@ -687,10 +699,18 @@ void b43_handle_txstatus(struct b43_wldev *dev,
 /* Fill out the mac80211 TXstatus report based on the b43-specific
  * txstatus report data. This returns a boolean whether the frame was
  * successfully transmitted. */
-bool b43_fill_txstatus_report(struct ieee80211_tx_info *report,
+bool b43_fill_txstatus_report(struct b43_wldev *dev,
+			      struct ieee80211_tx_info *report,
 			      const struct b43_txstatus *status)
 {
 	bool frame_success = 1;
+	int retry_limit;
+
+	/* preserve the confiured retry limit before clearing the status
+	 * The xmit function has overwritten the rc's value with the actual
+	 * retry limit done by the hardware */
+	retry_limit = report->status.rates[0].count;
+	ieee80211_tx_info_clear_status(report);
 
 	if (status->acked) {
 		/* The frame was ACKed. */
@@ -700,14 +720,32 @@ bool b43_fill_txstatus_report(struct ieee80211_tx_info *report,
 		if (!(report->flags & IEEE80211_TX_CTL_NO_ACK)) {
 			/* ...but we expected an ACK. */
 			frame_success = 0;
-			report->status.excessive_retries = 1;
 		}
 	}
 	if (status->frame_count == 0) {
 		/* The frame was not transmitted at all. */
-		report->status.retry_count = 0;
-	} else
-		report->status.retry_count = status->frame_count - 1;
+		report->status.rates[0].count = 0;
+	} else if (status->rts_count > dev->wl->hw->conf.short_frame_max_tx_count) {
+		/*
+		 * If the short retries (RTS, not data frame) have exceeded
+		 * the limit, the hw will not have tried the selected rate,
+		 * but will have used the fallback rate instead.
+		 * Don't let the rate control count attempts for the selected
+		 * rate in this case, otherwise the statistics will be off.
+		 */
+		report->status.rates[0].count = 0;
+		report->status.rates[1].count = status->frame_count;
+	} else {
+		if (status->frame_count > retry_limit) {
+			report->status.rates[0].count = retry_limit;
+			report->status.rates[1].count = status->frame_count -
+					retry_limit;
+
+		} else {
+			report->status.rates[0].count = status->frame_count;
+			report->status.rates[1].idx = -1;
+		}
+	}
 
 	return frame_success;
 }
diff --git a/drivers/net/wireless/b43/xmit.h b/drivers/net/wireless/b43/xmit.h
index 0215faf47541..4fb2a190f7a7 100644
--- a/drivers/net/wireless/b43/xmit.h
+++ b/drivers/net/wireless/b43/xmit.h
@@ -178,7 +178,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 		       u8 * txhdr,
 		       const unsigned char *fragment_data,
 		       unsigned int fragment_len,
-		       const struct ieee80211_tx_info *txctl, u16 cookie);
+		       struct ieee80211_tx_info *txctl, u16 cookie);
 
 /* Transmit Status */
 struct b43_txstatus {
@@ -294,7 +294,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr);
 
 void b43_handle_txstatus(struct b43_wldev *dev,
 			 const struct b43_txstatus *status);
-bool b43_fill_txstatus_report(struct ieee80211_tx_info *report,
+bool b43_fill_txstatus_report(struct b43_wldev *dev,
+			      struct ieee80211_tx_info *report,
 			      const struct b43_txstatus *status);
 
 void b43_tx_suspend(struct b43_wldev *dev);
diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c
index fb6819e40f38..308c2647f002 100644
--- a/drivers/net/wireless/b43legacy/dma.c
+++ b/drivers/net/wireless/b43legacy/dma.c
@@ -1411,6 +1411,7 @@ void b43legacy_dma_handle_txstatus(struct b43legacy_wldev *dev,
 	struct b43legacy_dmaring *ring;
 	struct b43legacy_dmadesc_generic *desc;
 	struct b43legacy_dmadesc_meta *meta;
+	int retry_limit;
 	int slot;
 
 	ring = parse_cookie(dev, status->cookie, &slot);
@@ -1437,25 +1438,42 @@ void b43legacy_dma_handle_txstatus(struct b43legacy_wldev *dev,
 			struct ieee80211_tx_info *info;
 			BUG_ON(!meta->skb);
 			info = IEEE80211_SKB_CB(meta->skb);
-			/* Call back to inform the ieee80211 subsystem about the
-			 * status of the transmission.
-			 * Some fields of txstat are already filled in dma_tx().
-			 */
 
-			memset(&info->status, 0, sizeof(info->status));
+			/* preserve the confiured retry limit before clearing the status
+			 * The xmit function has overwritten the rc's value with the actual
+			 * retry limit done by the hardware */
+			retry_limit = info->status.rates[0].count;
+			ieee80211_tx_info_clear_status(info);
 
-			if (status->acked) {
+			if (status->acked)
 				info->flags |= IEEE80211_TX_STAT_ACK;
+
+			if (status->rts_count > dev->wl->hw->conf.short_frame_max_tx_count) {
+				/*
+				 * If the short retries (RTS, not data frame) have exceeded
+				 * the limit, the hw will not have tried the selected rate,
+				 * but will have used the fallback rate instead.
+				 * Don't let the rate control count attempts for the selected
+				 * rate in this case, otherwise the statistics will be off.
+				 */
+				info->status.rates[0].count = 0;
+				info->status.rates[1].count = status->frame_count;
 			} else {
-				if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
-					 info->status.excessive_retries = 1;
+				if (status->frame_count > retry_limit) {
+					info->status.rates[0].count = retry_limit;
+					info->status.rates[1].count = status->frame_count -
+							retry_limit;
+
+				} else {
+					info->status.rates[0].count = status->frame_count;
+					info->status.rates[1].idx = -1;
+				}
 			}
-			if (status->frame_count == 0) {
-				/* The frame was not transmitted at all. */
-				info->status.retry_count = 0;
-			} else
-				info->status.retry_count = status->frame_count
-							   - 1;
+
+			/* Call back to inform the ieee80211 subsystem about the
+			 * status of the transmission.
+			 * Some fields of txstat are already filled in dma_tx().
+			 */
 			ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb);
 			/* skb is freed by ieee80211_tx_status_irqsafe() */
 			meta->skb = NULL;
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 78e46365f69e..9edbdf9cb50f 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3682,7 +3682,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
 		BIT(NL80211_IFTYPE_WDS) |
 		BIT(NL80211_IFTYPE_ADHOC);
 	hw->queues = 1; /* FIXME: hardware has more queues */
-	hw->max_altrates = 1;
+	hw->max_rates = 2;
 	SET_IEEE80211_DEV(hw, dev->dev);
 	if (is_valid_ether_addr(sprom->et1mac))
 		SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac);
diff --git a/drivers/net/wireless/b43legacy/pio.c b/drivers/net/wireless/b43legacy/pio.c
index a86c7647fa2d..746d5361bba0 100644
--- a/drivers/net/wireless/b43legacy/pio.c
+++ b/drivers/net/wireless/b43legacy/pio.c
@@ -491,6 +491,7 @@ void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev,
 	struct b43legacy_pioqueue *queue;
 	struct b43legacy_pio_txpacket *packet;
 	struct ieee80211_tx_info *info;
+	int retry_limit;
 
 	queue = parse_cookie(dev, status->cookie, &packet);
 	B43legacy_WARN_ON(!queue);
@@ -503,11 +504,37 @@ void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev,
 				sizeof(struct b43legacy_txhdr_fw3));
 
 	info = IEEE80211_SKB_CB(packet->skb);
-	memset(&info->status, 0, sizeof(info->status));
+
+	/* preserve the confiured retry limit before clearing the status
+	 * The xmit function has overwritten the rc's value with the actual
+	 * retry limit done by the hardware */
+	retry_limit = info->status.rates[0].count;
+	ieee80211_tx_info_clear_status(info);
 
 	if (status->acked)
 		info->flags |= IEEE80211_TX_STAT_ACK;
-	info->status.retry_count = status->frame_count - 1;
+
+	if (status->rts_count > dev->wl->hw->conf.short_frame_max_tx_count) {
+		/*
+		 * If the short retries (RTS, not data frame) have exceeded
+		 * the limit, the hw will not have tried the selected rate,
+		 * but will have used the fallback rate instead.
+		 * Don't let the rate control count attempts for the selected
+		 * rate in this case, otherwise the statistics will be off.
+		 */
+		info->status.rates[0].count = 0;
+		info->status.rates[1].count = status->frame_count;
+	} else {
+		if (status->frame_count > retry_limit) {
+			info->status.rates[0].count = retry_limit;
+			info->status.rates[1].count = status->frame_count -
+					retry_limit;
+
+		} else {
+			info->status.rates[0].count = status->frame_count;
+			info->status.rates[1].idx = -1;
+		}
+	}
 	ieee80211_tx_status_irqsafe(dev->wl->hw, packet->skb);
 	packet->skb = NULL;
 
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index 65e833781608..12fca99f7578 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -188,7 +188,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 			       struct b43legacy_txhdr_fw3 *txhdr,
 			       const unsigned char *fragment_data,
 			       unsigned int fragment_len,
-			       const struct ieee80211_tx_info *info,
+			       struct ieee80211_tx_info *info,
 			       u16 cookie)
 {
 	const struct ieee80211_hdr *wlhdr;
@@ -201,6 +201,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 	u32 mac_ctl = 0;
 	u16 phy_ctl = 0;
 	struct ieee80211_rate *tx_rate;
+	struct ieee80211_tx_rate *rates;
 
 	wlhdr = (const struct ieee80211_hdr *)fragment_data;
 
@@ -274,7 +275,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 	/* PHY TX Control word */
 	if (rate_ofdm)
 		phy_ctl |= B43legacy_TX4_PHY_OFDM;
-	if (dev->short_preamble)
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
 		phy_ctl |= B43legacy_TX4_PHY_SHORTPRMBL;
 	switch (info->antenna_sel_tx) {
 	case 0:
@@ -291,6 +292,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 	}
 
 	/* MAC control */
+	rates = info->control.rates;
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		mac_ctl |= B43legacy_TX4_MAC_ACK;
 	if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)
@@ -299,12 +301,22 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 		mac_ctl |= B43legacy_TX4_MAC_STMSDU;
 	if (rate_fb_ofdm)
 		mac_ctl |= B43legacy_TX4_MAC_FALLBACKOFDM;
-	if (info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT)
+
+	/* Overwrite rates[0].count to make the retry calculation
+	 * in the tx status easier. need the actual retry limit to
+	 * detect whether the fallback rate was used.
+	 */
+	if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
+	    (rates[0].count <= dev->wl->hw->conf.long_frame_max_tx_count)) {
+		rates[0].count = dev->wl->hw->conf.long_frame_max_tx_count;
 		mac_ctl |= B43legacy_TX4_MAC_LONGFRAME;
+	} else {
+		rates[0].count = dev->wl->hw->conf.short_frame_max_tx_count;
+	}
 
 	/* Generate the RTS or CTS-to-self frame */
-	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
-	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
+	if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
+	    (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) {
 		unsigned int len;
 		struct ieee80211_hdr *hdr;
 		int rts_rate;
@@ -319,7 +331,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 		if (rts_rate_fb_ofdm)
 			mac_ctl |= B43legacy_TX4_MAC_CTSFALLBACKOFDM;
 
-		if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+		if (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 			ieee80211_ctstoself_get(dev->wl->hw,
 						info->control.vif,
 						fragment_data,
@@ -362,7 +374,7 @@ int b43legacy_generate_txhdr(struct b43legacy_wldev *dev,
 			      u8 *txhdr,
 			      const unsigned char *fragment_data,
 			      unsigned int fragment_len,
-			      const struct ieee80211_tx_info *info,
+			      struct ieee80211_tx_info *info,
 			      u16 cookie)
 {
 	return generate_txhdr_fw3(dev, (struct b43legacy_txhdr_fw3 *)txhdr,
diff --git a/drivers/net/wireless/b43legacy/xmit.h b/drivers/net/wireless/b43legacy/xmit.h
index e56777e0feab..62e09d02788f 100644
--- a/drivers/net/wireless/b43legacy/xmit.h
+++ b/drivers/net/wireless/b43legacy/xmit.h
@@ -80,7 +80,7 @@ int b43legacy_generate_txhdr(struct b43legacy_wldev *dev,
 			      u8 *txhdr,
 			      const unsigned char *fragment_data,
 			      unsigned int fragment_len,
-			      const struct ieee80211_tx_info *info,
+			      struct ieee80211_tx_info *info,
 			      u16 cookie);
 
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index c25daec4f93d..f440ed0fe543 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -422,34 +422,6 @@ static void rs_free_sta(void *priv, struct ieee80211_sta *sta,
 }
 
 
-/*
- * get ieee prev rate from rate scale table.
- * for A and B mode we need to overright prev
- * value
- */
-static int rs_adjust_next_rate(struct iwl3945_priv *priv, int rate)
-{
-	int next_rate = iwl3945_get_prev_ieee_rate(rate);
-
-	switch (priv->band) {
-	case IEEE80211_BAND_5GHZ:
-		if (rate == IWL_RATE_12M_INDEX)
-			next_rate = IWL_RATE_9M_INDEX;
-		else if (rate == IWL_RATE_6M_INDEX)
-			next_rate = IWL_RATE_6M_INDEX;
-		break;
-/* XXX cannot be invoked in current mac80211 so not a regression
-	case MODE_IEEE80211B:
-		if (rate == IWL_RATE_11M_INDEX_TABLE)
-			next_rate = IWL_RATE_5M_INDEX_TABLE;
-		break;
- */
-	default:
-		break;
-	}
-
-	return next_rate;
-}
 /**
  * rs_tx_status - Update rate control values based on Tx results
  *
@@ -460,17 +432,21 @@ static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband
 			 struct ieee80211_sta *sta, void *priv_sta,
 			 struct sk_buff *skb)
 {
-	u8 retries, current_count;
+	u8 retries = 0, current_count;
 	int scale_rate_index, first_index, last_index;
 	unsigned long flags;
 	struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_rate;
 	struct iwl3945_rs_sta *rs_sta = priv_sta;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	int i;
 
 	IWL_DEBUG_RATE("enter\n");
 
-	retries = info->status.retry_count;
-	first_index = sband->bitrates[info->tx_rate_idx].hw_value;
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++)
+		retries += info->status.rates[i].count;
+	retries--;
+
+	first_index = sband->bitrates[info->status.rates[0].idx].hw_value;
 	if ((first_index < 0) || (first_index >= IWL_RATE_COUNT)) {
 		IWL_DEBUG_RATE("leave: Rate out of bounds: %d\n", first_index);
 		return;
@@ -502,7 +478,7 @@ static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband
 			last_index = scale_rate_index;
 		} else {
 			current_count = priv->retry_rate;
-			last_index = rs_adjust_next_rate(priv,
+			last_index = iwl3945_rs_next_rate(priv,
 							 scale_rate_index);
 		}
 
@@ -518,7 +494,7 @@ static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband
 
 		if (retries)
 			scale_rate_index =
-			    rs_adjust_next_rate(priv, scale_rate_index);
+			    iwl3945_rs_next_rate(priv, scale_rate_index);
 	}
 
 
@@ -630,10 +606,11 @@ static u16 iwl3945_get_adjacent_rate(struct iwl3945_rs_sta *rs_sta,
  * rate table and must reference the driver allocated rate table
  *
  */
-static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
-			struct ieee80211_sta *sta, void *priv_sta,
-			struct sk_buff *skb, struct rate_selection *sel)
+static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta,
+			void *priv_sta,	struct ieee80211_tx_rate_control *txrc)
 {
+	struct ieee80211_supported_band *sband = txrc->sband;
+	struct sk_buff *skb = txrc->skb;
 	u8 low = IWL_RATE_INVALID;
 	u8 high = IWL_RATE_INVALID;
 	u16 high_low;
@@ -649,6 +626,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	u16 fc, rate_mask;
 	struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_r;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	IWL_DEBUG_RATE("enter\n");
 
@@ -659,7 +637,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
 	    is_multicast_ether_addr(hdr->addr1) ||
 	    !sta || !priv_sta) {
 		IWL_DEBUG_RATE("leave: No STA priv data to update!\n");
-		sel->rate_idx = rate_lowest_index(sband, sta);
+		info->control.rates[0].idx = rate_lowest_index(sband, sta);
 		return;
 	}
 
@@ -792,9 +770,10 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
 
 	rs_sta->last_txrate_idx = index;
 	if (sband->band == IEEE80211_BAND_5GHZ)
-		sel->rate_idx = rs_sta->last_txrate_idx - IWL_FIRST_OFDM_RATE;
+		info->control.rates[0].idx = rs_sta->last_txrate_idx -
+				IWL_FIRST_OFDM_RATE;
 	else
-		sel->rate_idx = rs_sta->last_txrate_idx;
+		info->control.rates[0].idx = rs_sta->last_txrate_idx;
 
 	IWL_DEBUG_RATE("leave: %d\n", index);
 }
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index 8a00245be51e..7afafb629706 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -261,6 +261,35 @@ static inline const char *iwl3945_get_tx_fail_reason(u32 status)
 }
 #endif
 
+/*
+ * get ieee prev rate from rate scale table.
+ * for A and B mode we need to overright prev
+ * value
+ */
+int iwl3945_rs_next_rate(struct iwl3945_priv *priv, int rate)
+{
+	int next_rate = iwl3945_get_prev_ieee_rate(rate);
+
+	switch (priv->band) {
+	case IEEE80211_BAND_5GHZ:
+		if (rate == IWL_RATE_12M_INDEX)
+			next_rate = IWL_RATE_9M_INDEX;
+		else if (rate == IWL_RATE_6M_INDEX)
+			next_rate = IWL_RATE_6M_INDEX;
+		break;
+/* XXX cannot be invoked in current mac80211 so not a regression
+	case MODE_IEEE80211B:
+		if (rate == IWL_RATE_11M_INDEX_TABLE)
+			next_rate = IWL_RATE_5M_INDEX_TABLE;
+		break;
+ */
+	default:
+		break;
+	}
+
+	return next_rate;
+}
+
 
 /**
  * iwl3945_tx_queue_reclaim - Reclaim Tx queue entries already Tx'd
@@ -308,6 +337,7 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv,
 	struct iwl3945_tx_resp *tx_resp = (void *)&pkt->u.raw[0];
 	u32  status = le32_to_cpu(tx_resp->status);
 	int rate_idx;
+	int fail, i;
 
 	if ((index >= txq->q.n_bd) || (iwl3945_x2_queue_used(&txq->q, index) == 0)) {
 		IWL_ERROR("Read index for DMA queue txq_id (%d) index %d "
@@ -318,9 +348,36 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv,
 	}
 
 	info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb[0]);
-	memset(&info->status, 0, sizeof(info->status));
+	ieee80211_tx_info_clear_status(info);
+
+	/* Fill the MRR chain with some info about on-chip retransmissions */
+	rate_idx = iwl3945_hwrate_to_plcp_idx(tx_resp->rate);
+	if (info->band == IEEE80211_BAND_5GHZ)
+		rate_idx -= IWL_FIRST_OFDM_RATE;
+
+	fail = tx_resp->failure_frame;
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+		int next = iwl3945_rs_next_rate(priv, rate_idx);
+
+		info->status.rates[i].idx = rate_idx;
+
+		/*
+		 * Put remaining into the last count as best approximation
+		 * of saying exactly what the hardware would have done...
+		 */
+		if ((rate_idx == next) || (i == IEEE80211_TX_MAX_RATES - 1)) {
+			info->status.rates[i].count = fail;
+			break;
+		}
+
+		info->status.rates[i].count = priv->retry_rate;
+		fail -= priv->retry_rate;
+		rate_idx = next;
+		if (fail <= 0)
+			break;
+	}
+	info->status.rates[i].count++; /* add final attempt */
 
-	info->status.retry_count = tx_resp->failure_frame;
 	/* tx_status->rts_retry_count = tx_resp->failure_rts; */
 	info->flags |= ((status & TX_STATUS_MSK) == TX_STATUS_SUCCESS) ?
 				IEEE80211_TX_STAT_ACK : 0;
@@ -329,10 +386,6 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv,
 			txq_id, iwl3945_get_tx_fail_reason(status), status,
 			tx_resp->rate, tx_resp->failure_frame);
 
-	rate_idx = iwl3945_hwrate_to_plcp_idx(tx_resp->rate);
-	if (info->band == IEEE80211_BAND_5GHZ)
-		rate_idx -= IWL_FIRST_OFDM_RATE;
-	info->tx_rate_idx = rate_idx;
 	IWL_DEBUG_TX_REPLY("Tx queue reclaim %d\n", index);
 	iwl3945_tx_queue_reclaim(priv, txq_id, index);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h
index bdd32475b99c..592c5958723b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.h
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.h
@@ -954,6 +954,8 @@ static inline int is_channel_ibss(const struct iwl3945_channel_info *ch)
 extern const struct iwl3945_channel_info *iwl3945_get_channel_info(
 	const struct iwl3945_priv *priv, enum ieee80211_band band, u16 channel);
 
+extern int iwl3945_rs_next_rate(struct iwl3945_priv *priv, int rate);
+
 /* Requires full declaration of iwl3945_priv before including */
 #include "iwl-3945-io.h"
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c
index 9838de5f4369..222c2baa95ca 100644
--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
+++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
@@ -619,10 +619,10 @@ static void iwl4965_gain_computation(struct iwl_priv *priv,
 static void iwl4965_rts_tx_cmd_flag(struct ieee80211_tx_info *info,
 			__le32 *tx_flags)
 {
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		*tx_flags |= TX_CMD_FLG_RTS_MSK;
 		*tx_flags &= ~TX_CMD_FLG_CTS_MSK;
-	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+	} else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 		*tx_flags &= ~TX_CMD_FLG_RTS_MSK;
 		*tx_flags |= TX_CMD_FLG_CTS_MSK;
 	}
@@ -2070,7 +2070,7 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv,
 				   agg->frame_count, agg->start_idx, idx);
 
 		info = IEEE80211_SKB_CB(priv->txq[txq_id].txb[idx].skb[0]);
-		info->status.retry_count = tx_resp->failure_frame;
+		info->status.rates[0].count = tx_resp->failure_frame + 1;
 		info->flags &= ~IEEE80211_TX_CTL_AMPDU;
 		info->flags |= iwl_is_tx_success(status)?
 			IEEE80211_TX_STAT_ACK : 0;
@@ -2227,7 +2227,7 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv,
 			iwl_txq_check_empty(priv, sta_id, tid, txq_id);
 		}
 	} else {
-		info->status.retry_count = tx_resp->failure_frame;
+		info->status.rates[0].count = tx_resp->failure_frame + 1;
 		info->flags |=
 			iwl_is_tx_success(status) ? IEEE80211_TX_STAT_ACK : 0;
 		iwl_hwrate_to_tx_control(priv,
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 56a3f0c84a1e..d5282fa65084 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -390,8 +390,8 @@ static void iwl5000_chain_noise_reset(struct iwl_priv *priv)
 static void iwl5000_rts_tx_cmd_flag(struct ieee80211_tx_info *info,
 			__le32 *tx_flags)
 {
-	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
-	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT))
+	if ((info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
+	    (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT))
 		*tx_flags |= TX_CMD_FLG_RTS_CTS_MSK;
 	else
 		*tx_flags &= ~TX_CMD_FLG_RTS_CTS_MSK;
@@ -1154,7 +1154,7 @@ static int iwl5000_tx_status_reply_tx(struct iwl_priv *priv,
 				   agg->frame_count, agg->start_idx, idx);
 
 		info = IEEE80211_SKB_CB(priv->txq[txq_id].txb[idx].skb[0]);
-		info->status.retry_count = tx_resp->failure_frame;
+		info->status.rates[0].count = tx_resp->failure_frame + 1;
 		info->flags &= ~IEEE80211_TX_CTL_AMPDU;
 		info->flags |= iwl_is_tx_success(status)?
 			IEEE80211_TX_STAT_ACK : 0;
@@ -1307,7 +1307,7 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv,
 			iwl_txq_check_empty(priv, sta_id, tid, txq_id);
 		}
 	} else {
-		info->status.retry_count = tx_resp->failure_frame;
+		info->status.rates[0].count = tx_resp->failure_frame + 1;
 		info->flags =
 			iwl_is_tx_success(status) ? IEEE80211_TX_STAT_ACK : 0;
 		iwl_hwrate_to_tx_control(priv,
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index e10e0ca09ce9..f685e5d6c281 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -798,7 +798,7 @@ static void rs_tx_status(void *priv_r, struct ieee80211_supported_band *sband,
 	    !(info->flags & IEEE80211_TX_STAT_AMPDU))
 		return;
 
-	retries = info->status.retry_count;
+	retries = info->status.rates[0].count - 1;
 
 	if (retries > 15)
 		retries = 15;
@@ -830,20 +830,15 @@ static void rs_tx_status(void *priv_r, struct ieee80211_supported_band *sband,
 	if (priv->band == IEEE80211_BAND_5GHZ)
 		rs_index -= IWL_FIRST_OFDM_RATE;
 
-	if ((info->tx_rate_idx < 0) ||
-	    (tbl_type.is_SGI ^
-		!!(info->flags & IEEE80211_TX_CTL_SHORT_GI)) ||
-	    (tbl_type.is_fat ^
-		!!(info->flags & IEEE80211_TX_CTL_40_MHZ_WIDTH)) ||
-	    (tbl_type.is_dup ^
-		!!(info->flags & IEEE80211_TX_CTL_DUP_DATA)) ||
-	    (tbl_type.ant_type ^ info->antenna_sel_tx) ||
-	    (!!(tx_rate & RATE_MCS_HT_MSK) ^
-		!!(info->flags & IEEE80211_TX_CTL_OFDM_HT)) ||
-	    (!!(tx_rate & RATE_MCS_GF_MSK) ^
-		!!(info->flags & IEEE80211_TX_CTL_GREEN_FIELD)) ||
+	if ((info->status.rates[0].idx < 0) ||
+	    (tbl_type.is_SGI != !!(info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI)) ||
+	    (tbl_type.is_fat != !!(info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)) ||
+	    (tbl_type.is_dup != !!(info->status.rates[0].flags & IEEE80211_TX_RC_DUP_DATA)) ||
+	    (tbl_type.ant_type != info->antenna_sel_tx) ||
+	    (!!(tx_rate & RATE_MCS_HT_MSK) != !!(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) ||
+	    (!!(tx_rate & RATE_MCS_GF_MSK) != !!(info->status.rates[0].flags & IEEE80211_TX_RC_GREEN_FIELD)) ||
 	    (hw->wiphy->bands[priv->band]->bitrates[rs_index].bitrate !=
-	     hw->wiphy->bands[info->band]->bitrates[info->tx_rate_idx].bitrate)) {
+	     hw->wiphy->bands[info->band]->bitrates[info->status.rates[0].idx].bitrate)) {
 		IWL_DEBUG_RATE("initial rate does not match 0x%x\n", tx_rate);
 		goto out;
 	}
@@ -2098,15 +2093,17 @@ static void rs_initialize_lq(struct iwl_priv *priv,
 	return;
 }
 
-static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
-			struct ieee80211_sta *sta, void *priv_sta,
-			struct sk_buff *skb, struct rate_selection *sel)
+static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, void *priv_sta,
+			struct ieee80211_tx_rate_control *txrc)
 {
 
 	int i;
+	struct sk_buff *skb = txrc->skb;
+	struct ieee80211_supported_band *sband = txrc->sband;
 	struct iwl_priv *priv = (struct iwl_priv *)priv_r;
 	struct ieee80211_conf *conf = &priv->hw->conf;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	__le16 fc;
 	struct iwl_lq_sta *lq_sta;
 
@@ -2117,7 +2114,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
 	fc = hdr->frame_control;
 	if (!ieee80211_is_data(fc) || is_multicast_ether_addr(hdr->addr1) ||
 	    !sta || !priv_sta) {
-		sel->rate_idx = rate_lowest_index(sband, sta);
+		info->control.rates[0].idx = rate_lowest_index(sband, sta);
 		return;
 	}
 
@@ -2143,13 +2140,13 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband,
 	}
 
 	if ((i < 0) || (i > IWL_RATE_COUNT)) {
-		sel->rate_idx = rate_lowest_index(sband, sta);
+		info->control.rates[0].idx = rate_lowest_index(sband, sta);
 		return;
 	}
 
 	if (sband->band == IEEE80211_BAND_5GHZ)
 		i -= IWL_FIRST_OFDM_RATE;
-	sel->rate_idx = i;
+	info->control.rates[0].idx = i;
 }
 
 static void *rs_alloc_sta(void *priv_rate, struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 10f5a0a233fe..2c4162e48140 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -88,26 +88,27 @@ EXPORT_SYMBOL(iwl_rates);
  * translate ucode response to mac80211 tx status control values
  */
 void iwl_hwrate_to_tx_control(struct iwl_priv *priv, u32 rate_n_flags,
-				  struct ieee80211_tx_info *control)
+				  struct ieee80211_tx_info *info)
 {
 	int rate_index;
+	struct ieee80211_tx_rate *r = &info->control.rates[0];
 
-	control->antenna_sel_tx =
+	info->antenna_sel_tx =
 		((rate_n_flags & RATE_MCS_ANT_ABC_MSK) >> RATE_MCS_ANT_POS);
 	if (rate_n_flags & RATE_MCS_HT_MSK)
-		control->flags |= IEEE80211_TX_CTL_OFDM_HT;
+		r->flags |= IEEE80211_TX_RC_MCS;
 	if (rate_n_flags & RATE_MCS_GF_MSK)
-		control->flags |= IEEE80211_TX_CTL_GREEN_FIELD;
+		r->flags |= IEEE80211_TX_RC_GREEN_FIELD;
 	if (rate_n_flags & RATE_MCS_FAT_MSK)
-		control->flags |= IEEE80211_TX_CTL_40_MHZ_WIDTH;
+		r->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
 	if (rate_n_flags & RATE_MCS_DUP_MSK)
-		control->flags |= IEEE80211_TX_CTL_DUP_DATA;
+		r->flags |= IEEE80211_TX_RC_DUP_DATA;
 	if (rate_n_flags & RATE_MCS_SGI_MSK)
-		control->flags |= IEEE80211_TX_CTL_SHORT_GI;
+		r->flags |= IEEE80211_TX_RC_SHORT_GI;
 	rate_index = iwl_hwrate_to_plcp_idx(rate_n_flags);
-	if (control->band == IEEE80211_BAND_5GHZ)
+	if (info->band == IEEE80211_BAND_5GHZ)
 		rate_index -= IWL_FIRST_OFDM_RATE;
-	control->tx_rate_idx = rate_index;
+	r->idx = rate_index;
 }
 EXPORT_SYMBOL(iwl_hwrate_to_tx_control);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index b1464c71ea0a..2cd33b4e9e13 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2395,6 +2395,7 @@ static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv,
 {
 	__le16 fc = hdr->frame_control;
 	__le32 tx_flags = cmd->cmd.tx.tx_flags;
+	u8 rc_flags = info->control.rates[0].flags;
 
 	cmd->cmd.tx.stop_time.life_time = TX_CMD_LIFE_TIME_INFINITE;
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
@@ -2421,10 +2422,10 @@ static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv,
 		tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
 	}
 
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
+	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		tx_flags |= TX_CMD_FLG_RTS_MSK;
 		tx_flags &= ~TX_CMD_FLG_CTS_MSK;
-	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+	} else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 		tx_flags &= ~TX_CMD_FLG_RTS_MSK;
 		tx_flags |= TX_CMD_FLG_CTS_MSK;
 	}
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index 241ddcfa352e..d1fc305de5fe 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -592,14 +592,14 @@ EXPORT_SYMBOL_GPL(lbtf_remove_card);
 void lbtf_send_tx_feedback(struct lbtf_private *priv, u8 retrycnt, u8 fail)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(priv->tx_skb);
-	memset(&info->status, 0, sizeof(info->status));
+
+	ieee80211_tx_info_clear_status(info);
 	/*
 	 * Commented out, otherwise we never go beyond 1Mbit/s using mac80211
 	 * default pid rc algorithm.
 	 *
 	 * info->status.retry_count = MRVL_DEFAULT_RETRIES - retrycnt;
 	 */
-	info->status.excessive_retries = fail ? 1 : 0;
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && !fail)
 		info->flags |= IEEE80211_TX_STAT_ACK;
 	skb_pull(priv->tx_skb, sizeof(struct txpd));
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index e2aeef8de707..c57652325286 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -209,7 +209,7 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 	/* TODO: set mactime */
 	rx_status.freq = data->channel->center_freq;
 	rx_status.band = data->channel->band;
-	rx_status.rate_idx = info->tx_rate_idx;
+	rx_status.rate_idx = info->control.rates[0].idx;
 	/* TODO: simulate signal strength (and optional packet drop) */
 
 	/* Copy skb to all enabled radios that are on the current frequency */
@@ -269,13 +269,9 @@ static int mac80211_hwsim_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	if (txi->control.sta)
 		hwsim_check_sta_magic(txi->control.sta);
 
-	memset(&txi->status, 0, sizeof(txi->status));
-	if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK)) {
-		if (ack)
-			txi->flags |= IEEE80211_TX_STAT_ACK;
-		else
-			txi->status.excessive_retries = 1;
-	}
+	ieee80211_tx_info_clear_status(txi);
+	if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK) && ack)
+		txi->flags |= IEEE80211_TX_STAT_ACK;
 	ieee80211_tx_status_irqsafe(hw, skb);
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 81a9756254fb..65be5eca2340 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -548,7 +548,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 	spin_lock_irqsave(&priv->tx_queue.lock, flags);
 	while (entry != (struct sk_buff *)&priv->tx_queue) {
 		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry);
-		range = (void *)info->driver_data;
+		range = (void *)info->rate_driver_data;
 		if (range->start_addr == addr) {
 			struct p54_control_hdr *entry_hdr;
 			struct p54_tx_control_allocdata *entry_data;
@@ -559,7 +559,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 				struct memrecord *mr;
 
 				ni = IEEE80211_SKB_CB(entry->next);
-				mr = (struct memrecord *)ni->driver_data;
+				mr = (struct memrecord *)ni->rate_driver_data;
 				freed = mr->start_addr - last_addr;
 			} else
 				freed = priv->rx_end - last_addr;
@@ -568,7 +568,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 			__skb_unlink(entry, &priv->tx_queue);
 			spin_unlock_irqrestore(&priv->tx_queue.lock, flags);
 
-			memset(&info->status, 0, sizeof(info->status));
+			ieee80211_tx_info_clear_status(info);
 			entry_hdr = (struct p54_control_hdr *) entry->data;
 			entry_data = (struct p54_tx_control_allocdata *) entry_hdr->data;
 			if ((entry_hdr->magic1 & cpu_to_le16(0x4000)) != 0)
@@ -578,10 +578,8 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb)
 			if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 				if (!(payload->status & 0x01))
 					info->flags |= IEEE80211_TX_STAT_ACK;
-				else
-					info->status.excessive_retries = 1;
 			}
-			info->status.retry_count = payload->retries - 1;
+			info->status.rates[0].count = payload->retries;
 			info->status.ack_signal = p54_rssi_to_dbm(dev,
 					le16_to_cpu(payload->ack_rssi));
 			skb_pull(entry, sizeof(*hdr) + pad + sizeof(*entry_data));
@@ -699,7 +697,7 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
 	while (left--) {
 		u32 hole_size;
 		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry);
-		struct memrecord *range = (void *)info->driver_data;
+		struct memrecord *range = (void *)info->rate_driver_data;
 		hole_size = range->start_addr - last_addr;
 		if (!target_skb && hole_size >= len) {
 			target_skb = entry->prev;
@@ -715,7 +713,7 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
 		largest_hole = max(largest_hole, priv->rx_end - last_addr - len);
 		if (!skb_queue_empty(&priv->tx_queue)) {
 			struct ieee80211_tx_info *info = IEEE80211_SKB_CB(target_skb);
-			struct memrecord *range = (void *)info->driver_data;
+			struct memrecord *range = (void *)info->rate_driver_data;
 			target_addr = range->end_addr;
 		}
 	} else
@@ -723,7 +721,7 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb,
 
 	if (skb) {
 		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-		struct memrecord *range = (void *)info->driver_data;
+		struct memrecord *range = (void *)info->rate_driver_data;
 		range->start_addr = target_addr;
 		range->end_addr = target_addr + len;
 		__skb_queue_after(&priv->tx_queue, target_skb, skb);
@@ -806,6 +804,7 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	size_t padding, len;
 	u8 rate;
 	u8 cts_rate = 0x20;
+	u8 rc_flags;
 
 	current_queue = &priv->tx_stats[skb_get_queue_mapping(skb) + 4];
 	if (unlikely(current_queue->len > current_queue->limit))
@@ -828,18 +827,19 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 		hdr->magic1 = cpu_to_le16(0x0010);
 	hdr->len = cpu_to_le16(len);
 	hdr->type = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 0 : cpu_to_le16(1);
-	hdr->retry1 = hdr->retry2 = info->control.retry_limit;
+	hdr->retry1 = hdr->retry2 = info->control.rates[0].count;
 
 	/* TODO: add support for alternate retry TX rates */
 	rate = ieee80211_get_tx_rate(dev, info)->hw_value;
-	if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE) {
+	rc_flags = info->control.rates[0].flags;
+	if (rc_flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) {
 		rate |= 0x10;
 		cts_rate |= 0x10;
 	}
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
+	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		rate |= 0x40;
 		cts_rate |= ieee80211_get_rts_cts_rate(dev, info)->hw_value;
-	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+	} else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 		rate |= 0x20;
 		cts_rate |= ieee80211_get_rts_cts_rate(dev, info)->hw_value;
 	}
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 697806cf94e2..e1feab8b6b02 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -498,7 +498,9 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 {
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb);
+	struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb);
 	enum data_queue_qid qid = skb_get_queue_mapping(entry->skb);
+	u8 rate_idx, rate_flags;
 
 	/*
 	 * Unmap the skb.
@@ -528,14 +530,18 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 	rt2x00dev->link.qual.tx_failed +=
 	    test_bit(TXDONE_FAILURE, &txdesc->flags);
 
+	rate_idx = skbdesc->tx_rate_idx;
+	rate_flags = skbdesc->tx_rate_flags;
+
 	/*
 	 * Initialize TX status
 	 */
 	memset(&tx_info->status, 0, sizeof(tx_info->status));
 	tx_info->status.ack_signal = 0;
-	tx_info->status.excessive_retries =
-	    test_bit(TXDONE_EXCESSIVE_RETRY, &txdesc->flags);
-	tx_info->status.retry_count = txdesc->retry;
+	tx_info->status.rates[0].idx = rate_idx;
+	tx_info->status.rates[0].flags = rate_flags;
+	tx_info->status.rates[0].count = txdesc->retry + 1;
+	tx_info->status.rates[1].idx = -1; /* terminate */
 
 	if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK)) {
 		if (test_bit(TXDONE_SUCCESS, &txdesc->flags))
@@ -544,7 +550,7 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 			rt2x00dev->low_level_stats.dot11ACKFailureCount++;
 	}
 
-	if (tx_info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
+	if (rate_flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		if (test_bit(TXDONE_SUCCESS, &txdesc->flags))
 			rt2x00dev->low_level_stats.dot11RTSSuccessCount++;
 		else if (test_bit(TXDONE_FAILURE, &txdesc->flags))
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 931183369f07..b32d59eafaa3 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -39,7 +39,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	unsigned int data_length;
 	int retval = 0;
 
-	if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+	if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
 		data_length = sizeof(struct ieee80211_cts);
 	else
 		data_length = sizeof(struct ieee80211_rts);
@@ -64,11 +64,11 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	 */
 	memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb));
 	rts_info = IEEE80211_SKB_CB(skb);
-	rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS;
-	rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT;
+	rts_info->control.rates[0].flags &= ~IEEE80211_TX_RC_USE_RTS_CTS;
+	rts_info->control.rates[0].flags &= ~IEEE80211_TX_RC_USE_CTS_PROTECT;
 	rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS;
 
-	if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+	if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
 		rts_info->flags |= IEEE80211_TX_CTL_NO_ACK;
 	else
 		rts_info->flags &= ~IEEE80211_TX_CTL_NO_ACK;
@@ -84,7 +84,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 		data_length += rt2x00crypto_tx_overhead(tx_info);
 #endif /* CONFIG_RT2X00_LIB_CRYPTO */
 
-	if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+	if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
 		ieee80211_ctstoself_get(rt2x00dev->hw, tx_info->control.vif,
 					frag_skb->data, data_length, tx_info,
 					(struct ieee80211_cts *)(skb->data));
@@ -146,8 +146,8 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	 * inside the hardware.
 	 */
 	frame_control = le16_to_cpu(ieee80211hdr->frame_control);
-	if ((tx_info->flags & (IEEE80211_TX_CTL_USE_RTS_CTS |
-			       IEEE80211_TX_CTL_USE_CTS_PROTECT)) &&
+	if ((tx_info->control.rates[0].flags & (IEEE80211_TX_RC_USE_RTS_CTS |
+						IEEE80211_TX_RC_USE_CTS_PROTECT)) &&
 	    !rt2x00dev->ops->hw->set_rts_threshold) {
 		if (rt2x00queue_available(queue) <= 1)
 			goto exit_fail;
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 451d410ecdae..070786ebd076 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -230,8 +230,15 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry,
 	/*
 	 * Determine retry information.
 	 */
-	txdesc->retry_limit = tx_info->control.retry_limit;
-	if (tx_info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT)
+	txdesc->retry_limit = tx_info->control.rates[0].count - 1;
+	/*
+	 * XXX: If at this point we knew whether the HW is going to use
+	 *	the RETRY_MODE bit or the retry_limit (currently all
+	 *	use the RETRY_MODE bit) we could do something like b43
+	 *	does, set the RETRY_MODE bit when the RC algorithm is
+	 *	requesting more than the long retry limit.
+	 */
+	if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		__set_bit(ENTRY_TXD_RETRY_MODE, &txdesc->flags);
 
 	/*
@@ -371,10 +378,12 @@ static void rt2x00queue_write_tx_descriptor(struct queue_entry *entry,
 
 int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
 {
+	struct ieee80211_tx_info *tx_info;
 	struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX);
 	struct txentry_desc txdesc;
 	struct skb_frame_desc *skbdesc;
 	unsigned int iv_len = 0;
+	u8 rate_idx, rate_flags;
 
 	if (unlikely(rt2x00queue_full(queue)))
 		return -EINVAL;
@@ -399,13 +408,18 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb)
 		iv_len = IEEE80211_SKB_CB(skb)->control.hw_key->iv_len;
 
 	/*
-	 * All information is retreived from the skb->cb array,
+	 * All information is retrieved from the skb->cb array,
 	 * now we should claim ownership of the driver part of that
-	 * array.
+	 * array, preserving the bitrate index and flags.
 	 */
+	tx_info = IEEE80211_SKB_CB(skb);
+	rate_idx = tx_info->control.rates[0].idx;
+	rate_flags = tx_info->control.rates[0].flags;
 	skbdesc = get_skb_frame_desc(entry->skb);
 	memset(skbdesc, 0, sizeof(*skbdesc));
 	skbdesc->entry = entry;
+	skbdesc->tx_rate_idx = rate_idx;
+	skbdesc->tx_rate_flags = rate_flags;
 
 	/*
 	 * When hardware encryption is supported, and this frame
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h
index 9dbf04f0f04c..4d3c7246f9ae 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.h
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.h
@@ -104,6 +104,8 @@ enum skb_frame_desc_flags {
  *
  * @flags: Frame flags, see &enum skb_frame_desc_flags.
  * @desc_len: Length of the frame descriptor.
+ * @tx_rate_idx: the index of the TX rate, used for TX status reporting
+ * @tx_rate_flags: the TX rate flags, used for TX status reporting
  * @desc: Pointer to descriptor part of the frame.
  *	Note that this pointer could point to something outside
  *	of the scope of the skb->data pointer.
@@ -113,9 +115,12 @@ enum skb_frame_desc_flags {
  * @entry: The entry to which this sk buffer belongs.
  */
 struct skb_frame_desc {
-	unsigned int flags;
+	u8 flags;
+
+	u8 desc_len;
+	u8 tx_rate_idx;
+	u8 tx_rate_flags;
 
-	unsigned int desc_len;
 	void *desc;
 
 	__le32 iv;
diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c
index e8d22393797f..6c226c024dd9 100644
--- a/drivers/net/wireless/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl8180_dev.c
@@ -182,15 +182,13 @@ static void rtl8180_handle_tx(struct ieee80211_hw *dev, unsigned int prio)
 				 skb->len, PCI_DMA_TODEVICE);
 
 		info = IEEE80211_SKB_CB(skb);
-		memset(&info->status, 0, sizeof(info->status));
+		ieee80211_tx_info_clear_status(info);
 
-		if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) {
-			if (flags & RTL818X_TX_DESC_FLAG_TX_OK)
-				info->flags |= IEEE80211_TX_STAT_ACK;
-			else
-				info->status.excessive_retries = 1;
-		}
-		info->status.retry_count = flags & 0xFF;
+		if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
+		    (flags & RTL818X_TX_DESC_FLAG_TX_OK))
+			info->flags |= IEEE80211_TX_STAT_ACK;
+
+		info->status.rates[0].count = (flags & 0xFF) + 1;
 
 		ieee80211_tx_status_irqsafe(dev, skb);
 		if (ring->entries - skb_queue_len(&ring->queue) == 2)
@@ -243,6 +241,7 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	unsigned int idx, prio;
 	dma_addr_t mapping;
 	u32 tx_flags;
+	u8 rc_flags;
 	u16 plcp_len = 0;
 	__le16 rts_duration = 0;
 
@@ -261,15 +260,16 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 		tx_flags |= RTL818X_TX_DESC_FLAG_DMA |
 			    RTL818X_TX_DESC_FLAG_NO_ENC;
 
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
+	rc_flags = info->control.rates[0].flags;
+	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		tx_flags |= RTL818X_TX_DESC_FLAG_RTS;
 		tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
-	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+	} else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 		tx_flags |= RTL818X_TX_DESC_FLAG_CTS;
 		tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 	}
 
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+	if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		rts_duration = ieee80211_rts_duration(dev, priv->vif, skb->len,
 						      info);
 
@@ -292,9 +292,9 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	entry->plcp_len = cpu_to_le16(plcp_len);
 	entry->tx_buf = cpu_to_le32(mapping);
 	entry->frame_len = cpu_to_le32(skb->len);
-	entry->flags2 = info->control.retries[0].rate_idx >= 0 ?
+	entry->flags2 = info->control.rates[1].idx >= 0 ?
 		ieee80211_get_alt_retry_rate(dev, info, 0)->bitrate << 4 : 0;
-	entry->retry_limit = info->control.retry_limit;
+	entry->retry_limit = info->control.rates[0].count;
 	entry->flags = cpu_to_le32(tx_flags);
 	__skb_queue_tail(&ring->queue, skb);
 	if (ring->entries - skb_queue_len(&ring->queue) < 2)
@@ -855,7 +855,7 @@ static int __devinit rtl8180_probe(struct pci_dev *pdev,
 	priv = dev->priv;
 	priv->pdev = pdev;
 
-	dev->max_altrates = 1;
+	dev->max_rates = 2;
 	SET_IEEE80211_DEV(dev, &pdev->dev);
 	pci_set_drvdata(pdev, dev);
 
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index cd839bcb6d78..6260ed8ce12b 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -160,13 +160,13 @@ static void rtl8187_tx_cb(struct urb *urb)
 {
 	struct sk_buff *skb = (struct sk_buff *)urb->context;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_hw *hw = info->driver_data[0];
+	struct ieee80211_hw *hw = info->rate_driver_data[0];
 	struct rtl8187_priv *priv = hw->priv;
 
-	usb_free_urb(info->driver_data[1]);
+	usb_free_urb(info->rate_driver_data[1]);
 	skb_pull(skb, priv->is_rtl8187b ? sizeof(struct rtl8187b_tx_hdr) :
 					  sizeof(struct rtl8187_tx_hdr));
-	memset(&info->status, 0, sizeof(info->status));
+	ieee80211_tx_info_clear_status(info);
 	info->flags |= IEEE80211_TX_STAT_ACK;
 	ieee80211_tx_status_irqsafe(hw, skb);
 }
@@ -194,12 +194,12 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 	flags |= ieee80211_get_tx_rate(dev, info)->hw_value << 24;
 	if (ieee80211_has_morefrags(((struct ieee80211_hdr *)skb->data)->frame_control))
 		flags |= RTL818X_TX_DESC_FLAG_MOREFRAG;
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) {
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) {
 		flags |= RTL818X_TX_DESC_FLAG_RTS;
 		flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 		rts_dur = ieee80211_rts_duration(dev, priv->vif,
 						 skb->len, info);
-	} else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) {
+	} else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) {
 		flags |= RTL818X_TX_DESC_FLAG_CTS;
 		flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19;
 	}
@@ -210,7 +210,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 		hdr->flags = cpu_to_le32(flags);
 		hdr->len = 0;
 		hdr->rts_duration = rts_dur;
-		hdr->retry = cpu_to_le32(info->control.retry_limit << 8);
+		hdr->retry = cpu_to_le32((info->control.rates[0].count - 1) << 8);
 		buf = hdr;
 
 		ep = 2;
@@ -228,7 +228,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 		memset(hdr, 0, sizeof(*hdr));
 		hdr->flags = cpu_to_le32(flags);
 		hdr->rts_duration = rts_dur;
-		hdr->retry = cpu_to_le32(info->control.retry_limit << 8);
+		hdr->retry = cpu_to_le32((info->control.rates[0].count - 1) << 8);
 		hdr->tx_duration =
 			ieee80211_generic_frame_duration(dev, priv->vif,
 							 skb->len, txrate);
@@ -240,8 +240,8 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
 			ep = epmap[skb_get_queue_mapping(skb)];
 	}
 
-	info->driver_data[0] = dev;
-	info->driver_data[1] = urb;
+	info->rate_driver_data[0] = dev;
+	info->rate_driver_data[1] = urb;
 
 	usb_fill_bulk_urb(urb, priv->udev, usb_sndbulkpipe(priv->udev, ep),
 			  buf, skb->len, rtl8187_tx_cb, skb);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 6c3e21887fc8..2f0802b29c4b 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -296,15 +296,14 @@ static void zd_op_stop(struct ieee80211_hw *hw)
  * If no status information has been requested, the skb is freed.
  */
 static void tx_status(struct ieee80211_hw *hw, struct sk_buff *skb,
-		      u32 flags, int ackssi, bool success)
+		      int ackssi, bool success)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
-	memset(&info->status, 0, sizeof(info->status));
+	ieee80211_tx_info_clear_status(info);
 
-	if (!success)
-		info->status.excessive_retries = 1;
-	info->flags |= flags;
+	if (success)
+		info->flags |= IEEE80211_TX_STAT_ACK;
 	info->status.ack_signal = ackssi;
 	ieee80211_tx_status_irqsafe(hw, skb);
 }
@@ -326,7 +325,7 @@ void zd_mac_tx_failed(struct ieee80211_hw *hw)
 	if (skb == NULL)
 		return;
 
-	tx_status(hw, skb, 0, 0, 0);
+	tx_status(hw, skb, 0, 0);
 }
 
 /**
@@ -342,12 +341,12 @@ void zd_mac_tx_failed(struct ieee80211_hw *hw)
 void zd_mac_tx_to_dev(struct sk_buff *skb, int error)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_hw *hw = info->driver_data[0];
+	struct ieee80211_hw *hw = info->rate_driver_data[0];
 
 	skb_pull(skb, sizeof(struct zd_ctrlset));
 	if (unlikely(error ||
 	    (info->flags & IEEE80211_TX_CTL_NO_ACK))) {
-		tx_status(hw, skb, 0, 0, !error);
+		tx_status(hw, skb, 0, !error);
 	} else {
 		struct sk_buff_head *q =
 			&zd_hw_mac(hw)->ack_wait_queue;
@@ -406,7 +405,8 @@ static int zd_calc_tx_length_us(u8 *service, u8 zd_rate, u16 tx_length)
 }
 
 static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
-	                   struct ieee80211_hdr *header, u32 flags)
+	                   struct ieee80211_hdr *header,
+	                   struct ieee80211_tx_info *info)
 {
 	/*
 	 * CONTROL TODO:
@@ -417,7 +417,7 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
 	cs->control = 0;
 
 	/* First fragment */
-	if (flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
+	if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)
 		cs->control |= ZD_CS_NEED_RANDOM_BACKOFF;
 
 	/* Multicast */
@@ -428,10 +428,10 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
 	if (ieee80211_is_pspoll(header->frame_control))
 		cs->control |= ZD_CS_PS_POLL_FRAME;
 
-	if (flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		cs->control |= ZD_CS_RTS;
 
-	if (flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
 		cs->control |= ZD_CS_SELF_CTS;
 
 	/* FIXME: Management frame? */
@@ -517,12 +517,12 @@ static int fill_ctrlset(struct zd_mac *mac,
 	txrate = ieee80211_get_tx_rate(mac->hw, info);
 
 	cs->modulation = txrate->hw_value;
-	if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE)
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
 		cs->modulation = txrate->hw_value_short;
 
 	cs->tx_length = cpu_to_le16(frag_len);
 
-	cs_set_control(mac, cs, hdr, info->flags);
+	cs_set_control(mac, cs, hdr, info);
 
 	packet_length = frag_len + sizeof(struct zd_ctrlset) + 10;
 	ZD_ASSERT(packet_length <= 0xffff);
@@ -577,7 +577,7 @@ static int zd_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	if (r)
 		return r;
 
-	info->driver_data[0] = hw;
+	info->rate_driver_data[0] = hw;
 
 	r = zd_usb_tx(&mac->chip.usb, skb);
 	if (r)
@@ -618,7 +618,7 @@ static int filter_ack(struct ieee80211_hw *hw, struct ieee80211_hdr *rx_hdr,
 		if (likely(!compare_ether_addr(tx_hdr->addr2, rx_hdr->addr1)))
 		{
 			__skb_unlink(skb, q);
-			tx_status(hw, skb, IEEE80211_TX_STAT_ACK, stats->signal, 1);
+			tx_status(hw, skb, stats->signal, 1);
 			goto out;
 		}
 	}
diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index a60ae86bd5c9..d7a2f52e40cf 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c
@@ -907,7 +907,7 @@ free_urb:
 	 * it might be freed by zd_mac_tx_to_dev or mac80211)
 	 */
 	info = IEEE80211_SKB_CB(skb);
-	usb = &zd_hw_mac(info->driver_data[0])->chip.usb;
+	usb = &zd_hw_mac(info->rate_driver_data[0])->chip.usb;
 	zd_mac_tx_to_dev(skb, urb->status);
 	free_tx_urb(usb, urb);
 	tx_dec_submitted_urbs(usb);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9801afb62545..3741c0a7978a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -214,29 +214,24 @@ struct ieee80211_bss_conf {
  * These flags are used with the @flags member of &ieee80211_tx_info.
  *
  * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame.
- * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame
- * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g.,
- *	for combined 802.11g / 802.11b networks)
+ * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence
+ *	number to this frame, taking care of not overwriting the fragment
+ *	number and increasing the sequence number only when the
+ *	IEEE80211_TX_CTL_FIRST_FRAGMENT flag is set. mac80211 will properly
+ *	assign sequence numbers to QoS-data frames but cannot do so correctly
+ *	for non-QoS-data and management frames because beacons need them from
+ *	that counter as well and mac80211 cannot guarantee proper sequencing.
+ *	If this flag is set, the driver should instruct the hardware to
+ *	assign a sequence number to the frame or assign one itself. Cf. IEEE
+ *	802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for
+ *	beacons and always be clear for frames without a sequence number field.
  * @IEEE80211_TX_CTL_NO_ACK: tell the low level not to wait for an ack
- * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: TBD
  * @IEEE80211_TX_CTL_CLEAR_PS_FILT: clear powersave filter for destination
  *	station
- * @IEEE80211_TX_CTL_REQUEUE: TBD
  * @IEEE80211_TX_CTL_FIRST_FRAGMENT: this is a first fragment of the frame
- * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD
- * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the
- *	through set_retry_limit configured long retry value
  * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon
  * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU
- * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number
- *	of streams when this flag is on can be extracted from antenna_sel_tx,
- *	so if 1 antenna is marked use SISO, 2 antennas marked use MIMO, n
- *	antennas marked use MIMO_n.
- * @IEEE80211_TX_CTL_GREEN_FIELD: use green field protection for this frame
- * @IEEE80211_TX_CTL_40_MHZ_WIDTH: send this frame using 40 Mhz channel width
- * @IEEE80211_TX_CTL_DUP_DATA: duplicate data frame on both 20 Mhz channels
- * @IEEE80211_TX_CTL_SHORT_GI: send this frame using short guard interval
- * @IEEE80211_TX_CTL_INJECTED: TBD
+ * @IEEE80211_TX_CTL_INJECTED: Frame was injected, internal to mac80211.
  * @IEEE80211_TX_STAT_TX_FILTERED: The frame was not transmitted
  *	because the destination STA was in powersave mode.
  * @IEEE80211_TX_STAT_ACK: Frame was acknowledged
@@ -244,62 +239,70 @@ struct ieee80211_bss_conf {
  * 	is for the whole aggregation.
  * @IEEE80211_TX_STAT_AMPDU_NO_BACK: no block ack was returned,
  * 	so consider using block ack request (BAR).
- * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence
- *	number to this frame, taking care of not overwriting the fragment
- *	number and increasing the sequence number only when the
- *	IEEE80211_TX_CTL_FIRST_FRAGMENT flags is set. mac80211 will properly
- *	assign sequence numbers to QoS-data frames but cannot do so correctly
- *	for non-QoS-data and management frames because beacons need them from
- *	that counter as well and mac80211 cannot guarantee proper sequencing.
- *	If this flag is set, the driver should instruct the hardware to
- *	assign a sequence number to the frame or assign one itself. Cf. IEEE
- *	802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for
- *	beacons always be clear for frames without a sequence number field.
+ * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be
+ *	set by rate control algorithms to indicate probe rate, will
+ *	be cleared for fragmented frames (except on the last fragment)
+ * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
-	IEEE80211_TX_CTL_USE_RTS_CTS		= BIT(2),
-	IEEE80211_TX_CTL_USE_CTS_PROTECT	= BIT(3),
-	IEEE80211_TX_CTL_NO_ACK			= BIT(4),
-	IEEE80211_TX_CTL_RATE_CTRL_PROBE	= BIT(5),
-	IEEE80211_TX_CTL_CLEAR_PS_FILT		= BIT(6),
-	IEEE80211_TX_CTL_REQUEUE		= BIT(7),
-	IEEE80211_TX_CTL_FIRST_FRAGMENT		= BIT(8),
-	IEEE80211_TX_CTL_SHORT_PREAMBLE		= BIT(9),
-	IEEE80211_TX_CTL_LONG_RETRY_LIMIT	= BIT(10),
-	IEEE80211_TX_CTL_SEND_AFTER_DTIM	= BIT(12),
-	IEEE80211_TX_CTL_AMPDU			= BIT(13),
-	IEEE80211_TX_CTL_OFDM_HT		= BIT(14),
-	IEEE80211_TX_CTL_GREEN_FIELD		= BIT(15),
-	IEEE80211_TX_CTL_40_MHZ_WIDTH		= BIT(16),
-	IEEE80211_TX_CTL_DUP_DATA		= BIT(17),
-	IEEE80211_TX_CTL_SHORT_GI		= BIT(18),
-	IEEE80211_TX_CTL_INJECTED		= BIT(19),
-	IEEE80211_TX_STAT_TX_FILTERED		= BIT(20),
-	IEEE80211_TX_STAT_ACK			= BIT(21),
-	IEEE80211_TX_STAT_AMPDU			= BIT(22),
-	IEEE80211_TX_STAT_AMPDU_NO_BACK		= BIT(23),
-	IEEE80211_TX_CTL_ASSIGN_SEQ		= BIT(24),
+	IEEE80211_TX_CTL_ASSIGN_SEQ		= BIT(1),
+	IEEE80211_TX_CTL_NO_ACK			= BIT(2),
+	IEEE80211_TX_CTL_CLEAR_PS_FILT		= BIT(3),
+	IEEE80211_TX_CTL_FIRST_FRAGMENT		= BIT(4),
+	IEEE80211_TX_CTL_SEND_AFTER_DTIM	= BIT(5),
+	IEEE80211_TX_CTL_AMPDU			= BIT(6),
+	IEEE80211_TX_CTL_INJECTED		= BIT(7),
+	IEEE80211_TX_STAT_TX_FILTERED		= BIT(8),
+	IEEE80211_TX_STAT_ACK			= BIT(9),
+	IEEE80211_TX_STAT_AMPDU			= BIT(10),
+	IEEE80211_TX_STAT_AMPDU_NO_BACK		= BIT(11),
+	IEEE80211_TX_CTL_RATE_CTRL_PROBE	= BIT(12),
+
+	/* XXX: remove this */
+	IEEE80211_TX_CTL_REQUEUE		= BIT(13),
 };
 
+enum mac80211_rate_control_flags {
+	IEEE80211_TX_RC_USE_RTS_CTS		= BIT(0),
+	IEEE80211_TX_RC_USE_CTS_PROTECT		= BIT(1),
+	IEEE80211_TX_RC_USE_SHORT_PREAMBLE	= BIT(2),
+
+	/* rate index is an MCS rate number instead of an index */
+	IEEE80211_TX_RC_MCS			= BIT(3),
+	IEEE80211_TX_RC_GREEN_FIELD		= BIT(4),
+	IEEE80211_TX_RC_40_MHZ_WIDTH		= BIT(5),
+	IEEE80211_TX_RC_DUP_DATA		= BIT(6),
+	IEEE80211_TX_RC_SHORT_GI		= BIT(7),
+};
+
+
+/* there are 40 bytes if you don't need the rateset to be kept */
+#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE 40
 
-#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE \
-	(sizeof(((struct sk_buff *)0)->cb) - 8)
-#define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \
-	(IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *))
+/* if you do need the rateset, then you have less space */
+#define IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE 24
 
-/* maximum number of alternate rate retry stages */
-#define IEEE80211_TX_MAX_ALTRATE	3
+/* maximum number of rate stages */
+#define IEEE80211_TX_MAX_RATES	5
 
 /**
- * struct ieee80211_tx_altrate - alternate rate selection/status
+ * struct ieee80211_tx_rate - rate selection/status
  *
- * @rate_idx: rate index to attempt to send with
+ * @idx: rate index to attempt to send with
+ * @flags: rate control flags (&enum mac80211_rate_control_flags)
  * @limit: number of retries before fallback
+ *
+ * A value of -1 for @idx indicates an invalid rate and, if used
+ * in an array of retry rates, that no more rates should be tried.
+ *
+ * When used for transmit status reporting, the driver should
+ * always report the rate along with the flags it used.
  */
-struct ieee80211_tx_altrate {
-	s8 rate_idx;
-	u8 limit;
+struct ieee80211_tx_rate {
+	s8 idx;
+	u8 count;
+	u8 flags;
 };
 
 /**
@@ -314,15 +317,12 @@ struct ieee80211_tx_altrate {
  * it may be NULL.
  *
  * @flags: transmit info flags, defined above
- * @band: TBD
- * @tx_rate_idx: TBD
+ * @band: the band to transmit on (use for checking for races)
  * @antenna_sel_tx: antenna to use, 0 for automatic diversity
  * @control: union for control data
  * @status: union for status data
  * @driver_data: array of driver_data pointers
  * @retry_count: number of retries
- * @excessive_retries: set to 1 if the frame was retried many times
- *	but not acknowledged
  * @ampdu_ack_len: number of aggregated frames.
  * 	relevant only if IEEE80211_TX_STATUS_AMPDU was set.
  * @ampdu_ack_map: block ack bit map for the aggregation.
@@ -333,31 +333,43 @@ struct ieee80211_tx_info {
 	/* common information */
 	u32 flags;
 	u8 band;
-	s8 tx_rate_idx;
+
 	u8 antenna_sel_tx;
 
-	/* 1 byte hole */
+	/* 2 byte hole */
 
 	union {
 		struct {
+			union {
+				/* rate control */
+				struct {
+					struct ieee80211_tx_rate rates[
+						IEEE80211_TX_MAX_RATES];
+					s8 rts_cts_rate_idx;
+				};
+				/* only needed before rate control */
+				unsigned long jiffies;
+			};
 			/* NB: vif can be NULL for injected frames */
 			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
 			struct ieee80211_sta *sta;
-			unsigned long jiffies;
-			s8 rts_cts_rate_idx;
-			u8 retry_limit;
-			struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE];
 		} control;
 		struct {
+			struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES];
+			u8 ampdu_ack_len;
 			u64 ampdu_ack_map;
 			int ack_signal;
-			struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE + 1];
-			u8 retry_count;
-			bool excessive_retries;
-			u8 ampdu_ack_len;
+			/* 8 bytes free */
 		} status;
-		void *driver_data[IEEE80211_TX_INFO_DRIVER_DATA_PTRS];
+		struct {
+			struct ieee80211_tx_rate driver_rates[
+				IEEE80211_TX_MAX_RATES];
+			void *rate_driver_data[
+				IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE / sizeof(void *)];
+		};
+		void *driver_data[
+			IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)];
 	};
 };
 
@@ -366,6 +378,41 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb)
 	return (struct ieee80211_tx_info *)skb->cb;
 }
 
+/**
+ * ieee80211_tx_info_clear_status - clear TX status
+ *
+ * @info: The &struct ieee80211_tx_info to be cleared.
+ *
+ * When the driver passes an skb back to mac80211, it must report
+ * a number of things in TX status. This function clears everything
+ * in the TX status but the rate control information (it does clear
+ * the count since you need to fill that in anyway).
+ *
+ * NOTE: You can only use this function if you do NOT use
+ *	 info->driver_data! Use info->rate_driver_data
+ *	 instead if you need only the less space that allows.
+ */
+static inline void
+ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
+{
+	int i;
+
+	BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) !=
+		     offsetof(struct ieee80211_tx_info, control.rates));
+	BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) !=
+		     offsetof(struct ieee80211_tx_info, driver_rates));
+	BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != 8);
+	/* clear the rate counts */
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++)
+		info->status.rates[i].count = 0;
+
+	BUILD_BUG_ON(
+	    offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23);
+	memset(&info->status.ampdu_ack_len, 0,
+	       sizeof(struct ieee80211_tx_info) -
+	       offsetof(struct ieee80211_tx_info, status.ampdu_ack_len));
+}
+
 
 /**
  * enum mac80211_rx_flags - receive flags
@@ -869,8 +916,8 @@ enum ieee80211_hw_flags {
  * @sta_data_size: size (in bytes) of the drv_priv data area
  *	within &struct ieee80211_sta.
  *
- * @max_altrates: maximum number of alternate rate retry stages
- * @max_altrate_tries: maximum number of tries for each stage
+ * @max_rates: maximum number of alternate rate retry stages
+ * @max_rate_tries: maximum number of tries for each stage
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -887,8 +934,8 @@ struct ieee80211_hw {
 	u16 ampdu_queues;
 	u16 max_listen_interval;
 	s8 max_signal;
-	u8 max_altrates;
-	u8 max_altrate_tries;
+	u8 max_rates;
+	u8 max_rate_tries;
 };
 
 /**
@@ -927,9 +974,9 @@ static inline struct ieee80211_rate *
 ieee80211_get_tx_rate(const struct ieee80211_hw *hw,
 		      const struct ieee80211_tx_info *c)
 {
-	if (WARN_ON(c->tx_rate_idx < 0))
+	if (WARN_ON(c->control.rates[0].idx < 0))
 		return NULL;
-	return &hw->wiphy->bands[c->band]->bitrates[c->tx_rate_idx];
+	return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[0].idx];
 }
 
 static inline struct ieee80211_rate *
@@ -945,9 +992,9 @@ static inline struct ieee80211_rate *
 ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
 			     const struct ieee80211_tx_info *c, int idx)
 {
-	if (c->control.retries[idx].rate_idx < 0)
+	if (c->control.rates[idx + 1].idx < 0)
 		return NULL;
-	return &hw->wiphy->bands[c->band]->bitrates[c->control.retries[idx].rate_idx];
+	return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[idx + 1].idx];
 }
 
 /**
@@ -1840,17 +1887,30 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw,
 
 
 /* Rate control API */
+
 /**
- * struct rate_selection - rate information for/from rate control algorithms
- *
- * @rate_idx: selected transmission rate index
- * @nonerp_idx: Non-ERP rate to use instead if ERP cannot be used
- * @probe_idx: rate for probing (or -1)
- * @max_rate_idx: maximum rate index that can be used, this is
- *	input to the algorithm and will be enforced
- */
-struct rate_selection {
-	s8 rate_idx, nonerp_idx, probe_idx, max_rate_idx;
+ * struct ieee80211_tx_rate_control - rate control information for/from RC algo
+ *
+ * @hw: The hardware the algorithm is invoked for.
+ * @sband: The band this frame is being transmitted on.
+ * @bss_conf: the current BSS configuration
+ * @reported_rate: The rate control algorithm can fill this in to indicate
+ *	which rate should be reported to userspace as the current rate and
+ *	used for rate calculations in the mesh network.
+ * @rts: whether RTS will be used for this frame because it is longer than the
+ *	RTS threshold
+ * @short_preamble: whether mac80211 will request short-preamble transmission
+ *	if the selected rate supports it
+ * @max_rate_idx: user-requested maximum rate (not MCS for now)
+ */
+struct ieee80211_tx_rate_control {
+	struct ieee80211_hw *hw;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_bss_conf *bss_conf;
+	struct sk_buff *skb;
+	struct ieee80211_tx_rate reported_rate;
+	bool rts, short_preamble;
+	u8 max_rate_idx;
 };
 
 struct rate_control_ops {
@@ -1869,10 +1929,8 @@ struct rate_control_ops {
 	void (*tx_status)(void *priv, struct ieee80211_supported_band *sband,
 			  struct ieee80211_sta *sta, void *priv_sta,
 			  struct sk_buff *skb);
-	void (*get_rate)(void *priv, struct ieee80211_supported_band *sband,
-			 struct ieee80211_sta *sta, void *priv_sta,
-			 struct sk_buff *skb,
-			 struct rate_selection *sel);
+	void (*get_rate)(void *priv, struct ieee80211_sta *sta, void *priv_sta,
+			 struct ieee80211_tx_rate_control *txrc);
 
 	void (*add_sta_debugfs)(void *priv, void *priv_sta,
 				struct dentry *dir);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6f8756d26a93..fe4efdd4253d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -142,7 +142,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result;
 #define IEEE80211_TX_FRAGMENTED		BIT(0)
 #define IEEE80211_TX_UNICAST		BIT(1)
 #define IEEE80211_TX_PS_BUFFERED	BIT(2)
-#define IEEE80211_TX_PROBE_LAST_FRAG	BIT(3)
 
 struct ieee80211_tx_data {
 	struct sk_buff *skb;
@@ -153,11 +152,6 @@ struct ieee80211_tx_data {
 	struct ieee80211_key *key;
 
 	struct ieee80211_channel *channel;
-	s8 rate_idx;
-	/* use this rate (if set) for last fragment; rate can
-	 * be set to lower rate for the first fragments, e.g.,
-	 * when using CTS protection with IEEE 802.11g. */
-	s8 last_frag_rate_idx;
 
 	/* Extra fragments (in addition to the first fragment
 	 * in skb) */
@@ -203,9 +197,7 @@ struct ieee80211_rx_data {
 struct ieee80211_tx_stored_packet {
 	struct sk_buff *skb;
 	struct sk_buff **extra_frag;
-	s8 last_frag_rate_idx;
 	int num_extra_frag;
-	bool last_frag_rate_ctrl_probe;
 };
 
 struct beacon_data {
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ffff54944f9d..88c1975a97a5 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -41,6 +41,8 @@
  */
 struct ieee80211_tx_status_rtap_hdr {
 	struct ieee80211_radiotap_header hdr;
+	u8 rate;
+	u8 padding_for_rate;
 	__le16 tx_flags;
 	u8 data_retries;
 } __attribute__ ((packed));
@@ -465,13 +467,28 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct ieee80211_sub_if_data *sdata;
 	struct net_device *prev_dev = NULL;
 	struct sta_info *sta;
+	int retry_count = -1, i;
+
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+		/* the HW cannot have attempted that rate */
+		if (i >= hw->max_rates) {
+			info->status.rates[i].idx = -1;
+			info->status.rates[i].count = 0;
+		}
+
+		retry_count += info->status.rates[i].count;
+	}
+	if (retry_count < 0)
+		retry_count = 0;
 
 	rcu_read_lock();
 
+	sband = local->hw.wiphy->bands[info->band];
+
 	sta = sta_info_get(local, hdr->addr1);
 
 	if (sta) {
-		if (info->status.excessive_retries &&
+		if (!(info->flags & IEEE80211_TX_STAT_ACK) &&
 		    test_sta_flags(sta, WLAN_STA_PS)) {
 			/*
 			 * The STA is in power save mode, so assume
@@ -502,12 +519,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			rcu_read_unlock();
 			return;
 		} else {
-			if (info->status.excessive_retries)
+			if (!(info->flags & IEEE80211_TX_STAT_ACK))
 				sta->tx_retry_failed++;
-			sta->tx_retry_count += info->status.retry_count;
+			sta->tx_retry_count += retry_count;
 		}
 
-		sband = local->hw.wiphy->bands[info->band];
 		rate_control_tx_status(local, sband, sta, skb);
 	}
 
@@ -528,9 +544,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			local->dot11TransmittedFrameCount++;
 			if (is_multicast_ether_addr(hdr->addr1))
 				local->dot11MulticastTransmittedFrameCount++;
-			if (info->status.retry_count > 0)
+			if (retry_count > 0)
 				local->dot11RetryCount++;
-			if (info->status.retry_count > 1)
+			if (retry_count > 1)
 				local->dot11MultipleRetryCount++;
 		}
 
@@ -574,19 +590,30 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr));
 	rthdr->hdr.it_present =
 		cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) |
-			    (1 << IEEE80211_RADIOTAP_DATA_RETRIES));
+			    (1 << IEEE80211_RADIOTAP_DATA_RETRIES) |
+			    (1 << IEEE80211_RADIOTAP_RATE));
 
 	if (!(info->flags & IEEE80211_TX_STAT_ACK) &&
 	    !is_multicast_ether_addr(hdr->addr1))
 		rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL);
 
-	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) &&
-	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT))
+	/*
+	 * XXX: Once radiotap gets the bitmap reset thing the vendor
+	 *	extensions proposal contains, we can actually report
+	 *	the whole set of tries we did.
+	 */
+	if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
+	    (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT))
 		rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS);
-	else if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+	else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS);
+	if (info->status.rates[0].idx >= 0 &&
+	    !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS))
+		rthdr->rate = sband->bitrates[
+				info->status.rates[0].idx].bitrate / 5;
 
-	rthdr->data_retries = info->status.retry_count;
+	/* for now report the total retry_count */
+	rthdr->data_retries = retry_count;
 
 	/* XXX: is this sufficient for BPF? */
 	skb_set_mac_header(skb, 0);
@@ -671,8 +698,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	BUG_ON(!ops->configure_filter);
 	local->ops = ops;
 
-	local->hw.queues = 1; /* default */
-
+	/* set up some defaults */
+	local->hw.queues = 1;
+	local->hw.max_rates = 1;
 	local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
 	local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
 	local->hw.conf.long_frame_max_tx_count = 4;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 501c7831adb4..e8d573d592e7 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -218,12 +218,16 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
 
 	if (sta->fail_avg >= 100)
 		return MAX_METRIC;
+
+	if (sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)
+		return MAX_METRIC;
+
 	err = (sta->fail_avg << ARITH_SHIFT) / 100;
 
 	/* bitrate is in units of 100 Kbps, while we need rate in units of
 	 * 1Mbps. This will be corrected on tx_time computation.
 	 */
-	rate = sband->bitrates[sta->last_txrate_idx].bitrate;
+	rate = sband->bitrates[sta->last_tx_rate.idx].bitrate;
 	tx_time = (device_constant + 10 * test_frame_len / rate);
 	estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
 	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 5d786720d935..3fa7ab285066 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -199,48 +199,44 @@ static void rate_control_release(struct kref *kref)
 }
 
 void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
-			   struct ieee80211_supported_band *sband,
-			   struct sta_info *sta, struct sk_buff *skb,
-			   struct rate_selection *sel)
+			   struct sta_info *sta,
+			   struct ieee80211_tx_rate_control *txrc)
 {
 	struct rate_control_ref *ref = sdata->local->rate_ctrl;
 	void *priv_sta = NULL;
 	struct ieee80211_sta *ista = NULL;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
 	int i;
 
-	sel->rate_idx = -1;
-	sel->nonerp_idx = -1;
-	sel->probe_idx = -1;
-	sel->max_rate_idx = sdata->max_ratectrl_rateidx;
-
 	if (sta) {
 		ista = &sta->sta;
 		priv_sta = sta->rate_ctrl_priv;
 	}
 
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+		info->control.rates[i].idx = -1;
+		info->control.rates[i].flags = 0;
+		info->control.rates[i].count = 1;
+	}
+
 	if (sta && sdata->force_unicast_rateidx > -1)
-		sel->rate_idx = sdata->force_unicast_rateidx;
+		info->control.rates[0].idx = sdata->force_unicast_rateidx;
 	else
-		ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel);
-
-	if (sdata->max_ratectrl_rateidx > -1 &&
-	    sel->rate_idx > sdata->max_ratectrl_rateidx)
-		sel->rate_idx = sdata->max_ratectrl_rateidx;
-
-	BUG_ON(sel->rate_idx < 0);
-
-	/* Select a non-ERP backup rate. */
-	if (sel->nonerp_idx < 0) {
-		for (i = 0; i < sband->n_bitrates; i++) {
-			struct ieee80211_rate *rate = &sband->bitrates[i];
-			if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate)
-				break;
-
-			if (rate_supported(ista, sband->band, i) &&
-			    !(rate->flags & IEEE80211_RATE_ERP_G))
-				sel->nonerp_idx = i;
-		}
+		ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+
+	/*
+	 * try to enforce the maximum rate the user wanted
+	 */
+	if (sdata->max_ratectrl_rateidx > -1)
+		for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+			if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS)
+				continue;
+			info->control.rates[i].idx =
+				min_t(s8, info->control.rates[i].idx,
+				      sdata->max_ratectrl_rateidx);
 	}
+
+	BUG_ON(info->control.rates[0].idx < 0);
 }
 
 struct rate_control_ref *rate_control_get(struct rate_control_ref *ref)
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index d0092f847f82..7c25edf9ac55 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -31,9 +31,8 @@ struct rate_control_ref {
 struct rate_control_ref *rate_control_alloc(const char *name,
 					    struct ieee80211_local *local);
 void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
-			   struct ieee80211_supported_band *sband,
-			   struct sta_info *sta, struct sk_buff *skb,
-			   struct rate_selection *sel);
+			   struct sta_info *sta,
+			   struct ieee80211_tx_rate_control *txrc);
 struct rate_control_ref *rate_control_get(struct rate_control_ref *ref);
 void rate_control_put(struct rate_control_ref *ref);
 
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index f6d69dab07a3..759ddd8bf0f4 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -169,30 +169,20 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
 {
 	struct minstrel_sta_info *mi = priv_sta;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_tx_altrate *ar = info->status.retries;
-	struct minstrel_priv *mp = priv;
-	int i, ndx, tries;
-	int success = 0;
+	struct ieee80211_tx_rate *ar = info->status.rates;
+	int i, ndx;
+	int success;
 
-	if (!info->status.excessive_retries)
-		success = 1;
+	success = !!(info->flags & IEEE80211_TX_STAT_ACK);
 
-	if (!mp->has_mrr || (ar[0].rate_idx < 0)) {
-		ndx = rix_to_ndx(mi, info->tx_rate_idx);
-		tries = info->status.retry_count + 1;
-		mi->r[ndx].success += success;
-		mi->r[ndx].attempts += tries;
-		return;
-	}
-
-	for (i = 0; i < 4; i++) {
-		if (ar[i].rate_idx < 0)
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+		if (ar[i].idx < 0)
 			break;
 
-		ndx = rix_to_ndx(mi, ar[i].rate_idx);
-		mi->r[ndx].attempts += ar[i].limit + 1;
+		ndx = rix_to_ndx(mi, ar[i].idx);
+		mi->r[ndx].attempts += ar[i].count;
 
-		if ((i != 3) && (ar[i + 1].rate_idx < 0))
+		if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0))
 			mi->r[ndx].success += success;
 	}
 
@@ -210,9 +200,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr,
 {
 	unsigned int retry = mr->adjusted_retry_count;
 
-	if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		retry = max(2U, min(mr->retry_count_rtscts, retry));
-	else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)
+	else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
 		retry = max(2U, min(mr->retry_count_cts, retry));
 	return retry;
 }
@@ -234,14 +224,15 @@ minstrel_get_next_sample(struct minstrel_sta_info *mi)
 }
 
 void
-minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband,
-                  struct ieee80211_sta *sta, void *priv_sta,
-                  struct sk_buff *skb, struct rate_selection *sel)
+minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
+		  void *priv_sta, struct ieee80211_tx_rate_control *txrc)
 {
+	struct sk_buff *skb = txrc->skb;
+	struct ieee80211_supported_band *sband = txrc->sband;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct minstrel_sta_info *mi = priv_sta;
 	struct minstrel_priv *mp = priv;
-	struct ieee80211_tx_altrate *ar = info->control.retries;
+	struct ieee80211_tx_rate *ar = info->control.rates;
 	unsigned int ndx, sample_ndx = 0;
 	bool mrr;
 	bool sample_slower = false;
@@ -251,16 +242,12 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband,
 	int sample_rate;
 
 	if (!sta || !mi || use_low_rate(skb)) {
-		sel->rate_idx = rate_lowest_index(sband, sta);
+		ar[0].idx = rate_lowest_index(sband, sta);
+		ar[0].count = mp->max_retry;
 		return;
 	}
 
-	mrr = mp->has_mrr;
-
-	/* mac80211 does not allow mrr for RTS/CTS */
-	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
-	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT))
-		mrr = false;
+	mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot;
 
 	if (time_after(jiffies, mi->stats_update + (mp->update_interval *
 			HZ) / 1000))
@@ -315,13 +302,12 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband,
 			mi->sample_deferred++;
 		}
 	}
-	sel->rate_idx = mi->r[ndx].rix;
-	info->control.retry_limit = minstrel_get_retry_count(&mi->r[ndx], info);
+	ar[0].idx = mi->r[ndx].rix;
+	ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info);
 
 	if (!mrr) {
-		ar[0].rate_idx = mi->lowest_rix;
-		ar[0].limit = mp->max_retry;
-		ar[1].rate_idx = -1;
+		ar[1].idx = mi->lowest_rix;
+		ar[1].count = mp->max_retry;
 		return;
 	}
 
@@ -336,9 +322,9 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband,
 	}
 	mrr_ndx[1] = mi->max_prob_rate;
 	mrr_ndx[2] = 0;
-	for (i = 0; i < 3; i++) {
-		ar[i].rate_idx = mi->r[mrr_ndx[i]].rix;
-		ar[i].limit = mi->r[mrr_ndx[i]].adjusted_retry_count;
+	for (i = 1; i < 4; i++) {
+		ar[i].idx = mi->r[mrr_ndx[i - 1]].rix;
+		ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count;
 	}
 }
 
@@ -532,13 +518,13 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 	/* maximum time that the hw is allowed to stay in one MRR segment */
 	mp->segment_size = 6000;
 
-	if (hw->max_altrate_tries > 0)
-		mp->max_retry = hw->max_altrate_tries;
+	if (hw->max_rate_tries > 0)
+		mp->max_retry = hw->max_rate_tries;
 	else
 		/* safe default, does not necessarily have to match hw properties */
 		mp->max_retry = 7;
 
-	if (hw->max_altrates >= 3)
+	if (hw->max_rates >= 4)
 		mp->has_mrr = true;
 
 	mp->hw = hw;
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index ce099ea1d5d3..1a873f00691a 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -61,6 +61,7 @@ enum rc_pid_event_type {
 union rc_pid_event_data {
 	/* RC_PID_EVENT_TX_STATUS */
 	struct {
+		u32 flags;
 		struct ieee80211_tx_info tx_status;
 	};
 	/* RC_PID_EVENT_TYPE_RATE_CHANGE */
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 86eb374e3b87..92caecfcee78 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -241,7 +241,7 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba
 
 	/* Ignore all frames that were sent with a different rate than the rate
 	 * we currently advise mac80211 to use. */
-	if (info->tx_rate_idx != spinfo->txrate_idx)
+	if (info->status.rates[0].idx != spinfo->txrate_idx)
 		return;
 
 	spinfo->tx_num_xmit++;
@@ -253,10 +253,10 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba
 	/* We count frames that totally failed to be transmitted as two bad
 	 * frames, those that made it out but had some retries as one good and
 	 * one bad frame. */
-	if (info->status.excessive_retries) {
+	if (!(info->flags & IEEE80211_TX_STAT_ACK)) {
 		spinfo->tx_num_failed += 2;
 		spinfo->tx_num_xmit++;
-	} else if (info->status.retry_count) {
+	} else if (info->status.rates[0].count) {
 		spinfo->tx_num_failed++;
 		spinfo->tx_num_xmit++;
 	}
@@ -270,23 +270,32 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba
 }
 
 static void
-rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband,
-			  struct ieee80211_sta *sta, void *priv_sta,
-			  struct sk_buff *skb,
-			  struct rate_selection *sel)
+rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta,
+			  void *priv_sta,
+			  struct ieee80211_tx_rate_control *txrc)
 {
+	struct sk_buff *skb = txrc->skb;
+	struct ieee80211_supported_band *sband = txrc->sband;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct rc_pid_sta_info *spinfo = priv_sta;
 	int rateidx;
 	u16 fc;
 
+	if (txrc->rts)
+		info->control.rates[0].count =
+			txrc->hw->conf.long_frame_max_tx_count;
+	else
+		info->control.rates[0].count =
+			txrc->hw->conf.short_frame_max_tx_count;
+
 	/* Send management frames and broadcast/multicast data using lowest
 	 * rate. */
 	fc = le16_to_cpu(hdr->frame_control);
 	if (!sta || !spinfo ||
 	    (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA ||
 	    is_multicast_ether_addr(hdr->addr1)) {
-		sel->rate_idx = rate_lowest_index(sband, sta);
+		info->control.rates[0].idx = rate_lowest_index(sband, sta);
 		return;
 	}
 
@@ -295,7 +304,7 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband,
 	if (rateidx >= sband->n_bitrates)
 		rateidx = sband->n_bitrates - 1;
 
-	sel->rate_idx = rateidx;
+	info->control.rates[0].idx = rateidx;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	rate_control_pid_event_tx_rate(&spinfo->events,
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 8121d3bc6835..a08a9b530347 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -43,6 +43,7 @@ void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf,
 {
 	union rc_pid_event_data evd;
 
+	evd.flags = stat->flags;
 	memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info));
 	rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd);
 }
@@ -167,8 +168,8 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
 	switch (ev->type) {
 	case RC_PID_EVENT_TYPE_TX_STATUS:
 		p += snprintf(pb + p, length - p, "tx_status %u %u",
-			      ev->data.tx_status.status.excessive_retries,
-			      ev->data.tx_status.status.retry_count);
+			      !(ev->data.flags & IEEE80211_TX_STAT_ACK),
+			      ev->data.tx_status.status.rates[0].idx);
 		break;
 	case RC_PID_EVENT_TYPE_RATE_CHANGE:
 		p += snprintf(pb + p, length - p, "rate_change %d %d",
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 168a39a298bd..4ac372aa75ce 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -196,7 +196,7 @@ struct sta_ampdu_mlme {
  * @tx_packets: number of RX/TX MSDUs
  * @tx_bytes: TBD
  * @tx_fragments: number of transmitted MPDUs
- * @last_txrate_idx: Index of the last used transmit rate
+ * @last_txrate: description of the last used transmit rate
  * @tid_seq: TBD
  * @ampdu_mlme: TBD
  * @timer_to_tid: identity mapping to ID timers
@@ -267,7 +267,7 @@ struct sta_info {
 	unsigned long tx_packets;
 	unsigned long tx_bytes;
 	unsigned long tx_fragments;
-	unsigned int last_txrate_idx;
+	struct ieee80211_tx_rate last_tx_rate;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	/*
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6f3e4be97631..21951bac1ef7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -46,13 +46,20 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr,
 	struct ieee80211_local *local = tx->local;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_hdr *hdr;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+
+	/* assume HW handles this */
+	if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS)
+		return 0;
+
+	/* uh huh? */
+	if (WARN_ON_ONCE(info->control.rates[0].idx < 0))
+		return 0;
 
 	sband = local->hw.wiphy->bands[tx->channel->band];
-	txrate = &sband->bitrates[tx->rate_idx];
+	txrate = &sband->bitrates[info->control.rates[0].idx];
 
-	erp = 0;
-	if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
-		erp = txrate->flags & IEEE80211_RATE_ERP_G;
+	erp = txrate->flags & IEEE80211_RATE_ERP_G;
 
 	/*
 	 * data and mgmt (except PS Poll):
@@ -437,140 +444,154 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 {
-	struct rate_selection rsel;
-	struct ieee80211_supported_band *sband;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+	struct ieee80211_hdr *hdr = (void *)tx->skb->data;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_rate *rate;
+	int i, len;
+	bool inval = false, rts = false, short_preamble = false;
+	struct ieee80211_tx_rate_control txrc;
 
-	sband = tx->local->hw.wiphy->bands[tx->channel->band];
+	memset(&txrc, 0, sizeof(txrc));
 
-	if (likely(tx->rate_idx < 0)) {
-		rate_control_get_rate(tx->sdata, sband, tx->sta,
-				      tx->skb, &rsel);
-		if (tx->sta)
-			tx->sta->last_txrate_idx = rsel.rate_idx;
-		tx->rate_idx = rsel.rate_idx;
-		if (unlikely(rsel.probe_idx >= 0)) {
-			info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
-			info->control.retries[0].rate_idx = tx->rate_idx;
-			info->control.retries[0].limit = tx->local->hw.max_altrate_tries;
-			tx->rate_idx = rsel.probe_idx;
-		} else if (info->control.retries[0].limit == 0)
-			info->control.retries[0].rate_idx = -1;
-
-		if (unlikely(tx->rate_idx < 0))
-			return TX_DROP;
-	} else
-		info->control.retries[0].rate_idx = -1;
+	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	if (tx->sdata->vif.bss_conf.use_cts_prot &&
-	    (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) {
-		tx->last_frag_rate_idx = tx->rate_idx;
-		if (rsel.probe_idx >= 0)
-			tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG;
-		else
-			tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG;
-		tx->rate_idx = rsel.nonerp_idx;
-		info->tx_rate_idx = rsel.nonerp_idx;
-		info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-	} else {
-		tx->last_frag_rate_idx = tx->rate_idx;
-		info->tx_rate_idx = tx->rate_idx;
+	len = min_t(int, tx->skb->len + FCS_LEN,
+			 tx->local->fragmentation_threshold);
+
+	/* set up the tx rate control struct we give the RC algo */
+	txrc.hw = local_to_hw(tx->local);
+	txrc.sband = sband;
+	txrc.bss_conf = &tx->sdata->vif.bss_conf;
+	txrc.skb = tx->skb;
+	txrc.reported_rate.idx = -1;
+	txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx;
+
+	/* set up RTS protection if desired */
+	if (tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD &&
+	    len > tx->local->rts_threshold) {
+		txrc.rts = rts = true;
 	}
-	info->tx_rate_idx = tx->rate_idx;
 
-	return TX_CONTINUE;
-}
+	/*
+	 * Use short preamble if the BSS can handle it, but not for
+	 * management frames unless we know the receiver can handle
+	 * that -- the management frame might be to a station that
+	 * just wants a probe response.
+	 */
+	if (tx->sdata->vif.bss_conf.use_short_preamble &&
+	    (ieee80211_is_data(hdr->frame_control) ||
+	     (tx->sta && test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))))
+		txrc.short_preamble = short_preamble = true;
 
-static ieee80211_tx_result debug_noinline
-ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
-{
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
-	struct ieee80211_supported_band *sband;
 
-	sband = tx->local->hw.wiphy->bands[tx->channel->band];
+	rate_control_get_rate(tx->sdata, tx->sta, &txrc);
+
+	if (unlikely(info->control.rates[0].idx < 0))
+		return TX_DROP;
+
+	if (txrc.reported_rate.idx < 0)
+		txrc.reported_rate = info->control.rates[0];
 
 	if (tx->sta)
-		info->control.sta = &tx->sta->sta;
+		tx->sta->last_tx_rate = txrc.reported_rate;
 
-	if (!info->control.retry_limit) {
-		if (!is_multicast_ether_addr(hdr->addr1)) {
-			int len = min_t(int, tx->skb->len + FCS_LEN,
-					tx->local->fragmentation_threshold);
-			if (len > tx->local->rts_threshold
-			    && tx->local->rts_threshold <
-						IEEE80211_MAX_RTS_THRESHOLD) {
-				info->flags |= IEEE80211_TX_CTL_USE_RTS_CTS;
-				info->flags |=
-					IEEE80211_TX_CTL_LONG_RETRY_LIMIT;
-				info->control.retry_limit =
-					tx->local->hw.conf.long_frame_max_tx_count - 1;
-			} else {
-				info->control.retry_limit =
-					tx->local->hw.conf.short_frame_max_tx_count - 1;
-			}
-		} else {
-			info->control.retry_limit = 1;
-		}
-	}
+	if (unlikely(!info->control.rates[0].count))
+		info->control.rates[0].count = 1;
 
-	if (tx->flags & IEEE80211_TX_FRAGMENTED) {
-		/* Do not use multiple retry rates when sending fragmented
-		 * frames.
-		 * TODO: The last fragment could still use multiple retry
-		 * rates. */
-		info->control.retries[0].rate_idx = -1;
+	if (is_multicast_ether_addr(hdr->addr1)) {
+		/*
+		 * XXX: verify the rate is in the basic rateset
+		 */
+		return TX_CONTINUE;
 	}
 
-	/* Use CTS protection for unicast frames sent using extended rates if
-	 * there are associated non-ERP stations and RTS/CTS is not configured
-	 * for the frame. */
-	if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) &&
-	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) &&
-	    (tx->flags & IEEE80211_TX_UNICAST) &&
-	    tx->sdata->vif.bss_conf.use_cts_prot &&
-	    !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS))
-		info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT;
-
-	/* Transmit data frames using short preambles if the driver supports
-	 * short preambles at the selected rate and short preambles are
-	 * available on the network at the current point in time. */
-	if (ieee80211_is_data(hdr->frame_control) &&
-	    (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
-	    tx->sdata->vif.bss_conf.use_short_preamble &&
-	    (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) {
-		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
+	/*
+	 * set up the RTS/CTS rate as the fastest basic rate
+	 * that is not faster than the data rate
+	 *
+	 * XXX: Should this check all retry rates?
+	 */
+	if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) {
+		s8 baserate = 0;
+
+		rate = &sband->bitrates[info->control.rates[0].idx];
+
+		for (i = 0; i < sband->n_bitrates; i++) {
+			/* must be a basic rate */
+			if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i)))
+				continue;
+			/* must not be faster than the data rate */
+			if (sband->bitrates[i].bitrate > rate->bitrate)
+				continue;
+			/* maximum */
+			if (sband->bitrates[baserate].bitrate <
+			     sband->bitrates[i].bitrate)
+				baserate = i;
+		}
+
+		info->control.rts_cts_rate_idx = baserate;
 	}
 
-	if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) ||
-	    (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) {
-		struct ieee80211_rate *rate;
-		s8 baserate = -1;
-		int idx;
+	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+		/*
+		 * make sure there's no valid rate following
+		 * an invalid one, just in case drivers don't
+		 * take the API seriously to stop at -1.
+		 */
+		if (inval) {
+			info->control.rates[i].idx = -1;
+			continue;
+		}
+		if (info->control.rates[i].idx < 0) {
+			inval = true;
+			continue;
+		}
 
-		/* Do not use multiple retry rates when using RTS/CTS */
-		info->control.retries[0].rate_idx = -1;
+		/*
+		 * For now assume MCS is already set up correctly, this
+		 * needs to be fixed.
+		 */
+		if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) {
+			WARN_ON(info->control.rates[i].idx > 76);
+			continue;
+		}
 
-		/* Use min(data rate, max base rate) as CTS/RTS rate */
-		rate = &sband->bitrates[tx->rate_idx];
+		/* set up RTS protection if desired */
+		if (rts)
+			info->control.rates[i].flags |=
+				IEEE80211_TX_RC_USE_RTS_CTS;
 
-		for (idx = 0; idx < sband->n_bitrates; idx++) {
-			if (sband->bitrates[idx].bitrate > rate->bitrate)
-				continue;
-			if (tx->sdata->vif.bss_conf.basic_rates & BIT(idx) &&
-			    (baserate < 0 ||
-			     (sband->bitrates[baserate].bitrate
-			      < sband->bitrates[idx].bitrate)))
-				baserate = idx;
+		/* RC is busted */
+		if (WARN_ON(info->control.rates[i].idx >=
+			    sband->n_bitrates)) {
+			info->control.rates[i].idx = -1;
+			continue;
 		}
 
-		if (baserate >= 0)
-			info->control.rts_cts_rate_idx = baserate;
-		else
-			info->control.rts_cts_rate_idx = 0;
+		rate = &sband->bitrates[info->control.rates[i].idx];
+
+		/* set up short preamble */
+		if (short_preamble &&
+		    rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
+			info->control.rates[i].flags |=
+				IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
+
+		/* set up G protection */
+		if (!rts && tx->sdata->vif.bss_conf.use_cts_prot &&
+		    rate->flags & IEEE80211_RATE_ERP_G)
+			info->control.rates[i].flags |=
+				IEEE80211_TX_RC_USE_CTS_PROTECT;
 	}
 
+	return TX_CONTINUE;
+}
+
+static ieee80211_tx_result debug_noinline
+ieee80211_tx_h_misc(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+
 	if (tx->sta)
 		info->control.sta = &tx->sta->sta;
 
@@ -678,6 +699,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	left = payload_len - per_fragm;
 	for (i = 0; i < num_fragm - 1; i++) {
 		struct ieee80211_hdr *fhdr;
+		struct ieee80211_tx_info *info;
 		size_t copylen;
 
 		if (left <= 0)
@@ -692,20 +714,45 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 				      IEEE80211_ENCRYPT_TAILROOM);
 		if (!frag)
 			goto fail;
+
 		/* Make sure that all fragments use the same priority so
 		 * that they end up using the same TX queue */
 		frag->priority = first->priority;
+
 		skb_reserve(frag, tx->local->tx_headroom +
 				  IEEE80211_ENCRYPT_HEADROOM);
+
+		/* copy TX information */
+		info = IEEE80211_SKB_CB(frag);
+		memcpy(info, first->cb, sizeof(frag->cb));
+
+		/* copy/fill in 802.11 header */
 		fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen);
 		memcpy(fhdr, first->data, hdrlen);
-		if (i == num_fragm - 2)
-			fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS);
 		fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG));
+
+		if (i == num_fragm - 2) {
+			/* clear MOREFRAGS bit for the last fragment */
+			fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS);
+		} else {
+			/*
+			 * No multi-rate retries for fragmented frames, that
+			 * would completely throw off the NAV at other STAs.
+			 */
+			info->control.rates[1].idx = -1;
+			info->control.rates[2].idx = -1;
+			info->control.rates[3].idx = -1;
+			info->control.rates[4].idx = -1;
+			BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5);
+			info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+		}
+
+		/* copy data */
 		copylen = left > per_fragm ? per_fragm : left;
 		memcpy(skb_put(frag, copylen), pos, copylen);
-		memcpy(frag->cb, first->cb, sizeof(frag->cb));
+
 		skb_copy_queue_mapping(frag, first);
+
 		frag->do_not_encrypt = first->do_not_encrypt;
 
 		pos += copylen;
@@ -765,12 +812,10 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx)
 					      tx->extra_frag[0]->len);
 
 	for (i = 0; i < tx->num_extra_frag; i++) {
-		if (i + 1 < tx->num_extra_frag) {
+		if (i + 1 < tx->num_extra_frag)
 			next_len = tx->extra_frag[i + 1]->len;
-		} else {
+		else
 			next_len = 0;
-			tx->rate_idx = tx->last_frag_rate_idx;
-		}
 
 		hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data;
 		hdr->duration_id = ieee80211_duration(tx, 0, next_len);
@@ -823,7 +868,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 		(struct ieee80211_radiotap_header *) skb->data;
 	struct ieee80211_supported_band *sband;
 	int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
@@ -837,8 +881,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 	 */
 
 	while (!ret) {
-		int i, target_rate;
-
 		ret = ieee80211_radiotap_iterator_next(&iterator);
 
 		if (ret)
@@ -852,38 +894,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 		 * get_unaligned((type *)iterator.this_arg) to dereference
 		 * iterator.this_arg for type "type" safely on all arches.
 		*/
-		case IEEE80211_RADIOTAP_RATE:
-			/*
-			 * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps
-			 * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps
-			 */
-			target_rate = (*iterator.this_arg) * 5;
-			for (i = 0; i < sband->n_bitrates; i++) {
-				struct ieee80211_rate *r;
-
-				r = &sband->bitrates[i];
-
-				if (r->bitrate == target_rate) {
-					tx->rate_idx = i;
-					break;
-				}
-			}
-			break;
-
-		case IEEE80211_RADIOTAP_ANTENNA:
-			/*
-			 * radiotap uses 0 for 1st ant, mac80211 is 1 for
-			 * 1st ant
-			 */
-			info->antenna_sel_tx = (*iterator.this_arg) + 1;
-			break;
-
-#if 0
-		case IEEE80211_RADIOTAP_DBM_TX_POWER:
-			control->power_level = *iterator.this_arg;
-			break;
-#endif
-
 		case IEEE80211_RADIOTAP_FLAGS:
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) {
 				/*
@@ -949,8 +959,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	tx->local = local;
 	tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	tx->channel = local->hw.conf.channel;
-	tx->rate_idx = -1;
-	tx->last_frag_rate_idx = -1;
 	/*
 	 * Set this flag (used below to indicate "automatic fragmentation"),
 	 * it will be cleared/left by radiotap as desired.
@@ -1051,23 +1059,11 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
 			if (!tx->extra_frag[i])
 				continue;
 			info = IEEE80211_SKB_CB(tx->extra_frag[i]);
-			info->flags &= ~(IEEE80211_TX_CTL_USE_RTS_CTS |
-					 IEEE80211_TX_CTL_USE_CTS_PROTECT |
-					 IEEE80211_TX_CTL_CLEAR_PS_FILT |
+			info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT |
 					 IEEE80211_TX_CTL_FIRST_FRAGMENT);
 			if (netif_subqueue_stopped(local->mdev,
 						   tx->extra_frag[i]))
 				return IEEE80211_TX_FRAG_AGAIN;
-			if (i == tx->num_extra_frag) {
-				info->tx_rate_idx = tx->last_frag_rate_idx;
-
-				if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG)
-					info->flags |=
-						IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-				else
-					info->flags &=
-						~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-			}
 
 			ret = local->ops->tx(local_to_hw(local),
 					    tx->extra_frag[i]);
@@ -1204,9 +1200,6 @@ retry:
 		store->skb = skb;
 		store->extra_frag = tx.extra_frag;
 		store->num_extra_frag = tx.num_extra_frag;
-		store->last_frag_rate_idx = tx.last_frag_rate_idx;
-		store->last_frag_rate_ctrl_probe =
-			!!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG);
 	}
  out:
 	rcu_read_unlock();
@@ -1763,10 +1756,7 @@ void ieee80211_tx_pending(unsigned long data)
 		store = &local->pending_packet[i];
 		tx.extra_frag = store->extra_frag;
 		tx.num_extra_frag = store->num_extra_frag;
-		tx.last_frag_rate_idx = store->last_frag_rate_idx;
 		tx.flags = 0;
-		if (store->last_frag_rate_ctrl_probe)
-			tx.flags |= IEEE80211_TX_PROBE_LAST_FRAG;
 		ret = __ieee80211_tx(local, store->skb, &tx);
 		if (ret) {
 			if (ret == IEEE80211_TX_FRAG_AGAIN)
@@ -1854,7 +1844,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	struct ieee80211_sub_if_data *sdata = NULL;
 	struct ieee80211_if_ap *ap = NULL;
 	struct ieee80211_if_sta *ifsta = NULL;
-	struct rate_selection rsel;
 	struct beacon_data *beacon;
 	struct ieee80211_supported_band *sband;
 	enum ieee80211_band band = local->hw.conf.channel->band;
@@ -1958,32 +1947,23 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	skb->do_not_encrypt = 1;
 
 	info->band = band;
-	rate_control_get_rate(sdata, sband, NULL, skb, &rsel);
-
-	if (unlikely(rsel.rate_idx < 0)) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "%s: ieee80211_beacon_get: "
-			       "no rate found\n",
-			       wiphy_name(local->hw.wiphy));
-		}
-		dev_kfree_skb_any(skb);
-		skb = NULL;
-		goto out;
-	}
+	/*
+	 * XXX: For now, always use the lowest rate
+	 */
+	info->control.rates[0].idx = 0;
+	info->control.rates[0].count = 1;
+	info->control.rates[1].idx = -1;
+	info->control.rates[2].idx = -1;
+	info->control.rates[3].idx = -1;
+	info->control.rates[4].idx = -1;
+	BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5);
 
 	info->control.vif = vif;
-	info->tx_rate_idx = rsel.rate_idx;
 
 	info->flags |= IEEE80211_TX_CTL_NO_ACK;
 	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 	info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
-	if (sdata->vif.bss_conf.use_short_preamble &&
-	    sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE)
-		info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE;
-
-	info->control.retry_limit = 1;
-
-out:
+ out:
 	rcu_read_unlock();
 	return skb;
 }
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index f7e442f80a17..31d2e74a1bc0 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -636,8 +636,8 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
 
 	sta = sta_info_get(local, sdata->u.sta.bssid);
 
-	if (sta && sta->last_txrate_idx < sband->n_bitrates)
-		rate->value = sband->bitrates[sta->last_txrate_idx].bitrate;
+	if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS))
+		rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate;
 	else
 		rate->value = 0;
 
-- 
cgit v1.2.3


From 50fb2e4572141770380f5919793c6e575fa3474b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 16 Oct 2008 11:21:49 +0200
Subject: mac80211: remove rate_control_clear

"Clearing" the rate control algorithm is pointless, none of
the algorithms actually uses this operation and it's not even
invoked properly for all channel switching. Also, there's no
need to since rate control algorithms work per station.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/rc.c            |  6 ------
 drivers/net/wireless/iwlwifi/iwl-3945-rs.c |  7 -------
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c  | 14 --------------
 include/net/mac80211.h                     |  1 -
 net/mac80211/rate.h                        |  6 ------
 net/mac80211/rc80211_minstrel.c            |  6 ------
 net/mac80211/rc80211_pid_algo.c            |  5 -----
 net/mac80211/util.c                        |  2 --
 8 files changed, 47 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c
index 6afafeddeda2..9ce535915a1a 100644
--- a/drivers/net/wireless/ath9k/rc.c
+++ b/drivers/net/wireless/ath9k/rc.c
@@ -2038,11 +2038,6 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband,
 	ath_rc_node_update(sc->hw, priv_sta);
 }
 
-static void ath_rate_clear(void *priv)
-{
-	return;
-}
-
 static void *ath_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 {
 	struct ath_softc *sc = hw->priv;
@@ -2092,7 +2087,6 @@ static struct rate_control_ops ath_rate_ops = {
 	.tx_status = ath_tx_status,
 	.get_rate = ath_get_rate,
 	.rate_init = ath_rate_init,
-	.clear = ath_rate_clear,
 	.alloc = ath_rate_alloc,
 	.free = ath_rate_free,
 	.alloc_sta = ath_rate_alloc_sta,
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
index f440ed0fe543..bfeef701b1fd 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c
@@ -355,12 +355,6 @@ static void rs_free(void *priv)
 	return;
 }
 
-static void rs_clear(void *priv)
-{
-	return;
-}
-
-
 static void *rs_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
 {
 	struct iwl3945_rs_sta *rs_sta;
@@ -784,7 +778,6 @@ static struct rate_control_ops rs_ops = {
 	.tx_status = rs_tx_status,
 	.get_rate = rs_get_rate,
 	.rate_init = rs_rate_init,
-	.clear = rs_clear,
 	.alloc = rs_alloc,
 	.free = rs_free,
 	.alloc_sta = rs_alloc_sta,
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index f685e5d6c281..6fdb2fb755b0 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -2393,19 +2393,6 @@ static void rs_free(void *priv_rate)
 	return;
 }
 
-static void rs_clear(void *priv_rate)
-{
-#ifdef CONFIG_IWLWIFI_DEBUG
-	struct iwl_priv *priv = (struct iwl_priv *) priv_rate;
-
-	IWL_DEBUG_RATE("enter\n");
-
-	/* TODO - add rate scale state reset */
-
-	IWL_DEBUG_RATE("leave\n");
-#endif /* CONFIG_IWLWIFI_DEBUG */
-}
-
 static void rs_free_sta(void *priv_r, struct ieee80211_sta *sta,
 			void *priv_sta)
 {
@@ -2593,7 +2580,6 @@ static struct rate_control_ops rs_ops = {
 	.tx_status = rs_tx_status,
 	.get_rate = rs_get_rate,
 	.rate_init = rs_rate_init,
-	.clear = rs_clear,
 	.alloc = rs_alloc,
 	.free = rs_free,
 	.alloc_sta = rs_alloc_sta,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3741c0a7978a..e0b1ff9a3148 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1917,7 +1917,6 @@ struct rate_control_ops {
 	struct module *module;
 	const char *name;
 	void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
-	void (*clear)(void *priv);
 	void (*free)(void *priv);
 
 	void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp);
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 7c25edf9ac55..928da625e281 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -63,12 +63,6 @@ static inline void rate_control_rate_init(struct sta_info *sta)
 }
 
 
-static inline void rate_control_clear(struct ieee80211_local *local)
-{
-	struct rate_control_ref *ref = local->rate_ctrl;
-	ref->ops->clear(ref->priv);
-}
-
 static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
 					   struct ieee80211_sta *sta,
 					   gfp_t gfp)
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index c10debc29ad6..c643e373fc50 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -507,11 +507,6 @@ minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
 	kfree(mi);
 }
 
-static void
-minstrel_clear(void *priv)
-{
-}
-
 static void *
 minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 {
@@ -565,7 +560,6 @@ static struct rate_control_ops mac80211_minstrel = {
 	.tx_status = minstrel_tx_status,
 	.get_rate = minstrel_get_rate,
 	.rate_init = minstrel_rate_init,
-	.clear = minstrel_clear,
 	.alloc = minstrel_alloc,
 	.free = minstrel_free,
 	.alloc_sta = minstrel_alloc_sta,
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 92caecfcee78..2328ba568039 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -446,10 +446,6 @@ static void rate_control_pid_free(void *priv)
 	kfree(pinfo);
 }
 
-static void rate_control_pid_clear(void *priv)
-{
-}
-
 static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta,
 					gfp_t gfp)
 {
@@ -480,7 +476,6 @@ static struct rate_control_ops mac80211_rcpid = {
 	.tx_status = rate_control_pid_tx_status,
 	.get_rate = rate_control_pid_get_rate,
 	.rate_init = rate_control_pid_rate_init,
-	.clear = rate_control_pid_clear,
 	.alloc = rate_control_pid_alloc,
 	.free = rate_control_pid_free,
 	.alloc_sta = rate_control_pid_alloc_sta,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ec8b6335f0c1..0f841317c7e9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -647,8 +647,6 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 		else
 			ret = ieee80211_hw_config(
 				local, IEEE80211_CONF_CHANGE_CHANNEL);
-
-		rate_control_clear(local);
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 93da9cc17c5ae8a751886fd4732db89ad5e9bdb9 Mon Sep 17 00:00:00 2001
From: "colin@cozybit.com" <colin@cozybit.com>
Date: Tue, 21 Oct 2008 12:03:48 -0700
Subject: Add nl80211 commands to get and set o11s mesh networking parameters

The two new commands are NL80211_CMD_GET_MESH_PARAMS and
NL80211_CMD_SET_MESH_PARAMS. There is a new attribute enum,
NL80211_ATTR_MESH_PARAMS, which enumerates the various mesh configuration
parameters.

Moved struct mesh_config from mac80211/ieee80211_i.h to net/cfg80211.h.
nl80211_get_mesh_params and nl80211_set_mesh_params unpack the netlink messages
and ask the driver to get or set the configuration.  This is done via two new
function stubs, get_mesh_params and set_mesh_params, in struct cfg80211_ops.

Signed-off-by: Colin McCabe <colin@cozybit.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  86 ++++++++++++++++++++
 include/net/cfg80211.h     |  32 +++++++-
 net/mac80211/cfg.c         |  68 ++++++++++++++++
 net/mac80211/ieee80211_i.h |  21 +----
 net/wireless/nl80211.c     | 191 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 377 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 41720d47d618..e4cc7869b22f 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -106,6 +106,12 @@
  * 	to the the specified ISO/IEC 3166-1 alpha2 country code. The core will
  * 	store this as a valid request and then query userspace for it.
  *
+ * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the
+ *	interface identified by %NL80211_ATTR_IFINDEX
+ *
+ * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the
+ *      interface identified by %NL80211_ATTR_IFINDEX
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -148,6 +154,9 @@ enum nl80211_commands {
 	NL80211_CMD_SET_REG,
 	NL80211_CMD_REQ_SET_REG,
 
+	NL80211_CMD_GET_MESH_PARAMS,
+	NL80211_CMD_SET_MESH_PARAMS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -296,6 +305,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_REG_ALPHA2,
 	NL80211_ATTR_REG_RULES,
 
+	NL80211_ATTR_MESH_PARAMS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -606,4 +617,79 @@ enum nl80211_mntr_flags {
 	NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_meshconf_params - mesh configuration parameters
+ *
+ * Mesh configuration parameters
+ *
+ * @__NL80211_MESHCONF_INVALID: internal use
+ *
+ * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in
+ * millisecond units, used by the Peer Link Open message
+ *
+ * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the inital confirm timeout, in
+ * millisecond units, used by the peer link management to close a peer link
+ *
+ * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in
+ * millisecond units
+ *
+ * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed
+ * on this mesh interface
+ *
+ * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link
+ * open retries that can be sent to establish a new peer link instance in a
+ * mesh
+ *
+ * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
+ * point.
+ *
+ * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically
+ * open peer links when we detect compatible mesh peers.
+ *
+ * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames
+ * containing a PREQ that an MP can send to a particular destination (path
+ * target)
+ *
+ * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths
+ * (in milliseconds)
+ *
+ * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait
+ * until giving up on a path discovery (in milliseconds)
+ *
+ * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh
+ * points receiving a PREQ shall consider the forwarding information from the
+ * root to be valid. (TU = time unit)
+ *
+ * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which an MP can send only one action frame containing a PREQ
+ * reference element
+ *
+ * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs)
+ * that it takes for an HWMP information element to propagate across the mesh
+ *
+ * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
+ *
+ * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_meshconf_params {
+	__NL80211_MESHCONF_INVALID,
+	NL80211_MESHCONF_RETRY_TIMEOUT,
+	NL80211_MESHCONF_CONFIRM_TIMEOUT,
+	NL80211_MESHCONF_HOLDING_TIMEOUT,
+	NL80211_MESHCONF_MAX_PEER_LINKS,
+	NL80211_MESHCONF_MAX_RETRIES,
+	NL80211_MESHCONF_TTL,
+	NL80211_MESHCONF_AUTO_OPEN_PLINKS,
+	NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+	NL80211_MESHCONF_PATH_REFRESH_TIME,
+	NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+	NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
+	NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
+	NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+
+	/* keep last */
+	__NL80211_MESHCONF_ATTR_AFTER_LAST,
+	NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0e85ec39b638..03e1e88c6a09 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -347,6 +347,25 @@ struct ieee80211_regdomain {
 	.flags = reg_flags, \
 	}
 
+struct mesh_config {
+	/* Timeouts in ms */
+	/* Mesh plink management parameters */
+	u16 dot11MeshRetryTimeout;
+	u16 dot11MeshConfirmTimeout;
+	u16 dot11MeshHoldingTimeout;
+	u16 dot11MeshMaxPeerLinks;
+	u8  dot11MeshMaxRetries;
+	u8  dot11MeshTTL;
+	bool auto_open_plinks;
+	/* HWMP parameters */
+	u8  dot11MeshHWMPmaxPREQretries;
+	u32 path_refresh_time;
+	u16 min_discovery_timeout;
+	u32 dot11MeshHWMPactivePathTimeout;
+	u16 dot11MeshHWMPpreqMinInterval;
+	u16 dot11MeshHWMPnetDiameterTraversalTime;
+};
+
 /* from net/wireless.h */
 struct wiphy;
 
@@ -397,6 +416,12 @@ struct wiphy;
  *
  * @change_station: Modify a given station.
  *
+ * @get_mesh_params: Put the current mesh parameters into *params
+ *
+ * @set_mesh_params: Set mesh parameters.
+ *	The mask is a bitfield which tells us which parameters to
+ *	set, and which to leave alone.
+ *
  * @set_mesh_cfg: set mesh parameters (by now, just mesh id)
  *
  * @change_bss: Modify parameters for a given BSS.
@@ -452,7 +477,12 @@ struct cfg80211_ops {
 	int	(*dump_mpath)(struct wiphy *wiphy, struct net_device *dev,
 			       int idx, u8 *dst, u8 *next_hop,
 			       struct mpath_info *pinfo);
-
+	int	(*get_mesh_params)(struct wiphy *wiphy,
+				struct net_device *dev,
+				struct mesh_config *conf);
+	int	(*set_mesh_params)(struct wiphy *wiphy,
+				struct net_device *dev,
+				const struct mesh_config *nconf, u32 mask);
 	int	(*change_bss)(struct wiphy *wiphy, struct net_device *dev,
 			      struct bss_parameters *params);
 };
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 55e3a26510ed..91f56a48e2b4 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -951,6 +951,72 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 	rcu_read_unlock();
 	return 0;
 }
+
+static int ieee80211_get_mesh_params(struct wiphy *wiphy,
+				struct net_device *dev,
+				struct mesh_config *conf)
+{
+	struct ieee80211_sub_if_data *sdata;
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
+		return -ENOTSUPP;
+	memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config));
+	return 0;
+}
+
+static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
+{
+	return (mask >> (parm-1)) & 0x1;
+}
+
+static int ieee80211_set_mesh_params(struct wiphy *wiphy,
+				struct net_device *dev,
+				const struct mesh_config *nconf, u32 mask)
+{
+	struct mesh_config *conf;
+	struct ieee80211_sub_if_data *sdata;
+	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
+		return -ENOTSUPP;
+
+	/* Set the config options which we are interested in setting */
+	conf = &(sdata->u.mesh.mshcfg);
+	if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask))
+		conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout;
+	if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask))
+		conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask))
+		conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout;
+	if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask))
+		conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks;
+	if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask))
+		conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries;
+	if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask))
+		conf->dot11MeshTTL = nconf->dot11MeshTTL;
+	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask))
+		conf->auto_open_plinks = nconf->auto_open_plinks;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask))
+		conf->dot11MeshHWMPmaxPREQretries =
+			nconf->dot11MeshHWMPmaxPREQretries;
+	if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask))
+		conf->path_refresh_time = nconf->path_refresh_time;
+	if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask))
+		conf->min_discovery_timeout = nconf->min_discovery_timeout;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask))
+		conf->dot11MeshHWMPactivePathTimeout =
+			nconf->dot11MeshHWMPactivePathTimeout;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask))
+		conf->dot11MeshHWMPpreqMinInterval =
+			nconf->dot11MeshHWMPpreqMinInterval;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+			   mask))
+		conf->dot11MeshHWMPnetDiameterTraversalTime =
+			nconf->dot11MeshHWMPnetDiameterTraversalTime;
+	return 0;
+}
+
 #endif
 
 static int ieee80211_change_bss(struct wiphy *wiphy,
@@ -1007,6 +1073,8 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_mpath = ieee80211_change_mpath,
 	.get_mpath = ieee80211_get_mpath,
 	.dump_mpath = ieee80211_dump_mpath,
+	.set_mesh_params = ieee80211_set_mesh_params,
+	.get_mesh_params = ieee80211_get_mesh_params,
 #endif
 	.change_bss = ieee80211_change_bss,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index fe4efdd4253d..2c91108e3901 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/etherdevice.h>
+#include <net/cfg80211.h>
 #include <net/wireless.h>
 #include <net/iw_handler.h>
 #include <net/mac80211.h>
@@ -247,26 +248,6 @@ struct mesh_preq_queue {
 	u8 flags;
 };
 
-struct mesh_config {
-	/* Timeouts in ms */
-	/* Mesh plink management parameters */
-	u16 dot11MeshRetryTimeout;
-	u16 dot11MeshConfirmTimeout;
-	u16 dot11MeshHoldingTimeout;
-	u16 dot11MeshMaxPeerLinks;
-	u8  dot11MeshMaxRetries;
-	u8  dot11MeshTTL;
-	bool auto_open_plinks;
-	/* HWMP parameters */
-	u8  dot11MeshHWMPmaxPREQretries;
-	u32 path_refresh_time;
-	u16 min_discovery_timeout;
-	u32 dot11MeshHWMPactivePathTimeout;
-	u16 dot11MeshHWMPpreqMinInterval;
-	u16 dot11MeshHWMPnetDiameterTraversalTime;
-};
-
-
 /* flags used in struct ieee80211_if_sta.flags */
 #define IEEE80211_STA_SSID_SET		BIT(0)
 #define IEEE80211_STA_BSSID_SET		BIT(1)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2b87aec231ea..9a16e9e6c5ca 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -96,6 +96,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
 
+	[NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED },
+
 	[NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
 					 .len = NL80211_HT_CAPABILITY_LEN },
 };
@@ -1698,6 +1700,183 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 	return r;
 }
 
+static int nl80211_get_mesh_params(struct sk_buff *skb,
+	struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct mesh_config cur_params;
+	int err;
+	struct net_device *dev;
+	void *hdr;
+	struct nlattr *pinfoattr;
+	struct sk_buff *msg;
+
+	/* Look up our device */
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		return err;
+
+	/* Get the mesh params */
+	rtnl_lock();
+	err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params);
+	rtnl_unlock();
+	if (err)
+		goto out;
+
+	/* Draw up a netlink message to send back */
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOBUFS;
+		goto out;
+	}
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_GET_MESH_PARAMS);
+	if (!hdr)
+		goto nla_put_failure;
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS);
+	if (!pinfoattr)
+		goto nla_put_failure;
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_RETRY_TIMEOUT,
+			cur_params.dot11MeshRetryTimeout);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_CONFIRM_TIMEOUT,
+			cur_params.dot11MeshConfirmTimeout);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_HOLDING_TIMEOUT,
+			cur_params.dot11MeshHoldingTimeout);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_MAX_PEER_LINKS,
+			cur_params.dot11MeshMaxPeerLinks);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_MAX_RETRIES,
+			cur_params.dot11MeshMaxRetries);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_TTL,
+			cur_params.dot11MeshTTL);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
+			cur_params.auto_open_plinks);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+			cur_params.dot11MeshHWMPmaxPREQretries);
+	NLA_PUT_U32(msg, NL80211_MESHCONF_PATH_REFRESH_TIME,
+			cur_params.path_refresh_time);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+			cur_params.min_discovery_timeout);
+	NLA_PUT_U32(msg, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
+			cur_params.dot11MeshHWMPactivePathTimeout);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
+			cur_params.dot11MeshHWMPpreqMinInterval);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+			cur_params.dot11MeshHWMPnetDiameterTraversalTime);
+	nla_nest_end(msg, pinfoattr);
+	genlmsg_end(msg, hdr);
+	err = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	err = -EMSGSIZE;
+out:
+	/* Cleanup */
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \
+do {\
+	if (table[attr_num]) {\
+		cfg.param = nla_fn(table[attr_num]); \
+		mask |= (1 << (attr_num - 1)); \
+	} \
+} while (0);\
+
+static struct nla_policy
+nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] __read_mostly = {
+	[NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 },
+	[NL80211_MESHCONF_TTL] = { .type = NLA_U8 },
+	[NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 },
+
+	[NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 },
+	[NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 },
+	[NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 },
+	[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
+};
+
+static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
+{
+	int err;
+	u32 mask;
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct mesh_config cfg;
+	struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
+	struct nlattr *parent_attr;
+
+	parent_attr = info->attrs[NL80211_ATTR_MESH_PARAMS];
+	if (!parent_attr)
+		return -EINVAL;
+	if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
+			parent_attr, nl80211_meshconf_params_policy))
+		return -EINVAL;
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		return err;
+
+	/* This makes sure that there aren't more than 32 mesh config
+	 * parameters (otherwise our bitfield scheme would not work.) */
+	BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32);
+
+	/* Fill in the params struct */
+	mask = 0;
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout,
+			mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout,
+			mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout,
+			mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks,
+			mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries,
+			mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL,
+			mask, NL80211_MESHCONF_TTL, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks,
+			mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries,
+			mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+			nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time,
+			mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout,
+			mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+			nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout,
+			mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
+			nla_get_u32);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
+			mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
+			nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
+			dot11MeshHWMPnetDiameterTraversalTime,
+			mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+			nla_get_u16);
+
+	/* Apply changes */
+	rtnl_lock();
+	err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask);
+	rtnl_unlock();
+
+	/* cleanup */
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+	return err;
+}
+
+#undef FILL_IN_MESH_PARAM_IF_SET
+
 static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
@@ -1915,6 +2094,18 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_GET_MESH_PARAMS,
+		.doit = nl80211_get_mesh_params,
+		.policy = nl80211_policy,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = NL80211_CMD_SET_MESH_PARAMS,
+		.doit = nl80211_set_mesh_params,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 /* multicast groups */
-- 
cgit v1.2.3


From e37d4dffdffb7f834bd28d4ae8e3dcdf07fce508 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Mon, 20 Oct 2008 21:20:27 -0400
Subject: mac80211: fix a few typos in mac80211 kernel doc

Correct a handful of errors found while reading the mac80211 book.

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e0b1ff9a3148..16c895969e6e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -99,7 +99,7 @@ enum ieee80211_max_queues {
  * The information provided in this structure is required for QoS
  * transmit queue configuration. Cf. IEEE 802.11 7.3.2.29.
  *
- * @aifs: arbitration interface space [0..255]
+ * @aifs: arbitration interframe space [0..255]
  * @cw_min: minimum contention window [a value of the form
  *	2^n-1 in the range 1..32767]
  * @cw_max: maximum contention window [like @cw_min]
@@ -950,7 +950,7 @@ static inline void SET_IEEE80211_DEV(struct ieee80211_hw *hw, struct device *dev
 }
 
 /**
- * SET_IEEE80211_PERM_ADDR - set the permanenet MAC address for 802.11 hardware
+ * SET_IEEE80211_PERM_ADDR - set the permanent MAC address for 802.11 hardware
  *
  * @hw: the &struct ieee80211_hw to set the MAC address for
  * @addr: the address to set
@@ -1043,7 +1043,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
  * This happens everytime the iv16 wraps around (every 65536 packets). The
  * set_key() call will happen only once for each key (unless the AP did
  * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is
- * provided by udpate_tkip_key only. The trigger that makes mac80211 call this
+ * provided by update_tkip_key only. The trigger that makes mac80211 call this
  * handler is software decryption with wrap around of iv16.
  */
 
@@ -1177,7 +1177,7 @@ enum ieee80211_ampdu_mlme_action {
  *	Must be implemented.
  *
  * @add_interface: Called when a netdevice attached to the hardware is
- *	enabled. Because it is not called for monitor mode devices, @open
+ *	enabled. Because it is not called for monitor mode devices, @start
  *	and @stop must be implemented.
  *	The driver should perform any initialization it needs before
  *	the device can be enabled. The initial configuration for the
@@ -1244,7 +1244,7 @@ enum ieee80211_ampdu_mlme_action {
  *	the stack will not do fragmentation.
  *
  * @sta_notify: Notifies low level driver about addition or removal
- *	of assocaited station or AP.
+ *	of associated station or AP.
  *
  * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max),
  *	bursting) for a hardware TX queue.
@@ -1544,7 +1544,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
  * the next beacon frame from the 802.11 code. The low-level is responsible
  * for calling this function before beacon data is needed (e.g., based on
  * hardware interrupt). Returned skb is used only once and low-level driver
- * is responsible of freeing it.
+ * is responsible for freeing it.
  */
 struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif);
-- 
cgit v1.2.3


From cf03268e6ed6cfacaa5e32db41ea832c4d10438b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 21 Oct 2008 09:42:38 +0200
Subject: wireless: don't publish __regulatory_hint

This function requires an internal lock to be held, so it cannot
be published to other modules in the kernel.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/wireless.h | 37 +++++++++----------------------------
 net/wireless/reg.h     | 28 ++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/wireless.h b/include/net/wireless.h
index c0aa0fb8db5e..061fe5017e5c 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -340,33 +340,6 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq)
 	return __ieee80211_get_channel(wiphy, freq);
 }
 
-/**
- * __regulatory_hint - hint to the wireless core a regulatory domain
- * @wiphy: if a driver is providing the hint this is the driver's very
- * 	own &struct wiphy
- * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain
- * 	should be in. If @rd is set this should be NULL
- * @rd: a complete regulatory domain, if passed the caller need not worry
- * 	about freeing it
- *
- * The Wireless subsystem can use this function to hint to the wireless core
- * what it believes should be the current regulatory domain by
- * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
- * domain should be in or by providing a completely build regulatory domain.
- *
- * Returns -EALREADY if *a regulatory domain* has already been set. Note that
- * this could be by another driver. It is safe for drivers to continue if
- * -EALREADY is returned, if drivers are not capable of world roaming they
- * should not register more channels than they support. Right now we only
- * support listening to the first driver hint. If the driver is capable
- * of world roaming but wants to respect its own EEPROM mappings for
- * specific regulatory domains it should register the @reg_notifier callback
- * on the &struct wiphy. Returns 0 if the hint went through fine or through an
- * intersection operation. Otherwise a standard error code is returned.
- *
- */
-extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
-		const char *alpha2, struct ieee80211_regdomain *rd);
 /**
  * regulatory_hint - driver hint to the wireless core a regulatory domain
  * @wiphy: the driver's very own &struct wiphy
@@ -388,7 +361,15 @@ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
  * the wireless core it is unknown. If you pass a built regulatory domain
  * and we return non zero you are in charge of kfree()'ing the structure.
  *
- * See __regulatory_hint() documentation for possible return values.
+ * Returns -EALREADY if *a regulatory domain* has already been set. Note that
+ * this could be by another driver. It is safe for drivers to continue if
+ * -EALREADY is returned, if drivers are not capable of world roaming they
+ * should not register more channels than they support. Right now we only
+ * support listening to the first driver hint. If the driver is capable
+ * of world roaming but wants to respect its own EEPROM mappings for
+ * specific regulatory domains it should register the @reg_notifier callback
+ * on the &struct wiphy. Returns 0 if the hint went through fine or through an
+ * intersection operation. Otherwise a standard error code is returned.
  */
 extern int regulatory_hint(struct wiphy *wiphy,
 		const char *alpha2, struct ieee80211_regdomain *rd);
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a33362872f3c..a4845b140a84 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -10,4 +10,32 @@ void regulatory_exit(void);
 
 int set_regdom(const struct ieee80211_regdomain *rd);
 
+/**
+ * __regulatory_hint - hint to the wireless core a regulatory domain
+ * @wiphy: if a driver is providing the hint this is the driver's very
+ * 	own &struct wiphy
+ * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain
+ * 	should be in. If @rd is set this should be NULL
+ * @rd: a complete regulatory domain, if passed the caller need not worry
+ * 	about freeing it
+ *
+ * The Wireless subsystem can use this function to hint to the wireless core
+ * what it believes should be the current regulatory domain by
+ * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
+ * domain should be in or by providing a completely build regulatory domain.
+ *
+ * Returns -EALREADY if *a regulatory domain* has already been set. Note that
+ * this could be by another driver. It is safe for drivers to continue if
+ * -EALREADY is returned, if drivers are not capable of world roaming they
+ * should not register more channels than they support. Right now we only
+ * support listening to the first driver hint. If the driver is capable
+ * of world roaming but wants to respect its own EEPROM mappings for
+ * specific regulatory domains it should register the @reg_notifier callback
+ * on the &struct wiphy. Returns 0 if the hint went through fine or through an
+ * intersection operation. Otherwise a standard error code is returned.
+ *
+ */
+extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
+		const char *alpha2, struct ieee80211_regdomain *rd);
+
 #endif  /* __NET_WIRELESS_REG_H */
-- 
cgit v1.2.3


From 7e272fcff6f0a32a3d46e600ea5895f6058f4e2d Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 24 Sep 2008 18:13:14 -0400
Subject: wireless: consolidate on a single escape_essid implementation

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/Kconfig                |   1 +
 drivers/net/wireless/ipw2100.c              |  10 +--
 drivers/net/wireless/ipw2200.c              | 118 ++++++++++++++--------------
 drivers/net/wireless/ipw2200.h              |   1 +
 drivers/net/wireless/iwlwifi/Kconfig        |   2 +
 drivers/net/wireless/iwlwifi/iwl-scan.c     |  52 ++----------
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  48 +----------
 drivers/net/wireless/libertas/assoc.c       |  14 ++--
 drivers/net/wireless/libertas/cmd.c         |   3 +-
 drivers/net/wireless/libertas/debugfs.c     |   3 +-
 drivers/net/wireless/libertas/decl.h        |   4 -
 drivers/net/wireless/libertas/main.c        |  27 -------
 drivers/net/wireless/libertas/scan.c        |  11 ++-
 drivers/net/wireless/libertas/wext.c        |   3 +-
 include/net/ieee80211.h                     |  20 -----
 include/net/lib80211.h                      |  31 ++++++++
 net/ieee80211/Kconfig                       |   1 +
 net/ieee80211/ieee80211_module.c            |  26 ------
 net/ieee80211/ieee80211_rx.c                |  31 ++++----
 net/ieee80211/ieee80211_wx.c                |   6 +-
 net/wireless/Kconfig                        |  10 +++
 net/wireless/Makefile                       |   1 +
 net/wireless/lib80211.c                     |  58 ++++++++++++++
 23 files changed, 219 insertions(+), 262 deletions(-)
 create mode 100644 include/net/lib80211.h
 create mode 100644 net/wireless/lib80211.c

(limited to 'include')

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 45bdf0b339bb..42afaedbb219 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -271,6 +271,7 @@ config LIBERTAS
 	tristate "Marvell 8xxx Libertas WLAN driver support"
 	depends on WLAN_80211
 	select WIRELESS_EXT
+	select LIB80211
 	select FW_LOADER
 	---help---
 	  A library for Marvell Libertas 8xxx devices.
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 079dbd07e2f6..223914e3e07e 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -1975,7 +1975,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status)
 	}
 
 	IPW_DEBUG_INFO("%s: Associated with '%s' at %s, channel %d (BSSID=%pM)\n",
-		       priv->net_dev->name, escape_essid(essid, essid_len),
+		       priv->net_dev->name, escape_ssid(essid, essid_len),
 		       txratename, chan, bssid);
 
 	/* now we copy read ssid into dev */
@@ -2003,7 +2003,7 @@ static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid,
 	};
 	int err;
 
-	IPW_DEBUG_HC("SSID: '%s'\n", escape_essid(essid, ssid_len));
+	IPW_DEBUG_HC("SSID: '%s'\n", escape_ssid(essid, ssid_len));
 
 	if (ssid_len)
 		memcpy(cmd.host_command_parameters, essid, ssid_len);
@@ -2046,7 +2046,7 @@ static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status)
 {
 	IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC,
 		  "disassociated: '%s' %pM \n",
-		  escape_essid(priv->essid, priv->essid_len),
+		  escape_ssid(priv->essid, priv->essid_len),
 		  priv->bssid);
 
 	priv->status &= ~(STATUS_ASSOCIATED | STATUS_ASSOCIATING);
@@ -6987,7 +6987,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev,
 		goto done;
 	}
 
-	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_essid(essid, length),
+	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(essid, length),
 		     length);
 
 	priv->essid_len = length;
@@ -7014,7 +7014,7 @@ static int ipw2100_wx_get_essid(struct net_device *dev,
 	 * configured ESSID then return that; otherwise return ANY */
 	if (priv->config & CFG_STATIC_ESSID || priv->status & STATUS_ASSOCIATED) {
 		IPW_DEBUG_WX("Getting essid: '%s'\n",
-			     escape_essid(priv->essid, priv->essid_len));
+			     escape_ssid(priv->essid, priv->essid_len));
 		memcpy(extra, priv->essid, priv->essid_len);
 		wrqu->essid.length = priv->essid_len;
 		wrqu->essid.flags = 1;	/* active */
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index 6ec6de2960ee..22278f87d1c1 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -4409,8 +4409,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 						  IPW_DL_ASSOC,
 						  "associated: '%s' %pM \n",
-						  escape_essid(priv->essid,
-							       priv->essid_len),
+						  escape_ssid(priv->essid,
+							      priv->essid_len),
 						  priv->bssid);
 
 					switch (priv->ieee->iw_mode) {
@@ -4490,10 +4490,10 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 							  "deauthenticated: '%s' "
 							  "%pM"
 							  ": (0x%04X) - %s \n",
-							  escape_essid(priv->
-								       essid,
-								       priv->
-								       essid_len),
+							  escape_ssid(priv->
+								      essid,
+								      priv->
+								      essid_len),
 							  priv->bssid,
 							  le16_to_cpu(auth->status),
 							  ipw_get_status_code
@@ -4512,7 +4512,7 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 						  IPW_DL_ASSOC,
 						  "authenticated: '%s' %pM\n",
-						  escape_essid(priv->essid,
+						  escape_ssid(priv->essid,
 							       priv->essid_len),
 						  priv->bssid);
 					break;
@@ -4540,7 +4540,7 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 						  IPW_DL_ASSOC,
 						  "disassociated: '%s' %pM \n",
-						  escape_essid(priv->essid,
+						  escape_ssid(priv->essid,
 							       priv->essid_len),
 						  priv->bssid);
 
@@ -4578,7 +4578,7 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 			case CMAS_AUTHENTICATED:
 				IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE,
 					  "authenticated: '%s' %pM \n",
-					  escape_essid(priv->essid,
+					  escape_ssid(priv->essid,
 						       priv->essid_len),
 					  priv->bssid);
 				priv->status |= STATUS_AUTH;
@@ -4597,8 +4597,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 				IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 					  IPW_DL_ASSOC,
 					  "deauthenticated: '%s' %pM\n",
-					  escape_essid(priv->essid,
-						       priv->essid_len),
+					  escape_ssid(priv->essid,
+						      priv->essid_len),
 					  priv->bssid);
 
 				priv->status &= ~(STATUS_ASSOCIATING |
@@ -5430,7 +5430,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	     !(network->capability & WLAN_CAPABILITY_IBSS))) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded due to "
 				"capability mismatch.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5440,7 +5440,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (network->flags & NETWORK_EMPTY_ESSID) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of hidden ESSID.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5453,8 +5453,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 			   network->ssid_len)) {
 			IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 					"because of non-network ESSID.\n",
-					escape_essid(network->ssid,
-						     network->ssid_len),
+					escape_ssid(network->ssid,
+						    network->ssid_len),
 					network->bssid);
 			return 0;
 		}
@@ -5468,13 +5468,13 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 			char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
 
 			strncpy(escaped,
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				sizeof(escaped));
 			IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 					"because of ESSID mismatch: '%s'.\n",
 					escaped, network->bssid,
-					escape_essid(priv->essid,
-						     priv->essid_len));
+					escape_ssid(priv->essid,
+						    priv->essid_len));
 			return 0;
 		}
 	}
@@ -5485,14 +5485,14 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (network->time_stamp[0] < match->network->time_stamp[0]) {
 		IPW_DEBUG_MERGE("Network '%s excluded because newer than "
 				"current network.\n",
-				escape_essid(match->network->ssid,
-					     match->network->ssid_len));
+				escape_ssid(match->network->ssid,
+					    match->network->ssid_len));
 		return 0;
 	} else if (network->time_stamp[1] < match->network->time_stamp[1]) {
 		IPW_DEBUG_MERGE("Network '%s excluded because newer than "
 				"current network.\n",
-				escape_essid(match->network->ssid,
-					     match->network->ssid_len));
+				escape_ssid(match->network->ssid,
+					    match->network->ssid_len));
 		return 0;
 	}
 
@@ -5501,7 +5501,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	    time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of age: %ums.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				jiffies_to_msecs(jiffies -
 						 network->last_scanned));
@@ -5512,7 +5512,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	    (network->channel != priv->channel)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of channel mismatch: %d != %d.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				network->channel, priv->channel);
 		return 0;
@@ -5523,7 +5523,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	    ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of privacy mismatch: %s != %s.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				priv->
 				capability & CAP_PRIVACY_ON ? "on" : "off",
@@ -5536,8 +5536,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (!memcmp(network->bssid, priv->bssid, ETH_ALEN)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of the same BSSID match: %pM"
-				".\n", escape_essid(network->ssid,
-						    network->ssid_len),
+				".\n", escape_ssid(network->ssid,
+						   network->ssid_len),
 				network->bssid,
 				priv->bssid);
 		return 0;
@@ -5548,7 +5548,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of invalid frequency/mode "
 				"combination.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5559,7 +5559,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because configured rate mask excludes "
 				"AP mandatory rate.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5567,7 +5567,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (rates.num_rates == 0) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of no compatible rates.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5580,7 +5580,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	ipw_copy_rates(&match->rates, &rates);
 	match->network = network;
 	IPW_DEBUG_MERGE("Network '%s (%pM)' is a viable match.\n",
-			escape_essid(network->ssid, network->ssid_len),
+			escape_ssid(network->ssid, network->ssid_len),
 			network->bssid);
 
 	return 1;
@@ -5618,8 +5618,8 @@ static void ipw_merge_adhoc_network(struct work_struct *work)
 		mutex_lock(&priv->mutex);
 		if ((priv->ieee->iw_mode == IW_MODE_ADHOC)) {
 			IPW_DEBUG_MERGE("remove network %s\n",
-					escape_essid(priv->essid,
-						     priv->essid_len));
+					escape_ssid(priv->essid,
+						    priv->essid_len));
 			ipw_remove_current_network(priv);
 		}
 
@@ -5644,7 +5644,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	     !(network->capability & WLAN_CAPABILITY_IBSS))) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded due to "
 				"capability mismatch.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5654,7 +5654,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (network->flags & NETWORK_EMPTY_ESSID) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of hidden ESSID.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5667,7 +5667,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 			   network->ssid_len)) {
 			IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 					"because of non-network ESSID.\n",
-					escape_essid(network->ssid,
+					escape_ssid(network->ssid,
 						     network->ssid_len),
 					network->bssid);
 			return 0;
@@ -5681,13 +5681,13 @@ static int ipw_best_network(struct ipw_priv *priv,
 			    min(network->ssid_len, priv->essid_len)))) {
 			char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
 			strncpy(escaped,
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				sizeof(escaped));
 			IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 					"because of ESSID mismatch: '%s'.\n",
 					escaped, network->bssid,
-					escape_essid(priv->essid,
-						     priv->essid_len));
+					escape_ssid(priv->essid,
+						    priv->essid_len));
 			return 0;
 		}
 	}
@@ -5697,13 +5697,13 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (match->network && match->network->stats.rssi > network->stats.rssi) {
 		char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
 		strncpy(escaped,
-			escape_essid(network->ssid, network->ssid_len),
+			escape_ssid(network->ssid, network->ssid_len),
 			sizeof(escaped));
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded because "
 				"'%s (%pM)' has a stronger signal.\n",
 				escaped, network->bssid,
-				escape_essid(match->network->ssid,
-					     match->network->ssid_len),
+				escape_ssid(match->network->ssid,
+					    match->network->ssid_len),
 				match->network->bssid);
 		return 0;
 	}
@@ -5715,7 +5715,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of storming (%ums since last "
 				"assoc attempt).\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				jiffies_to_msecs(jiffies -
 						 network->last_associate));
@@ -5727,7 +5727,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of age: %ums.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				jiffies_to_msecs(jiffies -
 						 network->last_scanned));
@@ -5738,7 +5738,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    (network->channel != priv->channel)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of channel mismatch: %d != %d.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				network->channel, priv->channel);
 		return 0;
@@ -5749,7 +5749,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of privacy mismatch: %s != %s.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid,
 				priv->capability & CAP_PRIVACY_ON ? "on" :
 				"off",
@@ -5762,7 +5762,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    memcmp(network->bssid, priv->bssid, ETH_ALEN)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of BSSID mismatch: %pM.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid, priv->bssid);
 		return 0;
 	}
@@ -5772,7 +5772,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of invalid frequency/mode "
 				"combination.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5781,7 +5781,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (!ieee80211_is_valid_channel(priv->ieee, network->channel)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of invalid channel in current GEO\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5792,7 +5792,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because configured rate mask excludes "
 				"AP mandatory rate.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5800,7 +5800,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (rates.num_rates == 0) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of no compatible rates.\n",
-				escape_essid(network->ssid, network->ssid_len),
+				escape_ssid(network->ssid, network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5814,7 +5814,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	match->network = network;
 
 	IPW_DEBUG_ASSOC("Network '%s (%pM)' is a viable match.\n",
-			escape_essid(network->ssid, network->ssid_len),
+			escape_ssid(network->ssid, network->ssid_len),
 			network->bssid);
 
 	return 1;
@@ -6065,7 +6065,7 @@ static void ipw_debug_config(struct ipw_priv *priv)
 		IPW_DEBUG_INFO("Channel unlocked.\n");
 	if (priv->config & CFG_STATIC_ESSID)
 		IPW_DEBUG_INFO("ESSID locked to '%s'\n",
-			       escape_essid(priv->essid, priv->essid_len));
+			       escape_ssid(priv->essid, priv->essid_len));
 	else
 		IPW_DEBUG_INFO("ESSID unlocked.\n");
 	if (priv->config & CFG_STATIC_BSSID)
@@ -7352,7 +7352,7 @@ static int ipw_associate_network(struct ipw_priv *priv,
 	IPW_DEBUG_ASSOC("%sssocation attempt: '%s', channel %d, "
 			"802.11%c [%d], %s[:%s], enc=%s%s%s%c%c\n",
 			roaming ? "Rea" : "A",
-			escape_essid(priv->essid, priv->essid_len),
+			escape_ssid(priv->essid, priv->essid_len),
 			network->channel,
 			ipw_modes[priv->assoc_request.ieee_mode],
 			rates->num_rates,
@@ -7452,7 +7452,7 @@ static int ipw_associate_network(struct ipw_priv *priv,
 	}
 
 	IPW_DEBUG(IPW_DL_STATE, "associating: '%s' %pM \n",
-		  escape_essid(priv->essid, priv->essid_len),
+		  escape_ssid(priv->essid, priv->essid_len),
 		  priv->bssid);
 
 	return 0;
@@ -7604,8 +7604,8 @@ static int ipw_associate(void *data)
 			target = oldest;
 			IPW_DEBUG_ASSOC("Expired '%s' (%pM) from "
 					"network list.\n",
-					escape_essid(target->ssid,
-						     target->ssid_len),
+					escape_ssid(target->ssid,
+						    target->ssid_len),
 					target->bssid);
 			list_add_tail(&target->list,
 				      &priv->ieee->network_free_list);
@@ -9057,7 +9057,7 @@ static int ipw_wx_set_essid(struct net_device *dev,
 		return 0;
 	}
 
-	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_essid(extra, length),
+	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(extra, length),
 		     length);
 
 	priv->essid_len = length;
@@ -9084,7 +9084,7 @@ static int ipw_wx_get_essid(struct net_device *dev,
 	if (priv->config & CFG_STATIC_ESSID ||
 	    priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)) {
 		IPW_DEBUG_WX("Getting essid: '%s'\n",
-			     escape_essid(priv->essid, priv->essid_len));
+			     escape_ssid(priv->essid, priv->essid_len));
 		memcpy(extra, priv->essid, priv->essid_len);
 		wrqu->essid.length = priv->essid_len;
 		wrqu->essid.flags = 1;	/* active */
diff --git a/drivers/net/wireless/ipw2200.h b/drivers/net/wireless/ipw2200.h
index 0bad1ec3e7e0..0a84d52147bd 100644
--- a/drivers/net/wireless/ipw2200.h
+++ b/drivers/net/wireless/ipw2200.h
@@ -48,6 +48,7 @@
 #include <linux/jiffies.h>
 #include <asm/io.h>
 
+#include <net/lib80211.h>
 #include <net/ieee80211.h>
 #include <net/ieee80211_radiotap.h>
 
diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
index b0ac0ce3fb9f..47bee0ee0a7c 100644
--- a/drivers/net/wireless/iwlwifi/Kconfig
+++ b/drivers/net/wireless/iwlwifi/Kconfig
@@ -4,6 +4,7 @@ config IWLWIFI
 config IWLCORE
 	tristate "Intel Wireless Wifi Core"
 	depends on PCI && MAC80211 && WLAN_80211 && EXPERIMENTAL
+	select LIB80211
 	select IWLWIFI
 	select MAC80211_LEDS if IWLWIFI_LEDS
 	select LEDS_CLASS if IWLWIFI_LEDS
@@ -105,6 +106,7 @@ config IWL3945
 	tristate "Intel PRO/Wireless 3945ABG/BG Network Connection"
 	depends on PCI && MAC80211 && WLAN_80211 && EXPERIMENTAL
 	select FW_LOADER
+	select LIB80211
 	select IWLWIFI
 	select MAC80211_LEDS if IWL3945_LEDS
 	select LEDS_CLASS if IWL3945_LEDS
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index 86b74571b513..1cc8aa592821 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -25,8 +25,10 @@
  * Tomas Winkler <tomas.winkler@intel.com>
  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
  *****************************************************************************/
-#include <net/mac80211.h>
+#include <linux/types.h>
 #include <linux/etherdevice.h>
+#include <net/lib80211.h>
+#include <net/mac80211.h>
 
 #include "iwl-eeprom.h"
 #include "iwl-dev.h"
@@ -64,48 +66,6 @@
 #define IWL_SCAN_PROBE_MASK(n) 	cpu_to_le32((BIT(n) | (BIT(n) - BIT(1))))
 
 
-static int iwl_is_empty_essid(const char *essid, int essid_len)
-{
-	/* Single white space is for Linksys APs */
-	if (essid_len == 1 && essid[0] == ' ')
-		return 1;
-
-	/* Otherwise, if the entire essid is 0, we assume it is hidden */
-	while (essid_len) {
-		essid_len--;
-		if (essid[essid_len] != '\0')
-			return 0;
-	}
-
-	return 1;
-}
-
-
-
-static const char *iwl_escape_essid(const char *essid, u8 essid_len)
-{
-	static char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
-	const char *s = essid;
-	char *d = escaped;
-
-	if (iwl_is_empty_essid(essid, essid_len)) {
-		memcpy(escaped, "<hidden>", sizeof("<hidden>"));
-		return escaped;
-	}
-
-	essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE);
-	while (essid_len--) {
-		if (*s == '\0') {
-			*d++ = '\\';
-			*d++ = '0';
-			s++;
-		} else
-			*d++ = *s++;
-	}
-	*d = '\0';
-	return escaped;
-}
-
 /**
  * iwl_scan_cancel - Cancel any currently executing HW scan
  *
@@ -775,8 +735,8 @@ static void iwl_bg_request_scan(struct work_struct *data)
 	/* We should add the ability for user to lock to PASSIVE ONLY */
 	if (priv->one_direct_scan) {
 		IWL_DEBUG_SCAN("Start direct scan for '%s'\n",
-				iwl_escape_essid(priv->direct_ssid,
-				priv->direct_ssid_len));
+				escape_ssid(priv->direct_ssid,
+					    priv->direct_ssid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->direct_ssid_len;
 		memcpy(scan->direct_scan[0].ssid,
@@ -784,7 +744,7 @@ static void iwl_bg_request_scan(struct work_struct *data)
 		n_probes++;
 	} else if (!iwl_is_associated(priv) && priv->essid_len) {
 		IWL_DEBUG_SCAN("Start direct scan for '%s' (not associated)\n",
-				iwl_escape_essid(priv->essid, priv->essid_len));
+				escape_ssid(priv->essid, priv->essid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->essid_len;
 		memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len);
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 2cd33b4e9e13..370cc46b4888 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -41,6 +41,7 @@
 #include <linux/if_arp.h>
 
 #include <net/ieee80211_radiotap.h>
+#include <net/lib80211.h>
 #include <net/mac80211.h>
 
 #include <asm/div64.h>
@@ -107,46 +108,6 @@ static const struct ieee80211_supported_band *iwl3945_get_band(
 	return priv->hw->wiphy->bands[band];
 }
 
-static int iwl3945_is_empty_essid(const char *essid, int essid_len)
-{
-	/* Single white space is for Linksys APs */
-	if (essid_len == 1 && essid[0] == ' ')
-		return 1;
-
-	/* Otherwise, if the entire essid is 0, we assume it is hidden */
-	while (essid_len) {
-		essid_len--;
-		if (essid[essid_len] != '\0')
-			return 0;
-	}
-
-	return 1;
-}
-
-static const char *iwl3945_escape_essid(const char *essid, u8 essid_len)
-{
-	static char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
-	const char *s = essid;
-	char *d = escaped;
-
-	if (iwl3945_is_empty_essid(essid, essid_len)) {
-		memcpy(escaped, "<hidden>", sizeof("<hidden>"));
-		return escaped;
-	}
-
-	essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE);
-	while (essid_len--) {
-		if (*s == '\0') {
-			*d++ = '\\';
-			*d++ = '0';
-			s++;
-		} else
-			*d++ = *s++;
-	}
-	*d = '\0';
-	return escaped;
-}
-
 /*************** DMA-QUEUE-GENERAL-FUNCTIONS  *****
  * DMA services
  *
@@ -6193,8 +6154,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 	if (priv->one_direct_scan) {
 		IWL_DEBUG_SCAN
 		    ("Kicking off one direct scan for '%s'\n",
-		     iwl3945_escape_essid(priv->direct_ssid,
-				      priv->direct_ssid_len));
+		     escape_ssid(priv->direct_ssid, priv->direct_ssid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->direct_ssid_len;
 		memcpy(scan->direct_scan[0].ssid,
@@ -6203,7 +6163,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 	} else if (!iwl3945_is_associated(priv) && priv->essid_len) {
 		IWL_DEBUG_SCAN
 		  ("Kicking off one direct scan for '%s' when not associated\n",
-		   iwl3945_escape_essid(priv->essid, priv->essid_len));
+		   escape_ssid(priv->essid, priv->essid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->essid_len;
 		memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len);
@@ -7018,7 +6978,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len)
 	}
 	if (len) {
 		IWL_DEBUG_SCAN("direct scan for %s [%d]\n ",
-			       iwl3945_escape_essid(ssid, len), (int)len);
+			       escape_ssid(ssid, len), (int)len);
 
 		priv->one_direct_scan = 1;
 		priv->direct_ssid_len = (u8)
diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c
index 8b88e9544418..3492e89d1dd5 100644
--- a/drivers/net/wireless/libertas/assoc.c
+++ b/drivers/net/wireless/libertas/assoc.c
@@ -1,6 +1,8 @@
 /* Copyright (C) 2006, Red Hat, Inc. */
 
+#include <linux/types.h>
 #include <linux/etherdevice.h>
+#include <net/lib80211.h>
 
 #include "assoc.h"
 #include "decl.h"
@@ -157,11 +159,11 @@ static int lbs_adhoc_join(struct lbs_private *priv,
 	lbs_deb_enter(LBS_DEB_ASSOC);
 
 	lbs_deb_join("current SSID '%s', ssid length %u\n",
-		escape_essid(priv->curbssparams.ssid,
+		escape_ssid(priv->curbssparams.ssid,
 		priv->curbssparams.ssid_len),
 		priv->curbssparams.ssid_len);
 	lbs_deb_join("requested ssid '%s', ssid length %u\n",
-		escape_essid(bss->ssid, bss->ssid_len),
+		escape_ssid(bss->ssid, bss->ssid_len),
 		bss->ssid_len);
 
 	/* check if the requested SSID is already joined */
@@ -325,7 +327,7 @@ static int lbs_adhoc_start(struct lbs_private *priv,
 	memcpy(cmd.ssid, assoc_req->ssid, assoc_req->ssid_len);
 
 	lbs_deb_join("ADHOC_START: SSID '%s', ssid length %u\n",
-		escape_essid(assoc_req->ssid, assoc_req->ssid_len),
+		escape_ssid(assoc_req->ssid, assoc_req->ssid_len),
 		assoc_req->ssid_len);
 
 	cmd.bsstype = CMD_BSS_TYPE_IBSS;
@@ -704,7 +706,7 @@ static int assoc_helper_essid(struct lbs_private *priv,
 		channel = assoc_req->channel;
 
 	lbs_deb_assoc("SSID '%s' requested\n",
-	              escape_essid(assoc_req->ssid, assoc_req->ssid_len));
+	              escape_ssid(assoc_req->ssid, assoc_req->ssid_len));
 	if (assoc_req->mode == IW_MODE_INFRA) {
 		lbs_send_specific_ssid_scan(priv, assoc_req->ssid,
 			assoc_req->ssid_len);
@@ -1228,7 +1230,7 @@ void lbs_association_worker(struct work_struct *work)
 		"    secinfo:  %s%s%s\n"
 		"    auth_mode: %d\n",
 		assoc_req->flags,
-		escape_essid(assoc_req->ssid, assoc_req->ssid_len),
+		escape_ssid(assoc_req->ssid, assoc_req->ssid_len),
 		assoc_req->channel, assoc_req->band, assoc_req->mode,
 		assoc_req->bssid,
 		assoc_req->secinfo.WPAenabled ? " WPA" : "",
@@ -1814,7 +1816,7 @@ static int lbs_adhoc_post(struct lbs_private *priv, struct cmd_header *resp)
 	wireless_send_event(priv->dev, SIOCGIWAP, &wrqu, NULL);
 
 	lbs_deb_join("ADHOC_RESP: Joined/started '%s', BSSID %pM, channel %d\n",
-		     escape_essid(bss->ssid, bss->ssid_len),
+		     escape_ssid(bss->ssid, bss->ssid_len),
 		     priv->curbssparams.bssid,
 		     priv->curbssparams.channel);
 
diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c
index d45b07cf6a62..52feab69ee49 100644
--- a/drivers/net/wireless/libertas/cmd.c
+++ b/drivers/net/wireless/libertas/cmd.c
@@ -4,6 +4,7 @@
   */
 
 #include <net/iw_handler.h>
+#include <net/lib80211.h>
 #include <net/ieee80211.h>
 #include <linux/kfifo.h>
 #include "host.h"
@@ -1092,7 +1093,7 @@ int lbs_mesh_config(struct lbs_private *priv, uint16_t action, uint16_t chan)
 	}
 	lbs_deb_cmd("mesh config action %d type %x channel %d SSID %s\n",
 		    action, priv->mesh_tlv, chan,
-		    escape_essid(priv->mesh_ssid, priv->mesh_ssid_len));
+		    escape_ssid(priv->mesh_ssid, priv->mesh_ssid_len));
 
 	return __lbs_mesh_config_send(priv, &cmd, action, priv->mesh_tlv);
 }
diff --git a/drivers/net/wireless/libertas/debugfs.c b/drivers/net/wireless/libertas/debugfs.c
index 5f6bee493f23..84933203be74 100644
--- a/drivers/net/wireless/libertas/debugfs.c
+++ b/drivers/net/wireless/libertas/debugfs.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <net/iw_handler.h>
+#include <net/lib80211.h>
 
 #include "dev.h"
 #include "decl.h"
@@ -85,7 +86,7 @@ static ssize_t lbs_getscantable(struct file *file, char __user *userbuf,
 				spectrum_mgmt ? 'S' : ' ');
 		pos += snprintf(buf+pos, len-pos, " %04d |", SCAN_RSSI(iter_bss->rssi));
 		pos += snprintf(buf+pos, len-pos, " %s\n",
-		                escape_essid(iter_bss->ssid, iter_bss->ssid_len));
+		                escape_ssid(iter_bss->ssid, iter_bss->ssid_len));
 
 		numscansdone++;
 	}
diff --git a/drivers/net/wireless/libertas/decl.h b/drivers/net/wireless/libertas/decl.h
index 1a8888cceadc..0b84bdca0726 100644
--- a/drivers/net/wireless/libertas/decl.h
+++ b/drivers/net/wireless/libertas/decl.h
@@ -74,8 +74,4 @@ void lbs_host_to_card_done(struct lbs_private *priv);
 
 int lbs_update_channel(struct lbs_private *priv);
 
-#ifndef CONFIG_IEEE80211
-const char *escape_essid(const char *essid, u8 essid_len);
-#endif
-
 #endif
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 34a47f692bd6..e9d23f68174f 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -1646,33 +1646,6 @@ out:
 	return ret;
 }
 
-#ifndef CONFIG_IEEE80211
-const char *escape_essid(const char *essid, u8 essid_len)
-{
-	static char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
-	const char *s = essid;
-	char *d = escaped;
-
-	if (ieee80211_is_empty_essid(essid, essid_len)) {
-		memcpy(escaped, "<hidden>", sizeof("<hidden>"));
-		return escaped;
-	}
-
-	essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE);
-	while (essid_len--) {
-		if (*s == '\0') {
-			*d++ = '\\';
-			*d++ = '0';
-			s++;
-		} else {
-			*d++ = *s++;
-		}
-	}
-	*d = '\0';
-	return escaped;
-}
-#endif
-
 module_init(lbs_init_module);
 module_exit(lbs_exit_module);
 
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c
index 351b3f6e5664..7881890a4e98 100644
--- a/drivers/net/wireless/libertas/scan.c
+++ b/drivers/net/wireless/libertas/scan.c
@@ -4,9 +4,12 @@
   * IOCTL handlers as well as command preperation and response routines
   *  for sending scan commands to the firmware.
   */
+#include <linux/types.h>
 #include <linux/etherdevice.h>
 #include <asm/unaligned.h>
 
+#include <net/lib80211.h>
+
 #include "host.h"
 #include "decl.h"
 #include "dev.h"
@@ -452,7 +455,7 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan)
 	list_for_each_entry(iter, &priv->network_list, list)
 		lbs_deb_scan("%02d: BSSID %pM, RSSI %d, SSID '%s'\n",
 			     i++, iter->bssid, iter->rssi,
-			     escape_essid(iter->ssid, iter->ssid_len));
+			     escape_ssid(iter->ssid, iter->ssid_len));
 	mutex_unlock(&priv->lock);
 #endif
 
@@ -599,7 +602,7 @@ static int lbs_process_bss(struct bss_descriptor *bss,
 			bss->ssid_len = min_t(int, 32, elem->len);
 			memcpy(bss->ssid, elem->data, bss->ssid_len);
 			lbs_deb_scan("got SSID IE: '%s', len %u\n",
-			             escape_essid(bss->ssid, bss->ssid_len),
+			             escape_ssid(bss->ssid, bss->ssid_len),
 			             bss->ssid_len);
 			break;
 
@@ -742,7 +745,7 @@ int lbs_send_specific_ssid_scan(struct lbs_private *priv, uint8_t *ssid,
 	int ret = 0;
 
 	lbs_deb_enter_args(LBS_DEB_SCAN, "SSID '%s'\n",
-			   escape_essid(ssid, ssid_len));
+			   escape_ssid(ssid, ssid_len));
 
 	if (!ssid_len)
 		goto out;
@@ -966,7 +969,7 @@ int lbs_set_scan(struct net_device *dev, struct iw_request_info *info,
 		priv->scan_ssid_len = req->essid_len;
 		memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len);
 		lbs_deb_wext("set_scan, essid '%s'\n",
-			escape_essid(priv->scan_ssid, priv->scan_ssid_len));
+			escape_ssid(priv->scan_ssid, priv->scan_ssid_len));
 	} else {
 		priv->scan_ssid_len = 0;
 	}
diff --git a/drivers/net/wireless/libertas/wext.c b/drivers/net/wireless/libertas/wext.c
index 04f0bf2ef0a8..247579951858 100644
--- a/drivers/net/wireless/libertas/wext.c
+++ b/drivers/net/wireless/libertas/wext.c
@@ -8,6 +8,7 @@
 #include <linux/wireless.h>
 #include <linux/bitops.h>
 
+#include <net/lib80211.h>
 #include <net/ieee80211.h>
 #include <net/iw_handler.h>
 
@@ -2005,7 +2006,7 @@ static int lbs_set_essid(struct net_device *dev, struct iw_request_info *info,
 		lbs_deb_wext("requested any SSID\n");
 	} else {
 		lbs_deb_wext("requested SSID '%s'\n",
-		             escape_essid(ssid, ssid_len));
+		             escape_ssid(ssid, ssid_len));
 	}
 
 out:
diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index 93a56de3594b..dec10c41e2ec 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -127,10 +127,6 @@ static inline bool ieee80211_ratelimit_debug(u32 level)
 }
 #endif				/* CONFIG_IEEE80211_DEBUG */
 
-/* escape_essid() is intended to be used in debug (and possibly error)
- * messages. It should never be used for passing essid to user space. */
-const char *escape_essid(const char *essid, u8 essid_len);
-
 /*
  * To use the debug system:
  *
@@ -1135,22 +1131,6 @@ static inline void *ieee80211_priv(struct net_device *dev)
 	return ((struct ieee80211_device *)netdev_priv(dev))->priv;
 }
 
-static inline int ieee80211_is_empty_essid(const char *essid, int essid_len)
-{
-	/* Single white space is for Linksys APs */
-	if (essid_len == 1 && essid[0] == ' ')
-		return 1;
-
-	/* Otherwise, if the entire essid is 0, we assume it is hidden */
-	while (essid_len) {
-		essid_len--;
-		if (essid[essid_len] != '\0')
-			return 0;
-	}
-
-	return 1;
-}
-
 static inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee,
 					  int mode)
 {
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
new file mode 100644
index 000000000000..91a64f358cef
--- /dev/null
+++ b/include/net/lib80211.h
@@ -0,0 +1,31 @@
+/*
+ * lib80211.h -- common bits for IEEE802.11 wireless drivers
+ *
+ * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
+ *
+ */
+
+#ifndef LIB80211_H
+#define LIB80211_H
+
+/* escape_ssid() is intended to be used in debug (and possibly error)
+ * messages. It should never be used for passing ssid to user space. */
+const char *escape_ssid(const char *ssid, u8 ssid_len);
+
+static inline int is_empty_ssid(const char *ssid, int ssid_len)
+{
+	/* Single white space is for Linksys APs */
+	if (ssid_len == 1 && ssid[0] == ' ')
+		return 1;
+
+	/* Otherwise, if the entire ssid is 0, we assume it is hidden */
+	while (ssid_len) {
+		ssid_len--;
+		if (ssid[ssid_len] != '\0')
+			return 0;
+	}
+
+	return 1;
+}
+
+#endif /* LIB80211_H */
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index df9624c3cebf..d2282bb2e4f1 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -11,6 +11,7 @@ config IEEE80211
 	select IEEE80211_CRYPT_WEP
 	select IEEE80211_CRYPT_TKIP
 	select IEEE80211_CRYPT_CCMP
+	select LIB80211
 	---help---
 	This option enables the hardware independent IEEE 802.11
 	networking stack.  This component is deprecated in favor of the
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index 949772a5a7dc..d34d4e79b6f7 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -308,31 +308,5 @@ MODULE_PARM_DESC(debug, "debug output mask");
 module_exit(ieee80211_exit);
 module_init(ieee80211_init);
 
-const char *escape_essid(const char *essid, u8 essid_len)
-{
-	static char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
-	const char *s = essid;
-	char *d = escaped;
-
-	if (ieee80211_is_empty_essid(essid, essid_len)) {
-		memcpy(escaped, "<hidden>", sizeof("<hidden>"));
-		return escaped;
-	}
-
-	essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE);
-	while (essid_len--) {
-		if (*s == '\0') {
-			*d++ = '\\';
-			*d++ = '0';
-			s++;
-		} else {
-			*d++ = *s++;
-		}
-	}
-	*d = '\0';
-	return escaped;
-}
-
 EXPORT_SYMBOL(alloc_ieee80211);
 EXPORT_SYMBOL(free_ieee80211);
-EXPORT_SYMBOL(escape_essid);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 281223e41c58..876a004918b0 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -32,6 +32,7 @@
 #include <asm/uaccess.h>
 #include <linux/ctype.h>
 
+#include <net/lib80211.h>
 #include <net/ieee80211.h>
 
 static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
@@ -1145,8 +1146,8 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
 
 		switch (info_element->id) {
 		case MFIE_TYPE_SSID:
-			if (ieee80211_is_empty_essid(info_element->data,
-						     info_element->len)) {
+			if (is_empty_ssid(info_element->data,
+					  info_element->len)) {
 				network->flags |= NETWORK_EMPTY_ESSID;
 				break;
 			}
@@ -1390,7 +1391,7 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee
 			network->mode |= IEEE_B;
 	}
 
-	if (ieee80211_is_empty_essid(network->ssid, network->ssid_len))
+	if (is_empty_ssid(network->ssid, network->ssid_len))
 		network->flags |= NETWORK_EMPTY_ESSID;
 
 	memcpy(&network->stats, stats, sizeof(network->stats));
@@ -1456,13 +1457,13 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021
 	if (network->mode == 0) {
 		IEEE80211_DEBUG_SCAN("Filtered out '%s (%pM)' "
 				     "network.\n",
-				     escape_essid(network->ssid,
-						  network->ssid_len),
+				     escape_ssid(network->ssid,
+						 network->ssid_len),
 				     network->bssid);
 		return 1;
 	}
 
-	if (ieee80211_is_empty_essid(network->ssid, network->ssid_len))
+	if (is_empty_ssid(network->ssid, network->ssid_len))
 		network->flags |= NETWORK_EMPTY_ESSID;
 
 	memcpy(&network->stats, stats, sizeof(network->stats));
@@ -1576,7 +1577,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 
 	IEEE80211_DEBUG_SCAN("'%s' (%pM"
 		     "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
-		     escape_essid(info_element->data, info_element->len),
+		     escape_ssid(info_element->data, info_element->len),
 		     beacon->header.addr3,
 		     (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0',
 		     (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0',
@@ -1597,8 +1598,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 
 	if (ieee80211_network_init(ieee, beacon, &network, stats)) {
 		IEEE80211_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n",
-				     escape_essid(info_element->data,
-						  info_element->len),
+				     escape_ssid(info_element->data,
+						 info_element->len),
 				     beacon->header.addr3,
 				     is_beacon(beacon->header.frame_ctl) ?
 				     "BEACON" : "PROBE RESPONSE");
@@ -1635,8 +1636,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 			target = oldest;
 			IEEE80211_DEBUG_SCAN("Expired '%s' (%pM) from "
 					     "network list.\n",
-					     escape_essid(target->ssid,
-							  target->ssid_len),
+					     escape_ssid(target->ssid,
+							 target->ssid_len),
 					     target->bssid);
 			ieee80211_network_reset(target);
 		} else {
@@ -1648,8 +1649,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 
 #ifdef CONFIG_IEEE80211_DEBUG
 		IEEE80211_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n",
-				     escape_essid(network.ssid,
-						  network.ssid_len),
+				     escape_ssid(network.ssid,
+						 network.ssid_len),
 				     network.bssid,
 				     is_beacon(beacon->header.frame_ctl) ?
 				     "BEACON" : "PROBE RESPONSE");
@@ -1659,8 +1660,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 		list_add_tail(&target->list, &ieee->network_list);
 	} else {
 		IEEE80211_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n",
-				     escape_essid(target->ssid,
-						  target->ssid_len),
+				     escape_ssid(target->ssid,
+						 target->ssid_len),
 				     target->bssid,
 				     is_beacon(beacon->header.frame_ctl) ?
 				     "BEACON" : "PROBE RESPONSE");
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 89a81062ab4b..3b031c2910ac 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -283,8 +283,8 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee,
 		else
 			IEEE80211_DEBUG_SCAN("Not showing network '%s ("
 					     "%pM)' due to age (%dms).\n",
-					     escape_essid(network->ssid,
-							  network->ssid_len),
+					     escape_ssid(network->ssid,
+							 network->ssid_len),
 					     network->bssid,
 					     jiffies_to_msecs(jiffies -
 							      network->
@@ -408,7 +408,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 			memset(sec.keys[key] + erq->length, 0,
 			       len - erq->length);
 		IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n",
-				   key, escape_essid(sec.keys[key], len),
+				   key, escape_ssid(sec.keys[key], len),
 				   erq->length, len);
 		sec.key_sizes[key] = len;
 		if (*crypt)
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 646c7121dbc0..ae7f2262dfb5 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -72,3 +72,13 @@ config WIRELESS_EXT_SYSFS
 
 	  Say Y if you have programs using it, like old versions of
 	  hal.
+
+config LIB80211
+	tristate "Common routines for IEEE802.11 drivers"
+	default n
+	help
+	  This options enables a library of common routines used
+	  by IEEE802.11 wireless LAN drivers.
+
+	  Drivers should select this themselves if needed.  Say Y if
+	  you want this built into your kernel.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index b9f943c45f3b..d2d848d445f2 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_WIRELESS_EXT) += wext.o
 obj-$(CONFIG_CFG80211) += cfg80211.o
+obj-$(CONFIG_LIB80211) += lib80211.o
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o
 cfg80211-$(CONFIG_NL80211) += nl80211.o
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
new file mode 100644
index 000000000000..b22d271fb675
--- /dev/null
+++ b/net/wireless/lib80211.c
@@ -0,0 +1,58 @@
+/*
+ * lib80211 -- common bits for IEEE802.11 drivers
+ *
+ * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/ieee80211.h>
+
+#include <net/lib80211.h>
+
+#define DRV_NAME        "lib80211"
+
+#define DRV_DESCRIPTION	"common routines for IEEE802.11 drivers"
+
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
+MODULE_LICENSE("GPL");
+
+const char *escape_ssid(const char *ssid, u8 ssid_len)
+{
+	static char escaped[IEEE80211_MAX_SSID_LEN * 2 + 1];
+	const char *s = ssid;
+	char *d = escaped;
+
+	if (is_empty_ssid(ssid, ssid_len)) {
+		memcpy(escaped, "<hidden>", sizeof("<hidden>"));
+		return escaped;
+	}
+
+	ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN);
+	while (ssid_len--) {
+		if (*s == '\0') {
+			*d++ = '\\';
+			*d++ = '0';
+			s++;
+		} else {
+			*d++ = *s++;
+		}
+	}
+	*d = '\0';
+	return escaped;
+}
+EXPORT_SYMBOL(escape_ssid);
+
+static int __init ieee80211_init(void)
+{
+	printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
+	return 0;
+}
+
+static void __exit ieee80211_exit(void)
+{
+}
+
+module_init(ieee80211_init);
+module_exit(ieee80211_exit);
-- 
cgit v1.2.3


From c5d3dce875ef055ed9b14f169cc967cc2c8faf1f Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 30 Sep 2008 17:17:26 -0400
Subject: wireless: remove NETWORK_EMPTY_ESSID flag

It is unnecessary and of questionable value.  Also remove
is_empty_ssid, as it is also unnecessary.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ipw2200.c | 23 +----------------------
 include/net/ieee80211.h        |  1 -
 include/net/lib80211.h         | 16 ----------------
 net/ieee80211/ieee80211_rx.c   | 15 ++-------------
 net/ieee80211/ieee80211_wx.c   | 12 +++---------
 net/wireless/lib80211.c        |  5 -----
 6 files changed, 6 insertions(+), 66 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index 22278f87d1c1..6e0c55c64e1f 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -5435,16 +5435,6 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 		return 0;
 	}
 
-	/* If we do not have an ESSID for this AP, we can not associate with
-	 * it */
-	if (network->flags & NETWORK_EMPTY_ESSID) {
-		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
-				"because of hidden ESSID.\n",
-				escape_ssid(network->ssid, network->ssid_len),
-				network->bssid);
-		return 0;
-	}
-
 	if (unlikely(roaming)) {
 		/* If we are roaming, then ensure check if this is a valid
 		 * network to try and roam to */
@@ -5649,16 +5639,6 @@ static int ipw_best_network(struct ipw_priv *priv,
 		return 0;
 	}
 
-	/* If we do not have an ESSID for this AP, we can not associate with
-	 * it */
-	if (network->flags & NETWORK_EMPTY_ESSID) {
-		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
-				"because of hidden ESSID.\n",
-				escape_ssid(network->ssid, network->ssid_len),
-				network->bssid);
-		return 0;
-	}
-
 	if (unlikely(roaming)) {
 		/* If we are roaming, then ensure check if this is a valid
 		 * network to try and roam to */
@@ -6881,8 +6861,7 @@ static int ipw_qos_handle_probe_response(struct ipw_priv *priv,
 	if ((priv->status & STATUS_ASSOCIATED) &&
 	    (priv->ieee->iw_mode == IW_MODE_ADHOC) && (active_network == 0)) {
 		if (memcmp(network->bssid, priv->bssid, ETH_ALEN))
-			if ((network->capability & WLAN_CAPABILITY_IBSS) &&
-			    !(network->flags & NETWORK_EMPTY_ESSID))
+			if (network->capability & WLAN_CAPABILITY_IBSS)
 				if ((network->ssid_len ==
 				     priv->assoc_network->ssid_len) &&
 				    !memcmp(network->ssid,
diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index dec10c41e2ec..afa34d3be721 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -729,7 +729,6 @@ struct ieee80211_txb {
 
 #define MAX_WPA_IE_LEN 64
 
-#define NETWORK_EMPTY_ESSID    (1<<0)
 #define NETWORK_HAS_OFDM       (1<<1)
 #define NETWORK_HAS_CCK        (1<<2)
 
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index 91a64f358cef..ce49a30033b6 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -12,20 +12,4 @@
  * messages. It should never be used for passing ssid to user space. */
 const char *escape_ssid(const char *ssid, u8 ssid_len);
 
-static inline int is_empty_ssid(const char *ssid, int ssid_len)
-{
-	/* Single white space is for Linksys APs */
-	if (ssid_len == 1 && ssid[0] == ' ')
-		return 1;
-
-	/* Otherwise, if the entire ssid is 0, we assume it is hidden */
-	while (ssid_len) {
-		ssid_len--;
-		if (ssid[ssid_len] != '\0')
-			return 0;
-	}
-
-	return 1;
-}
-
 #endif /* LIB80211_H */
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 876a004918b0..f15f82e7bbfd 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -1146,12 +1146,6 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
 
 		switch (info_element->id) {
 		case MFIE_TYPE_SSID:
-			if (is_empty_ssid(info_element->data,
-					  info_element->len)) {
-				network->flags |= NETWORK_EMPTY_ESSID;
-				break;
-			}
-
 			network->ssid_len = min(info_element->len,
 						(u8) IW_ESSID_MAX_SIZE);
 			memcpy(network->ssid, info_element->data,
@@ -1161,7 +1155,8 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
 				       IW_ESSID_MAX_SIZE - network->ssid_len);
 
 			IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n",
-					     network->ssid, network->ssid_len);
+					     escape_ssid(network->ssid),
+					     network->ssid_len);
 			break;
 
 		case MFIE_TYPE_RATES:
@@ -1391,9 +1386,6 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee
 			network->mode |= IEEE_B;
 	}
 
-	if (is_empty_ssid(network->ssid, network->ssid_len))
-		network->flags |= NETWORK_EMPTY_ESSID;
-
 	memcpy(&network->stats, stats, sizeof(network->stats));
 
 	if (ieee->handle_assoc_response != NULL)
@@ -1463,9 +1455,6 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021
 		return 1;
 	}
 
-	if (is_empty_ssid(network->ssid, network->ssid_len))
-		network->flags |= NETWORK_EMPTY_ESSID;
-
 	memcpy(&network->stats, stats, sizeof(network->stats));
 
 	return 0;
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 3b031c2910ac..3025140ae721 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -65,15 +65,9 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
 	/* Add the ESSID */
 	iwe.cmd = SIOCGIWESSID;
 	iwe.u.data.flags = 1;
-	if (network->flags & NETWORK_EMPTY_ESSID) {
-		iwe.u.data.length = sizeof("<hidden>");
-		start = iwe_stream_add_point(info, start, stop,
-					     &iwe, "<hidden>");
-	} else {
-		iwe.u.data.length = min(network->ssid_len, (u8) 32);
-		start = iwe_stream_add_point(info, start, stop,
-					     &iwe, network->ssid);
-	}
+	iwe.u.data.length = min(network->ssid_len, (u8) 32);
+	start = iwe_stream_add_point(info, start, stop,
+				     &iwe, network->ssid);
 
 	/* Add the protocol name */
 	iwe.cmd = SIOCGIWNAME;
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index b22d271fb675..872cc8dc00a3 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -24,11 +24,6 @@ const char *escape_ssid(const char *ssid, u8 ssid_len)
 	const char *s = ssid;
 	char *d = escaped;
 
-	if (is_empty_ssid(ssid, ssid_len)) {
-		memcpy(escaped, "<hidden>", sizeof("<hidden>"));
-		return escaped;
-	}
-
 	ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN);
 	while (ssid_len--) {
 		if (*s == '\0') {
-- 
cgit v1.2.3


From 9387b7caf3049168fc97a8a9111af8fe2143af18 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 30 Sep 2008 20:59:05 -0400
Subject: wireless: use individual buffers for printing ssid values

Also change escape_ssid to print_ssid to match print_mac semantics.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ipw2100.c              |  20 ++--
 drivers/net/wireless/ipw2200.c              | 153 +++++++++++++++++-----------
 drivers/net/wireless/iwlwifi/iwl-scan.c     |   7 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |   9 +-
 drivers/net/wireless/libertas/assoc.c       |  17 ++--
 drivers/net/wireless/libertas/cmd.c         |   3 +-
 drivers/net/wireless/libertas/debugfs.c     |   4 +-
 drivers/net/wireless/libertas/scan.c        |  12 ++-
 drivers/net/wireless/libertas/wext.c        |   3 +-
 include/linux/ieee80211.h                   |   6 +-
 include/net/lib80211.h                      |   5 +-
 net/ieee80211/ieee80211_rx.c                |  19 ++--
 net/ieee80211/ieee80211_wx.c                |   7 +-
 net/wireless/lib80211.c                     |   9 +-
 14 files changed, 168 insertions(+), 106 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 223914e3e07e..062c9f280304 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -163,6 +163,8 @@ that only one external action is invoked at a time.
 #include <linux/ctype.h>
 #include <linux/pm_qos_params.h>
 
+#include <net/lib80211.h>
+
 #include "ipw2100.h"
 
 #define IPW2100_VERSION "git-1.2.2"
@@ -1914,6 +1916,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status)
 	u32 chan;
 	char *txratename;
 	u8 bssid[ETH_ALEN];
+	DECLARE_SSID_BUF(ssid);
 
 	/*
 	 * TBD: BSSID is usually 00:00:00:00:00:00 here and not
@@ -1975,7 +1978,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status)
 	}
 
 	IPW_DEBUG_INFO("%s: Associated with '%s' at %s, channel %d (BSSID=%pM)\n",
-		       priv->net_dev->name, escape_ssid(essid, essid_len),
+		       priv->net_dev->name, print_ssid(ssid, essid, essid_len),
 		       txratename, chan, bssid);
 
 	/* now we copy read ssid into dev */
@@ -2002,8 +2005,9 @@ static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid,
 		.host_command_length = ssid_len
 	};
 	int err;
+	DECLARE_SSID_BUF(ssid);
 
-	IPW_DEBUG_HC("SSID: '%s'\n", escape_ssid(essid, ssid_len));
+	IPW_DEBUG_HC("SSID: '%s'\n", print_ssid(ssid, essid, ssid_len));
 
 	if (ssid_len)
 		memcpy(cmd.host_command_parameters, essid, ssid_len);
@@ -2044,9 +2048,11 @@ static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid,
 
 static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status)
 {
+	DECLARE_SSID_BUF(ssid);
+
 	IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC,
 		  "disassociated: '%s' %pM \n",
-		  escape_ssid(priv->essid, priv->essid_len),
+		  print_ssid(ssid, priv->essid, priv->essid_len),
 		  priv->bssid);
 
 	priv->status &= ~(STATUS_ASSOCIATED | STATUS_ASSOCIATING);
@@ -6958,6 +6964,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev,
 	char *essid = "";	/* ANY */
 	int length = 0;
 	int err = 0;
+	DECLARE_SSID_BUF(ssid);
 
 	mutex_lock(&priv->action_mutex);
 	if (!(priv->status & STATUS_INITIALIZED)) {
@@ -6987,8 +6994,8 @@ static int ipw2100_wx_set_essid(struct net_device *dev,
 		goto done;
 	}
 
-	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(essid, length),
-		     length);
+	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n",
+		     print_ssid(ssid, essid, length), length);
 
 	priv->essid_len = length;
 	memcpy(priv->essid, essid, priv->essid_len);
@@ -7009,12 +7016,13 @@ static int ipw2100_wx_get_essid(struct net_device *dev,
 	 */
 
 	struct ipw2100_priv *priv = ieee80211_priv(dev);
+	DECLARE_SSID_BUF(ssid);
 
 	/* If we are associated, trying to associate, or have a statically
 	 * configured ESSID then return that; otherwise return ANY */
 	if (priv->config & CFG_STATIC_ESSID || priv->status & STATUS_ASSOCIATED) {
 		IPW_DEBUG_WX("Getting essid: '%s'\n",
-			     escape_ssid(priv->essid, priv->essid_len));
+			     print_ssid(ssid, priv->essid, priv->essid_len));
 		memcpy(extra, priv->essid, priv->essid_len);
 		wrqu->essid.length = priv->essid_len;
 		wrqu->essid.flags = 1;	/* active */
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index 6e0c55c64e1f..2b9d96a5c10e 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -4395,6 +4395,7 @@ static void handle_scan_event(struct ipw_priv *priv)
 static void ipw_rx_notification(struct ipw_priv *priv,
 				       struct ipw_rx_notification *notif)
 {
+	DECLARE_SSID_BUF(ssid);
 	u16 size = le16_to_cpu(notif->size);
 	notif->size = le16_to_cpu(notif->size);
 
@@ -4409,8 +4410,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 						  IPW_DL_ASSOC,
 						  "associated: '%s' %pM \n",
-						  escape_ssid(priv->essid,
-							      priv->essid_len),
+						  print_ssid(ssid, priv->essid,
+							     priv->essid_len),
 						  priv->bssid);
 
 					switch (priv->ieee->iw_mode) {
@@ -4490,10 +4491,11 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 							  "deauthenticated: '%s' "
 							  "%pM"
 							  ": (0x%04X) - %s \n",
-							  escape_ssid(priv->
-								      essid,
-								      priv->
-								      essid_len),
+							  print_ssid(ssid,
+								     priv->
+								     essid,
+								     priv->
+								     essid_len),
 							  priv->bssid,
 							  le16_to_cpu(auth->status),
 							  ipw_get_status_code
@@ -4512,8 +4514,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 						  IPW_DL_ASSOC,
 						  "authenticated: '%s' %pM\n",
-						  escape_ssid(priv->essid,
-							       priv->essid_len),
+						  print_ssid(ssid, priv->essid,
+							     priv->essid_len),
 						  priv->bssid);
 					break;
 				}
@@ -4540,8 +4542,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 					IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 						  IPW_DL_ASSOC,
 						  "disassociated: '%s' %pM \n",
-						  escape_ssid(priv->essid,
-							       priv->essid_len),
+						  print_ssid(ssid, priv->essid,
+							     priv->essid_len),
 						  priv->bssid);
 
 					priv->status &=
@@ -4578,8 +4580,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 			case CMAS_AUTHENTICATED:
 				IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE,
 					  "authenticated: '%s' %pM \n",
-					  escape_ssid(priv->essid,
-						       priv->essid_len),
+					  print_ssid(ssid, priv->essid,
+						     priv->essid_len),
 					  priv->bssid);
 				priv->status |= STATUS_AUTH;
 				break;
@@ -4597,8 +4599,8 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 				IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE |
 					  IPW_DL_ASSOC,
 					  "deauthenticated: '%s' %pM\n",
-					  escape_ssid(priv->essid,
-						      priv->essid_len),
+					  print_ssid(ssid, priv->essid,
+						     priv->essid_len),
 					  priv->bssid);
 
 				priv->status &= ~(STATUS_ASSOCIATING |
@@ -5423,6 +5425,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 				  int roaming)
 {
 	struct ipw_supported_rates rates;
+	DECLARE_SSID_BUF(ssid);
 
 	/* Verify that this network's capability is compatible with the
 	 * current mode (AdHoc or Infrastructure) */
@@ -5430,7 +5433,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	     !(network->capability & WLAN_CAPABILITY_IBSS))) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded due to "
 				"capability mismatch.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5443,8 +5447,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 			   network->ssid_len)) {
 			IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 					"because of non-network ESSID.\n",
-					escape_ssid(network->ssid,
-						    network->ssid_len),
+					print_ssid(ssid, network->ssid,
+						   network->ssid_len),
 					network->bssid);
 			return 0;
 		}
@@ -5458,13 +5462,14 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 			char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
 
 			strncpy(escaped,
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				sizeof(escaped));
 			IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 					"because of ESSID mismatch: '%s'.\n",
 					escaped, network->bssid,
-					escape_ssid(priv->essid,
-						    priv->essid_len));
+					print_ssid(ssid, priv->essid,
+						   priv->essid_len));
 			return 0;
 		}
 	}
@@ -5475,14 +5480,14 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (network->time_stamp[0] < match->network->time_stamp[0]) {
 		IPW_DEBUG_MERGE("Network '%s excluded because newer than "
 				"current network.\n",
-				escape_ssid(match->network->ssid,
-					    match->network->ssid_len));
+				print_ssid(ssid, match->network->ssid,
+					   match->network->ssid_len));
 		return 0;
 	} else if (network->time_stamp[1] < match->network->time_stamp[1]) {
 		IPW_DEBUG_MERGE("Network '%s excluded because newer than "
 				"current network.\n",
-				escape_ssid(match->network->ssid,
-					    match->network->ssid_len));
+				print_ssid(ssid, match->network->ssid,
+					   match->network->ssid_len));
 		return 0;
 	}
 
@@ -5491,7 +5496,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	    time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of age: %ums.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				jiffies_to_msecs(jiffies -
 						 network->last_scanned));
@@ -5502,7 +5508,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	    (network->channel != priv->channel)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of channel mismatch: %d != %d.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				network->channel, priv->channel);
 		return 0;
@@ -5513,7 +5520,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	    ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of privacy mismatch: %s != %s.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				priv->
 				capability & CAP_PRIVACY_ON ? "on" : "off",
@@ -5526,8 +5534,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (!memcmp(network->bssid, priv->bssid, ETH_ALEN)) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of the same BSSID match: %pM"
-				".\n", escape_ssid(network->ssid,
-						   network->ssid_len),
+				".\n", print_ssid(ssid, network->ssid,
+						  network->ssid_len),
 				network->bssid,
 				priv->bssid);
 		return 0;
@@ -5538,7 +5546,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of invalid frequency/mode "
 				"combination.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5549,7 +5558,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because configured rate mask excludes "
 				"AP mandatory rate.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5557,7 +5567,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	if (rates.num_rates == 0) {
 		IPW_DEBUG_MERGE("Network '%s (%pM)' excluded "
 				"because of no compatible rates.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5570,7 +5581,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 	ipw_copy_rates(&match->rates, &rates);
 	match->network = network;
 	IPW_DEBUG_MERGE("Network '%s (%pM)' is a viable match.\n",
-			escape_ssid(network->ssid, network->ssid_len),
+			print_ssid(ssid, network->ssid, network->ssid_len),
 			network->bssid);
 
 	return 1;
@@ -5578,6 +5589,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv,
 
 static void ipw_merge_adhoc_network(struct work_struct *work)
 {
+	DECLARE_SSID_BUF(ssid);
 	struct ipw_priv *priv =
 		container_of(work, struct ipw_priv, merge_networks);
 	struct ieee80211_network *network = NULL;
@@ -5608,8 +5620,8 @@ static void ipw_merge_adhoc_network(struct work_struct *work)
 		mutex_lock(&priv->mutex);
 		if ((priv->ieee->iw_mode == IW_MODE_ADHOC)) {
 			IPW_DEBUG_MERGE("remove network %s\n",
-					escape_ssid(priv->essid,
-						    priv->essid_len));
+					print_ssid(ssid, priv->essid,
+						   priv->essid_len));
 			ipw_remove_current_network(priv);
 		}
 
@@ -5625,6 +5637,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 			    struct ieee80211_network *network, int roaming)
 {
 	struct ipw_supported_rates rates;
+	DECLARE_SSID_BUF(ssid);
 
 	/* Verify that this network's capability is compatible with the
 	 * current mode (AdHoc or Infrastructure) */
@@ -5634,7 +5647,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	     !(network->capability & WLAN_CAPABILITY_IBSS))) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded due to "
 				"capability mismatch.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5647,8 +5661,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 			   network->ssid_len)) {
 			IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 					"because of non-network ESSID.\n",
-					escape_ssid(network->ssid,
-						     network->ssid_len),
+					print_ssid(ssid, network->ssid,
+						   network->ssid_len),
 					network->bssid);
 			return 0;
 		}
@@ -5661,13 +5675,14 @@ static int ipw_best_network(struct ipw_priv *priv,
 			    min(network->ssid_len, priv->essid_len)))) {
 			char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
 			strncpy(escaped,
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				sizeof(escaped));
 			IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 					"because of ESSID mismatch: '%s'.\n",
 					escaped, network->bssid,
-					escape_ssid(priv->essid,
-						    priv->essid_len));
+					print_ssid(ssid, priv->essid,
+						   priv->essid_len));
 			return 0;
 		}
 	}
@@ -5677,13 +5692,13 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (match->network && match->network->stats.rssi > network->stats.rssi) {
 		char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
 		strncpy(escaped,
-			escape_ssid(network->ssid, network->ssid_len),
+			print_ssid(ssid, network->ssid, network->ssid_len),
 			sizeof(escaped));
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded because "
 				"'%s (%pM)' has a stronger signal.\n",
 				escaped, network->bssid,
-				escape_ssid(match->network->ssid,
-					    match->network->ssid_len),
+				print_ssid(ssid, match->network->ssid,
+					   match->network->ssid_len),
 				match->network->bssid);
 		return 0;
 	}
@@ -5695,7 +5710,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of storming (%ums since last "
 				"assoc attempt).\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				jiffies_to_msecs(jiffies -
 						 network->last_associate));
@@ -5707,7 +5723,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of age: %ums.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				jiffies_to_msecs(jiffies -
 						 network->last_scanned));
@@ -5718,7 +5735,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    (network->channel != priv->channel)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of channel mismatch: %d != %d.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				network->channel, priv->channel);
 		return 0;
@@ -5729,7 +5747,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of privacy mismatch: %s != %s.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid,
 				priv->capability & CAP_PRIVACY_ON ? "on" :
 				"off",
@@ -5742,7 +5761,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	    memcmp(network->bssid, priv->bssid, ETH_ALEN)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of BSSID mismatch: %pM.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid, priv->bssid);
 		return 0;
 	}
@@ -5752,7 +5772,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of invalid frequency/mode "
 				"combination.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5761,7 +5782,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (!ieee80211_is_valid_channel(priv->ieee, network->channel)) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of invalid channel in current GEO\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5772,7 +5794,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because configured rate mask excludes "
 				"AP mandatory rate.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5780,7 +5803,8 @@ static int ipw_best_network(struct ipw_priv *priv,
 	if (rates.num_rates == 0) {
 		IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded "
 				"because of no compatible rates.\n",
-				escape_ssid(network->ssid, network->ssid_len),
+				print_ssid(ssid, network->ssid,
+					   network->ssid_len),
 				network->bssid);
 		return 0;
 	}
@@ -5794,7 +5818,7 @@ static int ipw_best_network(struct ipw_priv *priv,
 	match->network = network;
 
 	IPW_DEBUG_ASSOC("Network '%s (%pM)' is a viable match.\n",
-			escape_ssid(network->ssid, network->ssid_len),
+			print_ssid(ssid, network->ssid, network->ssid_len),
 			network->bssid);
 
 	return 1;
@@ -6037,6 +6061,7 @@ static void ipw_bg_adhoc_check(struct work_struct *work)
 
 static void ipw_debug_config(struct ipw_priv *priv)
 {
+	DECLARE_SSID_BUF(ssid);
 	IPW_DEBUG_INFO("Scan completed, no valid APs matched "
 		       "[CFG 0x%08X]\n", priv->config);
 	if (priv->config & CFG_STATIC_CHANNEL)
@@ -6045,7 +6070,7 @@ static void ipw_debug_config(struct ipw_priv *priv)
 		IPW_DEBUG_INFO("Channel unlocked.\n");
 	if (priv->config & CFG_STATIC_ESSID)
 		IPW_DEBUG_INFO("ESSID locked to '%s'\n",
-			       escape_ssid(priv->essid, priv->essid_len));
+			       print_ssid(ssid, priv->essid, priv->essid_len));
 	else
 		IPW_DEBUG_INFO("ESSID unlocked.\n");
 	if (priv->config & CFG_STATIC_BSSID)
@@ -7263,6 +7288,7 @@ static int ipw_associate_network(struct ipw_priv *priv,
 				 struct ipw_supported_rates *rates, int roaming)
 {
 	int err;
+	DECLARE_SSID_BUF(ssid);
 
 	if (priv->config & CFG_FIXED_RATE)
 		ipw_set_fixed_rate(priv, network->mode);
@@ -7331,7 +7357,7 @@ static int ipw_associate_network(struct ipw_priv *priv,
 	IPW_DEBUG_ASSOC("%sssocation attempt: '%s', channel %d, "
 			"802.11%c [%d], %s[:%s], enc=%s%s%s%c%c\n",
 			roaming ? "Rea" : "A",
-			escape_ssid(priv->essid, priv->essid_len),
+			print_ssid(ssid, priv->essid, priv->essid_len),
 			network->channel,
 			ipw_modes[priv->assoc_request.ieee_mode],
 			rates->num_rates,
@@ -7431,7 +7457,7 @@ static int ipw_associate_network(struct ipw_priv *priv,
 	}
 
 	IPW_DEBUG(IPW_DL_STATE, "associating: '%s' %pM \n",
-		  escape_ssid(priv->essid, priv->essid_len),
+		  print_ssid(ssid, priv->essid, priv->essid_len),
 		  priv->bssid);
 
 	return 0;
@@ -7522,6 +7548,7 @@ static int ipw_associate(void *data)
 	struct ipw_supported_rates *rates;
 	struct list_head *element;
 	unsigned long flags;
+	DECLARE_SSID_BUF(ssid);
 
 	if (priv->ieee->iw_mode == IW_MODE_MONITOR) {
 		IPW_DEBUG_ASSOC("Not attempting association (monitor mode)\n");
@@ -7583,8 +7610,8 @@ static int ipw_associate(void *data)
 			target = oldest;
 			IPW_DEBUG_ASSOC("Expired '%s' (%pM) from "
 					"network list.\n",
-					escape_ssid(target->ssid,
-						    target->ssid_len),
+					print_ssid(ssid, target->ssid,
+						   target->ssid_len),
 					target->bssid);
 			list_add_tail(&target->list,
 				      &priv->ieee->network_free_list);
@@ -9012,6 +9039,7 @@ static int ipw_wx_set_essid(struct net_device *dev,
 {
 	struct ipw_priv *priv = ieee80211_priv(dev);
         int length;
+	DECLARE_SSID_BUF(ssid);
 
         mutex_lock(&priv->mutex);
 
@@ -9036,8 +9064,8 @@ static int ipw_wx_set_essid(struct net_device *dev,
 		return 0;
 	}
 
-	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(extra, length),
-		     length);
+	IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n",
+		     print_ssid(ssid, extra, length), length);
 
 	priv->essid_len = length;
 	memcpy(priv->essid, extra, priv->essid_len);
@@ -9056,6 +9084,7 @@ static int ipw_wx_get_essid(struct net_device *dev,
 			    union iwreq_data *wrqu, char *extra)
 {
 	struct ipw_priv *priv = ieee80211_priv(dev);
+	DECLARE_SSID_BUF(ssid);
 
 	/* If we are associated, trying to associate, or have a statically
 	 * configured ESSID then return that; otherwise return ANY */
@@ -9063,7 +9092,7 @@ static int ipw_wx_get_essid(struct net_device *dev,
 	if (priv->config & CFG_STATIC_ESSID ||
 	    priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)) {
 		IPW_DEBUG_WX("Getting essid: '%s'\n",
-			     escape_ssid(priv->essid, priv->essid_len));
+			     print_ssid(ssid, priv->essid, priv->essid_len));
 		memcpy(extra, priv->essid, priv->essid_len);
 		wrqu->essid.length = priv->essid_len;
 		wrqu->essid.flags = 1;	/* active */
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index 1cc8aa592821..3379b41fb5ee 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -643,6 +643,7 @@ static void iwl_bg_request_scan(struct work_struct *data)
 	u8 n_probes = 2;
 	u8 rx_chain = priv->hw_params.valid_rx_ant;
 	u8 rate;
+	DECLARE_SSID_BUF(ssid);
 
 	conf = ieee80211_get_hw_conf(priv->hw);
 
@@ -735,8 +736,8 @@ static void iwl_bg_request_scan(struct work_struct *data)
 	/* We should add the ability for user to lock to PASSIVE ONLY */
 	if (priv->one_direct_scan) {
 		IWL_DEBUG_SCAN("Start direct scan for '%s'\n",
-				escape_ssid(priv->direct_ssid,
-					    priv->direct_ssid_len));
+				print_ssid(ssid, priv->direct_ssid,
+					   priv->direct_ssid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->direct_ssid_len;
 		memcpy(scan->direct_scan[0].ssid,
@@ -744,7 +745,7 @@ static void iwl_bg_request_scan(struct work_struct *data)
 		n_probes++;
 	} else if (!iwl_is_associated(priv) && priv->essid_len) {
 		IWL_DEBUG_SCAN("Start direct scan for '%s' (not associated)\n",
-				escape_ssid(priv->essid, priv->essid_len));
+				print_ssid(ssid, priv->essid, priv->essid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->essid_len;
 		memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len);
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 370cc46b4888..8009094503e4 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -6054,6 +6054,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 	struct ieee80211_conf *conf = NULL;
 	u8 n_probes = 2;
 	enum ieee80211_band band;
+	DECLARE_SSID_BUF(ssid);
 
 	conf = ieee80211_get_hw_conf(priv->hw);
 
@@ -6154,7 +6155,8 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 	if (priv->one_direct_scan) {
 		IWL_DEBUG_SCAN
 		    ("Kicking off one direct scan for '%s'\n",
-		     escape_ssid(priv->direct_ssid, priv->direct_ssid_len));
+		     print_ssid(ssid, priv->direct_ssid,
+				priv->direct_ssid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->direct_ssid_len;
 		memcpy(scan->direct_scan[0].ssid,
@@ -6163,7 +6165,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data)
 	} else if (!iwl3945_is_associated(priv) && priv->essid_len) {
 		IWL_DEBUG_SCAN
 		  ("Kicking off one direct scan for '%s' when not associated\n",
-		   escape_ssid(priv->essid, priv->essid_len));
+		   print_ssid(ssid, priv->essid, priv->essid_len));
 		scan->direct_scan[0].id = WLAN_EID_SSID;
 		scan->direct_scan[0].len = priv->essid_len;
 		memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len);
@@ -6945,6 +6947,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len)
 	int rc = 0;
 	unsigned long flags;
 	struct iwl3945_priv *priv = hw->priv;
+	DECLARE_SSID_BUF(ssid_buf);
 
 	IWL_DEBUG_MAC80211("enter\n");
 
@@ -6978,7 +6981,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len)
 	}
 	if (len) {
 		IWL_DEBUG_SCAN("direct scan for %s [%d]\n ",
-			       escape_ssid(ssid, len), (int)len);
+			       print_ssid(ssid_buf, ssid, len), (int)len);
 
 		priv->one_direct_scan = 1;
 		priv->direct_ssid_len = (u8)
diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c
index 3492e89d1dd5..92863780286f 100644
--- a/drivers/net/wireless/libertas/assoc.c
+++ b/drivers/net/wireless/libertas/assoc.c
@@ -153,17 +153,18 @@ static int lbs_adhoc_join(struct lbs_private *priv,
 	struct cmd_ds_802_11_ad_hoc_join cmd;
 	struct bss_descriptor *bss = &assoc_req->bss;
 	u8 preamble = RADIO_PREAMBLE_LONG;
+	DECLARE_SSID_BUF(ssid);
 	u16 ratesize = 0;
 	int ret = 0;
 
 	lbs_deb_enter(LBS_DEB_ASSOC);
 
 	lbs_deb_join("current SSID '%s', ssid length %u\n",
-		escape_ssid(priv->curbssparams.ssid,
+		print_ssid(ssid, priv->curbssparams.ssid,
 		priv->curbssparams.ssid_len),
 		priv->curbssparams.ssid_len);
 	lbs_deb_join("requested ssid '%s', ssid length %u\n",
-		escape_ssid(bss->ssid, bss->ssid_len),
+		print_ssid(ssid, bss->ssid, bss->ssid_len),
 		bss->ssid_len);
 
 	/* check if the requested SSID is already joined */
@@ -308,6 +309,7 @@ static int lbs_adhoc_start(struct lbs_private *priv,
 	size_t ratesize = 0;
 	u16 tmpcap = 0;
 	int ret = 0;
+	DECLARE_SSID_BUF(ssid);
 
 	lbs_deb_enter(LBS_DEB_ASSOC);
 
@@ -327,7 +329,7 @@ static int lbs_adhoc_start(struct lbs_private *priv,
 	memcpy(cmd.ssid, assoc_req->ssid, assoc_req->ssid_len);
 
 	lbs_deb_join("ADHOC_START: SSID '%s', ssid length %u\n",
-		escape_ssid(assoc_req->ssid, assoc_req->ssid_len),
+		print_ssid(ssid, assoc_req->ssid, assoc_req->ssid_len),
 		assoc_req->ssid_len);
 
 	cmd.bsstype = CMD_BSS_TYPE_IBSS;
@@ -695,6 +697,7 @@ static int assoc_helper_essid(struct lbs_private *priv,
 	int ret = 0;
 	struct bss_descriptor * bss;
 	int channel = -1;
+	DECLARE_SSID_BUF(ssid);
 
 	lbs_deb_enter(LBS_DEB_ASSOC);
 
@@ -706,7 +709,7 @@ static int assoc_helper_essid(struct lbs_private *priv,
 		channel = assoc_req->channel;
 
 	lbs_deb_assoc("SSID '%s' requested\n",
-	              escape_ssid(assoc_req->ssid, assoc_req->ssid_len));
+	              print_ssid(ssid, assoc_req->ssid, assoc_req->ssid_len));
 	if (assoc_req->mode == IW_MODE_INFRA) {
 		lbs_send_specific_ssid_scan(priv, assoc_req->ssid,
 			assoc_req->ssid_len);
@@ -1207,6 +1210,7 @@ void lbs_association_worker(struct work_struct *work)
 	struct assoc_request * assoc_req = NULL;
 	int ret = 0;
 	int find_any_ssid = 0;
+	DECLARE_SSID_BUF(ssid);
 
 	lbs_deb_enter(LBS_DEB_ASSOC);
 
@@ -1230,7 +1234,7 @@ void lbs_association_worker(struct work_struct *work)
 		"    secinfo:  %s%s%s\n"
 		"    auth_mode: %d\n",
 		assoc_req->flags,
-		escape_ssid(assoc_req->ssid, assoc_req->ssid_len),
+		print_ssid(ssid, assoc_req->ssid, assoc_req->ssid_len),
 		assoc_req->channel, assoc_req->band, assoc_req->mode,
 		assoc_req->bssid,
 		assoc_req->secinfo.WPAenabled ? " WPA" : "",
@@ -1767,6 +1771,7 @@ static int lbs_adhoc_post(struct lbs_private *priv, struct cmd_header *resp)
 	struct cmd_ds_802_11_ad_hoc_result *adhoc_resp;
 	union iwreq_data wrqu;
 	struct bss_descriptor *bss;
+	DECLARE_SSID_BUF(ssid);
 
 	lbs_deb_enter(LBS_DEB_JOIN);
 
@@ -1816,7 +1821,7 @@ static int lbs_adhoc_post(struct lbs_private *priv, struct cmd_header *resp)
 	wireless_send_event(priv->dev, SIOCGIWAP, &wrqu, NULL);
 
 	lbs_deb_join("ADHOC_RESP: Joined/started '%s', BSSID %pM, channel %d\n",
-		     escape_ssid(bss->ssid, bss->ssid_len),
+		     print_ssid(ssid, bss->ssid, bss->ssid_len),
 		     priv->curbssparams.bssid,
 		     priv->curbssparams.channel);
 
diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c
index 52feab69ee49..38843c8b919c 100644
--- a/drivers/net/wireless/libertas/cmd.c
+++ b/drivers/net/wireless/libertas/cmd.c
@@ -1063,6 +1063,7 @@ int lbs_mesh_config(struct lbs_private *priv, uint16_t action, uint16_t chan)
 {
 	struct cmd_ds_mesh_config cmd;
 	struct mrvl_meshie *ie;
+	DECLARE_SSID_BUF(ssid);
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.channel = cpu_to_le16(chan);
@@ -1093,7 +1094,7 @@ int lbs_mesh_config(struct lbs_private *priv, uint16_t action, uint16_t chan)
 	}
 	lbs_deb_cmd("mesh config action %d type %x channel %d SSID %s\n",
 		    action, priv->mesh_tlv, chan,
-		    escape_ssid(priv->mesh_ssid, priv->mesh_ssid_len));
+		    print_ssid(ssid, priv->mesh_ssid, priv->mesh_ssid_len));
 
 	return __lbs_mesh_config_send(priv, &cmd, action, priv->mesh_tlv);
 }
diff --git a/drivers/net/wireless/libertas/debugfs.c b/drivers/net/wireless/libertas/debugfs.c
index 84933203be74..ec4efd7ff3c8 100644
--- a/drivers/net/wireless/libertas/debugfs.c
+++ b/drivers/net/wireless/libertas/debugfs.c
@@ -66,6 +66,7 @@ static ssize_t lbs_getscantable(struct file *file, char __user *userbuf,
 	int numscansdone = 0, res;
 	unsigned long addr = get_zeroed_page(GFP_KERNEL);
 	char *buf = (char *)addr;
+	DECLARE_SSID_BUF(ssid);
 	struct bss_descriptor * iter_bss;
 
 	pos += snprintf(buf+pos, len-pos,
@@ -86,7 +87,8 @@ static ssize_t lbs_getscantable(struct file *file, char __user *userbuf,
 				spectrum_mgmt ? 'S' : ' ');
 		pos += snprintf(buf+pos, len-pos, " %04d |", SCAN_RSSI(iter_bss->rssi));
 		pos += snprintf(buf+pos, len-pos, " %s\n",
-		                escape_ssid(iter_bss->ssid, iter_bss->ssid_len));
+		                print_ssid(ssid, iter_bss->ssid,
+					   iter_bss->ssid_len));
 
 		numscansdone++;
 	}
diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c
index 7881890a4e98..5c34ac588189 100644
--- a/drivers/net/wireless/libertas/scan.c
+++ b/drivers/net/wireless/libertas/scan.c
@@ -362,6 +362,7 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan)
 #ifdef CONFIG_LIBERTAS_DEBUG
 	struct bss_descriptor *iter;
 	int i = 0;
+	DECLARE_SSID_BUF(ssid);
 #endif
 
 	lbs_deb_enter_args(LBS_DEB_SCAN, "full_scan %d", full_scan);
@@ -455,7 +456,7 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan)
 	list_for_each_entry(iter, &priv->network_list, list)
 		lbs_deb_scan("%02d: BSSID %pM, RSSI %d, SSID '%s'\n",
 			     i++, iter->bssid, iter->rssi,
-			     escape_ssid(iter->ssid, iter->ssid_len));
+			     print_ssid(ssid, iter->ssid, iter->ssid_len));
 	mutex_unlock(&priv->lock);
 #endif
 
@@ -514,6 +515,7 @@ static int lbs_process_bss(struct bss_descriptor *bss,
 	struct ieeetypes_dsparamset *pDS;
 	struct ieeetypes_cfparamset *pCF;
 	struct ieeetypes_ibssparamset *pibss;
+	DECLARE_SSID_BUF(ssid);
 	struct ieeetypes_countryinfoset *pcountryinfo;
 	uint8_t *pos, *end, *p;
 	uint8_t n_ex_rates = 0, got_basic_rates = 0, n_basic_rates = 0;
@@ -602,7 +604,7 @@ static int lbs_process_bss(struct bss_descriptor *bss,
 			bss->ssid_len = min_t(int, 32, elem->len);
 			memcpy(bss->ssid, elem->data, bss->ssid_len);
 			lbs_deb_scan("got SSID IE: '%s', len %u\n",
-			             escape_ssid(bss->ssid, bss->ssid_len),
+			             print_ssid(ssid, bss->ssid, bss->ssid_len),
 			             bss->ssid_len);
 			break;
 
@@ -742,10 +744,11 @@ done:
 int lbs_send_specific_ssid_scan(struct lbs_private *priv, uint8_t *ssid,
 				uint8_t ssid_len)
 {
+	DECLARE_SSID_BUF(ssid_buf);
 	int ret = 0;
 
 	lbs_deb_enter_args(LBS_DEB_SCAN, "SSID '%s'\n",
-			   escape_ssid(ssid, ssid_len));
+			   print_ssid(ssid_buf, ssid, ssid_len));
 
 	if (!ssid_len)
 		goto out;
@@ -940,6 +943,7 @@ out:
 int lbs_set_scan(struct net_device *dev, struct iw_request_info *info,
 		 union iwreq_data *wrqu, char *extra)
 {
+	DECLARE_SSID_BUF(ssid);
 	struct lbs_private *priv = dev->priv;
 	int ret = 0;
 
@@ -969,7 +973,7 @@ int lbs_set_scan(struct net_device *dev, struct iw_request_info *info,
 		priv->scan_ssid_len = req->essid_len;
 		memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len);
 		lbs_deb_wext("set_scan, essid '%s'\n",
-			escape_ssid(priv->scan_ssid, priv->scan_ssid_len));
+			print_ssid(ssid, priv->scan_ssid, priv->scan_ssid_len));
 	} else {
 		priv->scan_ssid_len = 0;
 	}
diff --git a/drivers/net/wireless/libertas/wext.c b/drivers/net/wireless/libertas/wext.c
index 247579951858..d4c6a659b562 100644
--- a/drivers/net/wireless/libertas/wext.c
+++ b/drivers/net/wireless/libertas/wext.c
@@ -1978,6 +1978,7 @@ static int lbs_set_essid(struct net_device *dev, struct iw_request_info *info,
 	u8 ssid_len = 0;
 	struct assoc_request * assoc_req;
 	int in_ssid_len = dwrq->length;
+	DECLARE_SSID_BUF(ssid_buf);
 
 	lbs_deb_enter(LBS_DEB_WEXT);
 
@@ -2006,7 +2007,7 @@ static int lbs_set_essid(struct net_device *dev, struct iw_request_info *info,
 		lbs_deb_wext("requested any SSID\n");
 	} else {
 		lbs_deb_wext("requested SSID '%s'\n",
-		             escape_ssid(ssid, ssid_len));
+		             print_ssid(ssid_buf, ssid, ssid_len));
 	}
 
 out:
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 64a4abce6d91..b0726e2079b5 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -12,8 +12,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef IEEE80211_H
-#define IEEE80211_H
+#ifndef LINUX_IEEE80211_H
+#define LINUX_IEEE80211_H
 
 #include <linux/types.h>
 #include <asm/byteorder.h>
@@ -1114,4 +1114,4 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr)
 		return hdr->addr1;
 }
 
-#endif /* IEEE80211_H */
+#endif /* LINUX_IEEE80211_H */
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index ce49a30033b6..906d96f1b264 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -8,8 +8,9 @@
 #ifndef LIB80211_H
 #define LIB80211_H
 
-/* escape_ssid() is intended to be used in debug (and possibly error)
+/* print_ssid() is intended to be used in debug (and possibly error)
  * messages. It should never be used for passing ssid to user space. */
-const char *escape_ssid(const char *ssid, u8 ssid_len);
+const char *print_ssid(char *buf, const char *ssid, u8 ssid_len);
+#define DECLARE_SSID_BUF(var) char var[32 * 4 + 1] __maybe_unused
 
 #endif /* LIB80211_H */
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index f15f82e7bbfd..d19c8de6ef25 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -1124,6 +1124,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
 				      *info_element, u16 length,
 				      struct ieee80211_network *network)
 {
+	DECLARE_SSID_BUF(ssid);
 	u8 i;
 #ifdef CONFIG_IEEE80211_DEBUG
 	char rates_str[64];
@@ -1155,7 +1156,8 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element
 				       IW_ESSID_MAX_SIZE - network->ssid_len);
 
 			IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n",
-					     escape_ssid(network->ssid),
+					     print_ssid(ssid, network->ssid,
+							network->ssid_len),
 					     network->ssid_len);
 			break;
 
@@ -1401,6 +1403,8 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021
 					 struct ieee80211_network *network,
 					 struct ieee80211_rx_stats *stats)
 {
+	DECLARE_SSID_BUF(ssid);
+
 	network->qos_data.active = 0;
 	network->qos_data.supported = 0;
 	network->qos_data.param_count = 0;
@@ -1449,7 +1453,7 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021
 	if (network->mode == 0) {
 		IEEE80211_DEBUG_SCAN("Filtered out '%s (%pM)' "
 				     "network.\n",
-				     escape_ssid(network->ssid,
+				     print_ssid(ssid, network->ssid,
 						 network->ssid_len),
 				     network->bssid);
 		return 1;
@@ -1563,10 +1567,11 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 	struct ieee80211_info_element *info_element = beacon->info_element;
 #endif
 	unsigned long flags;
+	DECLARE_SSID_BUF(ssid);
 
 	IEEE80211_DEBUG_SCAN("'%s' (%pM"
 		     "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
-		     escape_ssid(info_element->data, info_element->len),
+		     print_ssid(ssid, info_element->data, info_element->len),
 		     beacon->header.addr3,
 		     (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0',
 		     (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0',
@@ -1587,7 +1592,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 
 	if (ieee80211_network_init(ieee, beacon, &network, stats)) {
 		IEEE80211_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n",
-				     escape_ssid(info_element->data,
+				     print_ssid(ssid, info_element->data,
 						 info_element->len),
 				     beacon->header.addr3,
 				     is_beacon(beacon->header.frame_ctl) ?
@@ -1625,7 +1630,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 			target = oldest;
 			IEEE80211_DEBUG_SCAN("Expired '%s' (%pM) from "
 					     "network list.\n",
-					     escape_ssid(target->ssid,
+					     print_ssid(ssid, target->ssid,
 							 target->ssid_len),
 					     target->bssid);
 			ieee80211_network_reset(target);
@@ -1638,7 +1643,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 
 #ifdef CONFIG_IEEE80211_DEBUG
 		IEEE80211_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n",
-				     escape_ssid(network.ssid,
+				     print_ssid(ssid, network.ssid,
 						 network.ssid_len),
 				     network.bssid,
 				     is_beacon(beacon->header.frame_ctl) ?
@@ -1649,7 +1654,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device
 		list_add_tail(&target->list, &ieee->network_list);
 	} else {
 		IEEE80211_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n",
-				     escape_ssid(target->ssid,
+				     print_ssid(ssid, target->ssid,
 						 target->ssid_len),
 				     target->bssid,
 				     is_beacon(beacon->header.frame_ctl) ?
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 3025140ae721..29eb41695a82 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/jiffies.h>
 
+#include <net/lib80211.h>
 #include <net/ieee80211.h>
 #include <linux/wireless.h>
 
@@ -258,6 +259,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee,
 	char *ev = extra;
 	char *stop = ev + wrqu->data.length;
 	int i = 0;
+	DECLARE_SSID_BUF(ssid);
 
 	IEEE80211_DEBUG_WX("Getting scan\n");
 
@@ -277,7 +279,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee,
 		else
 			IEEE80211_DEBUG_SCAN("Not showing network '%s ("
 					     "%pM)' due to age (%dms).\n",
-					     escape_ssid(network->ssid,
+					     print_ssid(ssid, network->ssid,
 							 network->ssid_len),
 					     network->bssid,
 					     jiffies_to_msecs(jiffies -
@@ -307,6 +309,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 	int i, key, key_provided, len;
 	struct ieee80211_crypt_data **crypt;
 	int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv;
+	DECLARE_SSID_BUF(ssid);
 
 	IEEE80211_DEBUG_WX("SET_ENCODE\n");
 
@@ -402,7 +405,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 			memset(sec.keys[key] + erq->length, 0,
 			       len - erq->length);
 		IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n",
-				   key, escape_ssid(sec.keys[key], len),
+				   key, print_ssid(ssid, sec.keys[key], len),
 				   erq->length, len);
 		sec.key_sizes[key] = len;
 		if (*crypt)
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index b8e34d31e757..e71f7d085621 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -19,11 +19,10 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION);
 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
 MODULE_LICENSE("GPL");
 
-const char *escape_ssid(const char *ssid, u8 ssid_len)
+const char *print_ssid(char *buf, const char *ssid, u8 ssid_len)
 {
-	static char escaped[IEEE80211_MAX_SSID_LEN * 4 + 1];
 	const char *s = ssid;
-	char *d = escaped;
+	char *d = buf;
 
 	ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN);
 	while (ssid_len--) {
@@ -48,9 +47,9 @@ const char *escape_ssid(const char *ssid, u8 ssid_len)
 		s++;
 	}
 	*d = '\0';
-	return escaped;
+	return buf;
 }
-EXPORT_SYMBOL(escape_ssid);
+EXPORT_SYMBOL(print_ssid);
 
 static int __init ieee80211_init(void)
 {
-- 
cgit v1.2.3


From 72118015271e6d3852cb9f647efe0987d131adaa Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 30 Sep 2008 21:43:03 -0400
Subject: wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts

There is quite a lot of overlap in definitions between these headers...

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/hostap/hostap_common.h |  13 ----
 drivers/net/wireless/ipw2200.c              |  24 +++---
 include/linux/ieee80211.h                   |   3 +-
 include/net/ieee80211.h                     | 112 +---------------------------
 include/net/lib80211.h                      |   4 +-
 net/ieee80211/ieee80211_rx.c                |   2 +-
 6 files changed, 19 insertions(+), 139 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/hostap/hostap_common.h b/drivers/net/wireless/hostap/hostap_common.h
index b470c743c2d1..90b64b092007 100644
--- a/drivers/net/wireless/hostap/hostap_common.h
+++ b/drivers/net/wireless/hostap/hostap_common.h
@@ -6,19 +6,6 @@
 
 /* IEEE 802.11 defines */
 
-/* Information Element IDs */
-#define WLAN_EID_SSID 0
-#define WLAN_EID_SUPP_RATES 1
-#define WLAN_EID_FH_PARAMS 2
-#define WLAN_EID_DS_PARAMS 3
-#define WLAN_EID_CF_PARAMS 4
-#define WLAN_EID_TIM 5
-#define WLAN_EID_IBSS_PARAMS 6
-#define WLAN_EID_CHALLENGE 16
-#define WLAN_EID_RSN 48
-#define WLAN_EID_GENERIC 221
-
-
 /* HFA384X Configuration RIDs */
 #define HFA384X_RID_CNFPORTTYPE 0xFC00
 #define HFA384X_RID_CNFOWNMACADDR 0xFC01
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index 2b9d96a5c10e..051ae92d8b65 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -4446,7 +4446,7 @@ static void ipw_rx_notification(struct ipw_priv *priv,
 
 #ifdef CONFIG_IPW2200_QOS
 #define IPW_GET_PACKET_STYPE(x) WLAN_FC_GET_STYPE( \
-			 le16_to_cpu(((struct ieee80211_hdr *)(x))->frame_ctl))
+			 le16_to_cpu(((struct ieee80211_hdr *)(x))->frame_control))
 					if ((priv->status & STATUS_AUTH) &&
 					    (IPW_GET_PACKET_STYPE(&notif->u.raw)
 					     == IEEE80211_STYPE_ASSOC_RESP)) {
@@ -7665,12 +7665,12 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv,
 	u16 fc;
 
 	hdr = (struct ieee80211_hdr *)skb->data;
-	fc = le16_to_cpu(hdr->frame_ctl);
+	fc = le16_to_cpu(hdr->frame_control);
 	if (!(fc & IEEE80211_FCTL_PROTECTED))
 		return;
 
 	fc &= ~IEEE80211_FCTL_PROTECTED;
-	hdr->frame_ctl = cpu_to_le16(fc);
+	hdr->frame_control = cpu_to_le16(fc);
 	switch (priv->ieee->sec.level) {
 	case SEC_LEVEL_3:
 		/* Remove CCMP HDR */
@@ -7982,17 +7982,17 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv,
 	}
 
 	hdr = (void *)rxb->skb->data + IPW_RX_FRAME_SIZE;
-	if (ieee80211_is_management(le16_to_cpu(hdr->frame_ctl))) {
+	if (ieee80211_is_management(le16_to_cpu(hdr->frame_control))) {
 		if (filter & IPW_PROM_NO_MGMT)
 			return;
 		if (filter & IPW_PROM_MGMT_HEADER_ONLY)
 			hdr_only = 1;
-	} else if (ieee80211_is_control(le16_to_cpu(hdr->frame_ctl))) {
+	} else if (ieee80211_is_control(le16_to_cpu(hdr->frame_control))) {
 		if (filter & IPW_PROM_NO_CTL)
 			return;
 		if (filter & IPW_PROM_CTL_HEADER_ONLY)
 			hdr_only = 1;
-	} else if (ieee80211_is_data(le16_to_cpu(hdr->frame_ctl))) {
+	} else if (ieee80211_is_data(le16_to_cpu(hdr->frame_control))) {
 		if (filter & IPW_PROM_NO_DATA)
 			return;
 		if (filter & IPW_PROM_DATA_HEADER_ONLY)
@@ -8010,7 +8010,7 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv,
 	ipw_rt = (void *)skb->data;
 
 	if (hdr_only)
-		len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl));
+		len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control));
 
 	memcpy(ipw_rt->payload, hdr, len);
 
@@ -8230,7 +8230,7 @@ static  int is_duplicate_packet(struct ipw_priv *priv,
 	/* Comment this line now since we observed the card receives
 	 * duplicate packets but the FCTL_RETRY bit is not set in the
 	 * IBSS mode with fragmentation enabled.
-	 BUG_ON(!(le16_to_cpu(header->frame_ctl) & IEEE80211_FCTL_RETRY)); */
+	 BUG_ON(!(le16_to_cpu(header->frame_control) & IEEE80211_FCTL_RETRY)); */
 	return 1;
 }
 
@@ -10381,17 +10381,17 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
 
 	/* Filtering of fragment chains is done agains the first fragment */
 	hdr = (void *)txb->fragments[0]->data;
-	if (ieee80211_is_management(le16_to_cpu(hdr->frame_ctl))) {
+	if (ieee80211_is_management(le16_to_cpu(hdr->frame_control))) {
 		if (filter & IPW_PROM_NO_MGMT)
 			return;
 		if (filter & IPW_PROM_MGMT_HEADER_ONLY)
 			hdr_only = 1;
-	} else if (ieee80211_is_control(le16_to_cpu(hdr->frame_ctl))) {
+	} else if (ieee80211_is_control(le16_to_cpu(hdr->frame_control))) {
 		if (filter & IPW_PROM_NO_CTL)
 			return;
 		if (filter & IPW_PROM_CTL_HEADER_ONLY)
 			hdr_only = 1;
-	} else if (ieee80211_is_data(le16_to_cpu(hdr->frame_ctl))) {
+	} else if (ieee80211_is_data(le16_to_cpu(hdr->frame_control))) {
 		if (filter & IPW_PROM_NO_DATA)
 			return;
 		if (filter & IPW_PROM_DATA_HEADER_ONLY)
@@ -10406,7 +10406,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
 
 		if (hdr_only) {
 			hdr = (void *)src->data;
-			len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl));
+			len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control));
 		} else
 			len = src->len;
 
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index b0726e2079b5..aad99195a4cc 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -826,8 +826,7 @@ struct ieee80211_ht_info {
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
-#define WLAN_AUTH_FAST_BSS_TRANSITION 2
-#define WLAN_AUTH_LEAP 128
+#define WLAN_AUTH_LEAP 2
 
 #define WLAN_AUTH_CHALLENGE_LEN 128
 
diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index afa34d3be721..738734a4653b 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -28,6 +28,7 @@
 #include <linux/if_ether.h>	/* ETH_ALEN */
 #include <linux/kernel.h>	/* ARRAY_SIZE */
 #include <linux/wireless.h>
+#include <linux/ieee80211.h>
 
 #define IEEE80211_VERSION "git-1.1.13"
 
@@ -214,94 +215,6 @@ struct ieee80211_snap_hdr {
 #define WLAN_GET_SEQ_FRAG(seq) ((seq) & IEEE80211_SCTL_FRAG)
 #define WLAN_GET_SEQ_SEQ(seq)  (((seq) & IEEE80211_SCTL_SEQ) >> 4)
 
-/* Authentication algorithms */
-#define WLAN_AUTH_OPEN 0
-#define WLAN_AUTH_SHARED_KEY 1
-#define WLAN_AUTH_LEAP 2
-
-#define WLAN_AUTH_CHALLENGE_LEN 128
-
-#define WLAN_CAPABILITY_ESS (1<<0)
-#define WLAN_CAPABILITY_IBSS (1<<1)
-#define WLAN_CAPABILITY_CF_POLLABLE (1<<2)
-#define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3)
-#define WLAN_CAPABILITY_PRIVACY (1<<4)
-#define WLAN_CAPABILITY_SHORT_PREAMBLE (1<<5)
-#define WLAN_CAPABILITY_PBCC (1<<6)
-#define WLAN_CAPABILITY_CHANNEL_AGILITY (1<<7)
-#define WLAN_CAPABILITY_SPECTRUM_MGMT (1<<8)
-#define WLAN_CAPABILITY_QOS (1<<9)
-#define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10)
-#define WLAN_CAPABILITY_DSSS_OFDM (1<<13)
-
-/* 802.11g ERP information element */
-#define WLAN_ERP_NON_ERP_PRESENT (1<<0)
-#define WLAN_ERP_USE_PROTECTION (1<<1)
-#define WLAN_ERP_BARKER_PREAMBLE (1<<2)
-
-/* Status codes */
-enum ieee80211_statuscode {
-	WLAN_STATUS_SUCCESS = 0,
-	WLAN_STATUS_UNSPECIFIED_FAILURE = 1,
-	WLAN_STATUS_CAPS_UNSUPPORTED = 10,
-	WLAN_STATUS_REASSOC_NO_ASSOC = 11,
-	WLAN_STATUS_ASSOC_DENIED_UNSPEC = 12,
-	WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG = 13,
-	WLAN_STATUS_UNKNOWN_AUTH_TRANSACTION = 14,
-	WLAN_STATUS_CHALLENGE_FAIL = 15,
-	WLAN_STATUS_AUTH_TIMEOUT = 16,
-	WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA = 17,
-	WLAN_STATUS_ASSOC_DENIED_RATES = 18,
-	/* 802.11b */
-	WLAN_STATUS_ASSOC_DENIED_NOSHORTPREAMBLE = 19,
-	WLAN_STATUS_ASSOC_DENIED_NOPBCC = 20,
-	WLAN_STATUS_ASSOC_DENIED_NOAGILITY = 21,
-	/* 802.11h */
-	WLAN_STATUS_ASSOC_DENIED_NOSPECTRUM = 22,
-	WLAN_STATUS_ASSOC_REJECTED_BAD_POWER = 23,
-	WLAN_STATUS_ASSOC_REJECTED_BAD_SUPP_CHAN = 24,
-	/* 802.11g */
-	WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25,
-	WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26,
-	/* 802.11i */
-	WLAN_STATUS_INVALID_IE = 40,
-	WLAN_STATUS_INVALID_GROUP_CIPHER = 41,
-	WLAN_STATUS_INVALID_PAIRWISE_CIPHER = 42,
-	WLAN_STATUS_INVALID_AKMP = 43,
-	WLAN_STATUS_UNSUPP_RSN_VERSION = 44,
-	WLAN_STATUS_INVALID_RSN_IE_CAP = 45,
-	WLAN_STATUS_CIPHER_SUITE_REJECTED = 46,
-};
-
-/* Reason codes */
-enum ieee80211_reasoncode {
-	WLAN_REASON_UNSPECIFIED = 1,
-	WLAN_REASON_PREV_AUTH_NOT_VALID = 2,
-	WLAN_REASON_DEAUTH_LEAVING = 3,
-	WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY = 4,
-	WLAN_REASON_DISASSOC_AP_BUSY = 5,
-	WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA = 6,
-	WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA = 7,
-	WLAN_REASON_DISASSOC_STA_HAS_LEFT = 8,
-	WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH = 9,
-	/* 802.11h */
-	WLAN_REASON_DISASSOC_BAD_POWER = 10,
-	WLAN_REASON_DISASSOC_BAD_SUPP_CHAN = 11,
-	/* 802.11i */
-	WLAN_REASON_INVALID_IE = 13,
-	WLAN_REASON_MIC_FAILURE = 14,
-	WLAN_REASON_4WAY_HANDSHAKE_TIMEOUT = 15,
-	WLAN_REASON_GROUP_KEY_HANDSHAKE_TIMEOUT = 16,
-	WLAN_REASON_IE_DIFFERENT = 17,
-	WLAN_REASON_INVALID_GROUP_CIPHER = 18,
-	WLAN_REASON_INVALID_PAIRWISE_CIPHER = 19,
-	WLAN_REASON_INVALID_AKMP = 20,
-	WLAN_REASON_UNSUPP_RSN_VERSION = 21,
-	WLAN_REASON_INVALID_RSN_IE_CAP = 22,
-	WLAN_REASON_IEEE8021X_FAILED = 23,
-	WLAN_REASON_CIPHER_SUITE_REJECTED = 24,
-};
-
 /* Action categories - 802.11h */
 enum ieee80211_actioncategories {
 	WLAN_ACTION_SPECTRUM_MGMT = 0,
@@ -530,15 +443,6 @@ enum ieee80211_mfie {
 	MFIE_TYPE_QOS_PARAMETER = 222,
 };
 
-/* Minimal header; can be used for passing 802.11 frames with sufficient
- * information to determine what type of underlying data type is actually
- * stored in the data. */
-struct ieee80211_hdr {
-	__le16 frame_ctl;
-	__le16 duration_id;
-	u8 payload[0];
-} __attribute__ ((packed));
-
 struct ieee80211_hdr_1addr {
 	__le16 frame_ctl;
 	__le16 duration_id;
@@ -586,18 +490,6 @@ struct ieee80211_hdr_3addrqos {
 	__le16 qos_ctl;
 } __attribute__ ((packed));
 
-struct ieee80211_hdr_4addrqos {
-	__le16 frame_ctl;
-	__le16 duration_id;
-	u8 addr1[ETH_ALEN];
-	u8 addr2[ETH_ALEN];
-	u8 addr3[ETH_ALEN];
-	__le16 seq_ctl;
-	u8 addr4[ETH_ALEN];
-	u8 payload[0];
-	__le16 qos_ctl;
-} __attribute__ ((packed));
-
 struct ieee80211_info_element {
 	u8 id;
 	u8 len;
@@ -1187,7 +1079,7 @@ static inline int ieee80211_get_hdrlen(u16 fc)
 
 static inline u8 *ieee80211_get_payload(struct ieee80211_hdr *hdr)
 {
-	switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl))) {
+	switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control))) {
 	case IEEE80211_1ADDR_LEN:
 		return ((struct ieee80211_hdr_1addr *)hdr)->payload;
 	case IEEE80211_2ADDR_LEN:
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index 906d96f1b264..e1558a187ac0 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -8,9 +8,11 @@
 #ifndef LIB80211_H
 #define LIB80211_H
 
+#include <linux/ieee80211.h>
+
 /* print_ssid() is intended to be used in debug (and possibly error)
  * messages. It should never be used for passing ssid to user space. */
 const char *print_ssid(char *buf, const char *ssid, u8 ssid_len);
-#define DECLARE_SSID_BUF(var) char var[32 * 4 + 1] __maybe_unused
+#define DECLARE_SSID_BUF(var) char var[IEEE80211_MAX_SSID_LEN * 4 + 1] __maybe_unused
 
 #endif /* LIB80211_H */
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index d19c8de6ef25..a91ef8475467 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -40,7 +40,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 					struct ieee80211_rx_stats *rx_stats)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	u16 fc = le16_to_cpu(hdr->frame_ctl);
+	u16 fc = le16_to_cpu(hdr->frame_control);
 
 	skb->dev = ieee->dev;
 	skb_reset_mac_header(skb);
-- 
cgit v1.2.3


From 8b30b1fe368ab03049435884c11c5c50e4c4ef0b Mon Sep 17 00:00:00 2001
From: Sujith <Sujith.Manoharan@atheros.com>
Date: Fri, 24 Oct 2008 09:55:27 +0530
Subject: mac80211: Re-enable aggregation

Wireless HW without any dedicated queues for aggregation
do not need the ampdu_queues mechanism present right now
in mac80211. Since mac80211 is still incomplete wrt TX MQ
changes, do not allow aggregation sessions for drivers that
set ampdu_queues.

This is only an interim hack until Intel fixes the requeue issue.

Signed-off-by: Sujith <Sujith.Manoharan@atheros.com>
Signed-off-by: Luis Rodriguez <Luis.Rodriguez@Atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c       |  6 ++--
 drivers/net/wireless/iwlwifi/iwl-core.c |  3 +-
 include/linux/skbuff.h                  |  4 +++
 include/net/mac80211.h                  |  8 ++---
 net/core/skbuff.c                       |  1 +
 net/mac80211/ht.c                       | 60 +++++++++++++++++++--------------
 net/mac80211/main.c                     |  7 ++--
 net/mac80211/rx.c                       |  7 ++--
 net/mac80211/tx.c                       | 19 ++++++++---
 net/mac80211/wme.c                      | 24 ++++++-------
 10 files changed, 76 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 795fed5cadfa..f6dc4c826044 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -953,10 +953,7 @@ static int ath_attach(u16 devid,
 			&sc->sbands[IEEE80211_BAND_5GHZ];
 	}
 
-	/* FIXME: Have to figure out proper hw init values later */
-
 	hw->queues = 4;
-	hw->ampdu_queues = 1;
 
 	/* Register rate control */
 	hw->rate_control_algorithm = "ath9k_rate_control";
@@ -1745,7 +1742,8 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
 		IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
 		IEEE80211_HW_SIGNAL_DBM |
-		IEEE80211_HW_NOISE_DBM;
+		IEEE80211_HW_NOISE_DBM |
+		IEEE80211_HW_AMPDU_AGGREGATION;
 
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 20c7ff382914..ba05f5ddc6d0 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -871,7 +871,8 @@ int iwl_setup_mac(struct iwl_priv *priv)
 
 	/* Tell mac80211 our characteristics */
 	hw->flags = IEEE80211_HW_SIGNAL_DBM |
-		    IEEE80211_HW_NOISE_DBM;
+		    IEEE80211_HW_NOISE_DBM |
+		    IEEE80211_HW_AMPDU_AGGREGATION;
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
 		BIT(NL80211_IFTYPE_STATION) |
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 487e34507b41..a01b6f84e3bc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -250,6 +250,9 @@ typedef unsigned char *sk_buff_data_t;
  *	@tc_verd: traffic control verdict
  *	@ndisc_nodetype: router type (from link layer)
  *	@do_not_encrypt: set to prevent encryption of this frame
+ *	@requeue: set to indicate that the wireless core should attempt
+ *		a software retry on this frame if we failed to
+ *		receive an ACK for it
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
@@ -326,6 +329,7 @@ struct sk_buff {
 #endif
 #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
 	__u8			do_not_encrypt:1;
+	__u8			requeue:1;
 #endif
 	/* 0/13/14 bit hole */
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 16c895969e6e..bba96a203885 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -242,7 +242,6 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be
  *	set by rate control algorithms to indicate probe rate, will
  *	be cleared for fragmented frames (except on the last fragment)
- * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
@@ -258,9 +257,6 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_STAT_AMPDU			= BIT(10),
 	IEEE80211_TX_STAT_AMPDU_NO_BACK		= BIT(11),
 	IEEE80211_TX_CTL_RATE_CTRL_PROBE	= BIT(12),
-
-	/* XXX: remove this */
-	IEEE80211_TX_CTL_REQUEUE		= BIT(13),
 };
 
 enum mac80211_rate_control_flags {
@@ -847,6 +843,9 @@ enum ieee80211_tkip_key_type {
  * @IEEE80211_HW_SPECTRUM_MGMT:
  * 	Hardware supports spectrum management defined in 802.11h
  * 	Measurement, Channel Switch, Quieting, TPC
+ *
+ * @IEEE80211_HW_AMPDU_AGGREGATION:
+ *	Hardware supports 11n A-MPDU aggregation.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_RX_INCLUDES_FCS			= 1<<1,
@@ -858,6 +857,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SIGNAL_DBM				= 1<<7,
 	IEEE80211_HW_NOISE_DBM				= 1<<8,
 	IEEE80211_HW_SPECTRUM_MGMT			= 1<<9,
+	IEEE80211_HW_AMPDU_AGGREGATION			= 1<<10,
 };
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cdfe473181af..c4c8a33f3418 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -544,6 +544,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(truesize);
 #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
 	C(do_not_encrypt);
+	C(requeue);
 #endif
 	atomic_set(&n->users, 1);
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 42c3e590df98..08009d4b7d6e 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -458,7 +458,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	u8 *state;
 	int ret;
 
-	if (tid >= STA_TID_NUM)
+	if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION))
 		return -EINVAL;
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
@@ -515,17 +515,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 			(unsigned long)&sta->timer_to_tid[tid];
 	init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
 
-	/* create a new queue for this aggregation */
-	ret = ieee80211_ht_agg_queue_add(local, sta, tid);
+	if (hw->ampdu_queues) {
+		/* create a new queue for this aggregation */
+		ret = ieee80211_ht_agg_queue_add(local, sta, tid);
 
-	/* case no queue is available to aggregation
-	 * don't switch to aggregation */
-	if (ret) {
+		/* case no queue is available to aggregation
+		 * don't switch to aggregation */
+		if (ret) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
-		printk(KERN_DEBUG "BA request denied - queue unavailable for"
-					" tid %d\n", tid);
+			printk(KERN_DEBUG "BA request denied - "
+			       "queue unavailable for tid %d\n", tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
-		goto err_unlock_queue;
+			goto err_unlock_queue;
+		}
 	}
 	sdata = sta->sdata;
 
@@ -544,7 +546,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 		/* No need to requeue the packets in the agg queue, since we
 		 * held the tx lock: no packet could be enqueued to the newly
 		 * allocated queue */
-		ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
+		if (hw->ampdu_queues)
+			ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "BA request denied - HW unavailable for"
 					" tid %d\n", tid);
@@ -554,7 +557,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 	}
 
 	/* Will put all the packets in the new SW queue */
-	ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
+	if (hw->ampdu_queues)
+		ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
 	spin_unlock_bh(&sta->lock);
 
 	/* send an addBA request */
@@ -622,7 +626,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
 	       ra, tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
-	ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]);
+	if (hw->ampdu_queues)
+		ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]);
 
 	*state = HT_AGG_STATE_REQ_STOP_BA_MSK |
 		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
@@ -635,7 +640,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
 	if (ret) {
 		WARN_ON(ret != -EBUSY);
 		*state = HT_AGG_STATE_OPERATIONAL;
-		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
+		if (hw->ampdu_queues)
+			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 		goto stop_BA_exit;
 	}
 
@@ -691,7 +697,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
 #endif
-		ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
+		if (hw->ampdu_queues)
+			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 	}
 	spin_unlock_bh(&sta->lock);
 	rcu_read_unlock();
@@ -745,16 +752,18 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
 		ieee80211_send_delba(sta->sdata, ra, tid,
 			WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
 
-	agg_queue = sta->tid_to_tx_q[tid];
-
-	ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
-
-	/* We just requeued the all the frames that were in the
-	 * removed queue, and since we might miss a softirq we do
-	 * netif_schedule_queue.  ieee80211_wake_queue is not used
-	 * here as this queue is not necessarily stopped
-	 */
-	netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue));
+	if (hw->ampdu_queues) {
+		agg_queue = sta->tid_to_tx_q[tid];
+		ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
+
+		/* We just requeued the all the frames that were in the
+		 * removed queue, and since we might miss a softirq we do
+		 * netif_schedule_queue.  ieee80211_wake_queue is not used
+		 * here as this queue is not necessarily stopped
+		 */
+		netif_schedule_queue(netdev_get_tx_queue(local->mdev,
+							 agg_queue));
+	}
 	spin_lock_bh(&sta->lock);
 	*state = HT_AGG_STATE_IDLE;
 	sta->ampdu_mlme.addba_req_num[tid] = 0;
@@ -1011,7 +1020,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 		*state |= HT_ADDBA_RECEIVED_MSK;
 		sta->ampdu_mlme.addba_req_num[tid] = 0;
 
-		if (*state == HT_AGG_STATE_OPERATIONAL)
+		if (*state == HT_AGG_STATE_OPERATIONAL &&
+		    local->hw.ampdu_queues)
 			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 
 		spin_unlock_bh(&sta->lock);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 88c1975a97a5..fa0cc7a1e6b4 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -386,8 +386,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 					    struct sta_info *sta,
 					    struct sk_buff *skb)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
 	sta->tx_filtered_count++;
 
 	/*
@@ -434,10 +432,9 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 		return;
 	}
 
-	if (!test_sta_flags(sta, WLAN_STA_PS) &&
-	    !(info->flags & IEEE80211_TX_CTL_REQUEUE)) {
+	if (!test_sta_flags(sta, WLAN_STA_PS) && !skb->requeue) {
 		/* Software retry the packet once */
-		info->flags |= IEEE80211_TX_CTL_REQUEUE;
+		skb->requeue = 1;
 		ieee80211_remove_tx_extra(local, sta->key, skb);
 		dev_queue_xmit(skb);
 		return;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c4c95f1db605..648a1d0e6c82 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -669,7 +669,6 @@ static int ap_sta_ps_end(struct sta_info *sta)
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	int sent = 0;
-	struct ieee80211_tx_info *info;
 
 	atomic_dec(&sdata->bss->num_sta_ps);
 
@@ -685,13 +684,11 @@ static int ap_sta_ps_end(struct sta_info *sta)
 
 	/* Send all buffered frames to the station */
 	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
-		info = IEEE80211_SKB_CB(skb);
 		sent++;
-		info->flags |= IEEE80211_TX_CTL_REQUEUE;
+		skb->requeue = 1;
 		dev_queue_xmit(skb);
 	}
 	while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) {
-		info = IEEE80211_SKB_CB(skb);
 		local->total_ps_buffered--;
 		sent++;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
@@ -699,7 +696,7 @@ static int ap_sta_ps_end(struct sta_info *sta)
 		       "since STA not sleeping anymore\n", sdata->dev->name,
 		       sta->sta.addr, sta->sta.aid);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
-		info->flags |= IEEE80211_TX_CTL_REQUEUE;
+		skb->requeue = 1;
 		dev_queue_xmit(skb);
 	}
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 541e3e64493d..d6392af9cd20 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -661,6 +661,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
 	size_t hdrlen, per_fragm, num_fragm, payload_len, left;
 	struct sk_buff **frags, *first, *frag;
@@ -677,9 +678,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	 * This scenario is handled in __ieee80211_tx_prepare but extra
 	 * caution taken here as fragmented ampdu may cause Tx stop.
 	 */
-	if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU ||
-		    skb_get_queue_mapping(tx->skb) >=
-			ieee80211_num_regular_queues(&tx->local->hw)))
+	if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
 		return TX_DROP;
 
 	first = tx->skb;
@@ -951,7 +950,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
-	int hdrlen;
+	int hdrlen, tid;
+	u8 *qc, *state;
 
 	memset(tx, 0, sizeof(*tx));
 	tx->skb = skb;
@@ -982,6 +982,15 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 
 	tx->sta = sta_info_get(local, hdr->addr1);
 
+	if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) {
+		qc = ieee80211_get_qos_ctl(hdr);
+		tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+
+		state = &tx->sta->ampdu_mlme.tid_state_tx[tid];
+		if (*state == HT_AGG_STATE_OPERATIONAL)
+			info->flags |= IEEE80211_TX_CTL_AMPDU;
+	}
+
 	if (is_multicast_ether_addr(hdr->addr1)) {
 		tx->flags &= ~IEEE80211_TX_UNICAST;
 		info->flags |= IEEE80211_TX_CTL_NO_ACK;
@@ -1172,7 +1181,7 @@ retry:
 		 * queues, there's no reason for a driver to reject
 		 * a frame there, warn and drop it.
 		 */
-		if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw)))
+		if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
 			goto drop;
 
 		store = &local->pending_packet[queue];
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index d27ef7f2d4a7..ac71b38f7cb5 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -114,8 +114,8 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ieee80211_master_priv *mpriv = netdev_priv(dev);
 	struct ieee80211_local *local = mpriv->local;
+	struct ieee80211_hw *hw = &local->hw;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct sta_info *sta;
 	u16 queue;
 	u8 tid;
@@ -124,21 +124,19 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
 	if (unlikely(queue >= local->hw.queues))
 		queue = local->hw.queues - 1;
 
-	if (info->flags & IEEE80211_TX_CTL_REQUEUE) {
+	if (skb->requeue) {
+		if (!hw->ampdu_queues)
+			return queue;
+
 		rcu_read_lock();
 		sta = sta_info_get(local, hdr->addr1);
 		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 		if (sta) {
-			struct ieee80211_hw *hw = &local->hw;
 			int ampdu_queue = sta->tid_to_tx_q[tid];
 
 			if ((ampdu_queue < ieee80211_num_queues(hw)) &&
-			    test_bit(ampdu_queue, local->queue_pool)) {
+			    test_bit(ampdu_queue, local->queue_pool))
 				queue = ampdu_queue;
-				info->flags |= IEEE80211_TX_CTL_AMPDU;
-			} else {
-				info->flags &= ~IEEE80211_TX_CTL_AMPDU;
-			}
 		}
 		rcu_read_unlock();
 
@@ -159,20 +157,18 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
 		*p++ = ack_policy | tid;
 		*p = 0;
 
+		if (!hw->ampdu_queues)
+			return queue;
+
 		rcu_read_lock();
 
 		sta = sta_info_get(local, hdr->addr1);
 		if (sta) {
 			int ampdu_queue = sta->tid_to_tx_q[tid];
-			struct ieee80211_hw *hw = &local->hw;
 
 			if ((ampdu_queue < ieee80211_num_queues(hw)) &&
-			    test_bit(ampdu_queue, local->queue_pool)) {
+			    test_bit(ampdu_queue, local->queue_pool))
 				queue = ampdu_queue;
-				info->flags |= IEEE80211_TX_CTL_AMPDU;
-			} else {
-				info->flags &= ~IEEE80211_TX_CTL_AMPDU;
-			}
 		}
 
 		rcu_read_unlock();
-- 
cgit v1.2.3


From d2372b315289aec9f565a855023c40654a5bff68 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 24 Oct 2008 20:32:20 +0200
Subject: wireless: make regdom passing semantics simpler

The regdom struct is given to the core, so it might as well
free it in error conditions.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/networking/regulatory.txt | 13 +++----------
 include/net/wireless.h                  |  3 +--
 net/wireless/nl80211.c                  |  5 +----
 net/wireless/reg.c                      |  9 +++++----
 4 files changed, 10 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index a96989a8ff35..357d4ba4f135 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -167,7 +167,6 @@ struct ieee80211_regdomain mydriver_jp_regdom = {
 
 Then in some part of your code after your wiphy has been registered:
 
-	int r;
 	struct ieee80211_regdomain *rd;
 	int size_of_regd;
 	int num_rules = mydriver_jp_regdom.n_reg_rules;
@@ -178,17 +177,11 @@ Then in some part of your code after your wiphy has been registered:
 
 	rd = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!rd)
-	return -ENOMEM;
+		return -ENOMEM;
 
 	memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
 
-	for (i=0; i < num_rules; i++) {
+	for (i=0; i < num_rules; i++)
 		memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i],
 			sizeof(struct ieee80211_reg_rule));
-	}
-	r = regulatory_hint(hw->wiphy, NULL, rd);
-	if (r) {
-		kfree(rd);
-		return r;
-	}
-
+	return regulatory_hint(hw->wiphy, NULL, rd);
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 061fe5017e5c..6e3ea0159045 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -358,8 +358,7 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq)
  * for a regulatory domain structure for the respective country. If
  * a regulatory domain is build and passed you should set the alpha2
  * if possible, otherwise set it to the special value of "99" which tells
- * the wireless core it is unknown. If you pass a built regulatory domain
- * and we return non zero you are in charge of kfree()'ing the structure.
+ * the wireless core it is unknown.
  *
  * Returns -EALREADY if *a regulatory domain* has already been set. Note that
  * this could be by another driver. It is safe for drivers to continue if
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9a16e9e6c5ca..f82cc9aa6908 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1935,12 +1935,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	mutex_lock(&cfg80211_drv_mutex);
 	r = set_regdom(rd);
 	mutex_unlock(&cfg80211_drv_mutex);
-	if (r)
-		goto bad_reg;
-
 	return r;
 
-bad_reg:
+ bad_reg:
 	kfree(rd);
 	return -EINVAL;
 }
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 00c326b66c03..038f8f133c54 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -605,7 +605,6 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
 	return r;
 }
 
-/* If rd is not NULL and if this call fails the caller must free it */
 int regulatory_hint(struct wiphy *wiphy, const char *alpha2,
 	struct ieee80211_regdomain *rd)
 {
@@ -690,6 +689,7 @@ void print_regdomain_info(const struct ieee80211_regdomain *rd)
 	print_rd_rules(rd);
 }
 
+/* Takes ownership of rd only if it doesn't fail */
 static int __set_regdom(const struct ieee80211_regdomain *rd)
 {
 	/* Some basic sanity checks first */
@@ -750,16 +750,17 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 
 /* Use this call to set the current regulatory domain. Conflicts with
  * multiple drivers can be ironed out later. Caller must've already
- * kmalloc'd the rd structure. If this calls fails you should kfree()
- * the passed rd. Caller must hold cfg80211_drv_mutex */
+ * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */
 int set_regdom(const struct ieee80211_regdomain *rd)
 {
 	int r;
 
 	/* Note that this doesn't update the wiphys, this is done below */
 	r = __set_regdom(rd);
-	if (r)
+	if (r) {
+		kfree(rd);
 		return r;
+	}
 
 	/* This would make this whole thing pointless */
 	BUG_ON(rd != cfg80211_regdomain);
-- 
cgit v1.2.3


From be3d48106c1e5d075784e5e67928a6b5ffc0f3b6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 24 Oct 2008 20:32:21 +0200
Subject: wireless: remove struct regdom hinting

The code needs to be split out and cleaned up, so as a
first step remove the capability, to add it back in a
subsequent patch as a separate function. Also remove the
publically facing return value of the function and the
wiphy argument. A number of internal functions go from
being generic helpers to just being used for alpha2
setting.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/networking/regulatory.txt | 11 +++---
 drivers/net/wireless/zd1211rw/zd_mac.c  |  2 +-
 include/net/wireless.h                  | 23 +++---------
 net/wireless/nl80211.c                  |  2 +-
 net/wireless/reg.c                      | 63 +++++++++------------------------
 net/wireless/reg.h                      | 23 ++++--------
 6 files changed, 36 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index 357d4ba4f135..dcf31648414a 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -131,11 +131,13 @@ are expected to do this during initialization.
 
 	r = zd_reg2alpha2(mac->regdomain, alpha2);
 	if (!r)
-		regulatory_hint(hw->wiphy, alpha2, NULL);
+		regulatory_hint(hw->wiphy, alpha2);
 
 Example code - drivers providing a built in regulatory domain:
 --------------------------------------------------------------
 
+[NOTE: This API is not currently available, it can be added when required]
+
 If you have regulatory information you can obtain from your
 driver and you *need* to use this we let you build a regulatory domain
 structure and pass it to the wireless core. To do this you should
@@ -182,6 +184,7 @@ Then in some part of your code after your wiphy has been registered:
 	memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
 
 	for (i=0; i < num_rules; i++)
-		memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i],
-			sizeof(struct ieee80211_reg_rule));
-	return regulatory_hint(hw->wiphy, NULL, rd);
+		memcpy(&rd->reg_rules[i],
+		       &mydriver_jp_regdom.reg_rules[i],
+		       sizeof(struct ieee80211_reg_rule));
+	regulatory_struct_hint(rd);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 2f0802b29c4b..07513e48b8f2 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -171,7 +171,7 @@ int zd_mac_init_hw(struct ieee80211_hw *hw)
 
 	r = zd_reg2alpha2(mac->regdomain, alpha2);
 	if (!r)
-		regulatory_hint(hw->wiphy, alpha2, NULL);
+		regulatory_hint(hw->wiphy, alpha2);
 
 	r = 0;
 disable_int:
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 6e3ea0159045..41294c5f6f8f 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -342,34 +342,19 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq)
 
 /**
  * regulatory_hint - driver hint to the wireless core a regulatory domain
- * @wiphy: the driver's very own &struct wiphy
+ * @wiphy: the wireless device giving the hint (used only for reporting
+ *	conflicts)
  * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain
  * 	should be in. If @rd is set this should be NULL. Note that if you
  * 	set this to NULL you should still set rd->alpha2 to some accepted
  * 	alpha2.
- * @rd: a complete regulatory domain provided by the driver. If passed
- * 	the driver does not need to worry about freeing it.
  *
  * Wireless drivers can use this function to hint to the wireless core
  * what it believes should be the current regulatory domain by
  * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
  * domain should be in or by providing a completely build regulatory domain.
  * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried
- * for a regulatory domain structure for the respective country. If
- * a regulatory domain is build and passed you should set the alpha2
- * if possible, otherwise set it to the special value of "99" which tells
- * the wireless core it is unknown.
- *
- * Returns -EALREADY if *a regulatory domain* has already been set. Note that
- * this could be by another driver. It is safe for drivers to continue if
- * -EALREADY is returned, if drivers are not capable of world roaming they
- * should not register more channels than they support. Right now we only
- * support listening to the first driver hint. If the driver is capable
- * of world roaming but wants to respect its own EEPROM mappings for
- * specific regulatory domains it should register the @reg_notifier callback
- * on the &struct wiphy. Returns 0 if the hint went through fine or through an
- * intersection operation. Otherwise a standard error code is returned.
+ * for a regulatory domain structure for the respective country.
  */
-extern int regulatory_hint(struct wiphy *wiphy,
-		const char *alpha2, struct ieee80211_regdomain *rd);
+extern void regulatory_hint(struct wiphy *wiphy, const char *alpha2);
 #endif /* __NET_WIRELESS_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f82cc9aa6908..5e1d658a8b5a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1695,7 +1695,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 #endif
 	mutex_lock(&cfg80211_drv_mutex);
-	r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL);
+	r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data);
 	mutex_unlock(&cfg80211_drv_mutex);
 	return r;
 }
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 038f8f133c54..dc10071deaaa 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -42,7 +42,10 @@
 #include "core.h"
 #include "reg.h"
 
-/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */
+/*
+ * wiphy is set if this request's initiator is
+ * REGDOM_SET_BY_COUNTRY_IE or _DRIVER
+ */
 struct regulatory_request {
 	struct wiphy *wiphy;
 	enum reg_set_by initiator;
@@ -298,7 +301,7 @@ static int call_crda(const char *alpha2)
 /* This has the logic which determines when a new request
  * should be ignored. */
 static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
-	char *alpha2, struct ieee80211_regdomain *rd)
+			  const char *alpha2)
 {
 	/* All initial requests are respected */
 	if (!last_request)
@@ -343,22 +346,8 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
 		return 1;
 	case REGDOM_SET_BY_DRIVER:
 		BUG_ON(!wiphy);
-		if (last_request->initiator == REGDOM_SET_BY_DRIVER) {
-			/* Two separate drivers hinting different things,
-			 * this is possible if you have two devices present
-			 * on a system with different EEPROM regulatory
-			 * readings. XXX: Do intersection, we support only
-			 * the first regulatory hint for now */
-			if (last_request->wiphy != wiphy)
-				return -EALREADY;
-			if (rd)
-				return -EALREADY;
-			/* Driver should not be trying to hint different
-			 * regulatory domains! */
-			BUG_ON(!alpha2_equal(alpha2,
-					cfg80211_regdomain->alpha2));
+		if (last_request->initiator == REGDOM_SET_BY_DRIVER)
 			return -EALREADY;
-		}
 		if (last_request->initiator == REGDOM_SET_BY_CORE)
 			return 0;
 		/* XXX: Handle intersection, and add the
@@ -557,40 +546,32 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby)
 
 /* Caller must hold &cfg80211_drv_mutex */
 int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
-		      const char *alpha2, struct ieee80211_regdomain *rd)
+		      const char *alpha2)
 {
 	struct regulatory_request *request;
-	char *rd_alpha2;
 	int r = 0;
 
-	r = ignore_request(wiphy, set_by, (char *) alpha2, rd);
+	r = ignore_request(wiphy, set_by, alpha2);
 	if (r)
 		return r;
 
-	if (rd)
-		rd_alpha2 = rd->alpha2;
-	else
-		rd_alpha2 = (char *) alpha2;
-
 	switch (set_by) {
 	case REGDOM_SET_BY_CORE:
 	case REGDOM_SET_BY_COUNTRY_IE:
 	case REGDOM_SET_BY_DRIVER:
 	case REGDOM_SET_BY_USER:
 		request = kzalloc(sizeof(struct regulatory_request),
-			GFP_KERNEL);
+				  GFP_KERNEL);
 		if (!request)
 			return -ENOMEM;
 
-		request->alpha2[0] = rd_alpha2[0];
-		request->alpha2[1] = rd_alpha2[1];
+		request->alpha2[0] = alpha2[0];
+		request->alpha2[1] = alpha2[1];
 		request->initiator = set_by;
 		request->wiphy = wiphy;
 
 		kfree(last_request);
 		last_request = request;
-		if (rd)
-			break;
 		r = call_crda(alpha2);
 #ifndef CONFIG_WIRELESS_OLD_REGULATORY
 		if (r)
@@ -605,25 +586,13 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
 	return r;
 }
 
-int regulatory_hint(struct wiphy *wiphy, const char *alpha2,
-	struct ieee80211_regdomain *rd)
+void regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 {
-	int r;
-	BUG_ON(!rd && !alpha2);
+	BUG_ON(!alpha2);
 
 	mutex_lock(&cfg80211_drv_mutex);
-
-	r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd);
-	if (r || !rd)
-		goto unlock_and_exit;
-
-	/* If the driver passed a regulatory domain we skipped asking
-	 * userspace for one so we can now go ahead and set it */
-	r = set_regdom(rd);
-
-unlock_and_exit:
+	__regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2);
 	mutex_unlock(&cfg80211_drv_mutex);
-	return r;
 }
 EXPORT_SYMBOL(regulatory_hint);
 
@@ -792,11 +761,11 @@ int regulatory_init(void)
 	 * that is not a valid ISO / IEC 3166 alpha2 */
 	if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U')
 		err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE,
-					ieee80211_regdom, NULL);
+					ieee80211_regdom);
 #else
 	cfg80211_regdomain = cfg80211_world_regdom;
 
-	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL);
+	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00");
 	if (err)
 		printk(KERN_ERR "cfg80211: calling CRDA failed - "
 		       "unable to update world regulatory domain, "
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 0c1572b92fef..c9b6b6358bbe 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -11,30 +11,21 @@ int set_regdom(const struct ieee80211_regdomain *rd);
 
 /**
  * __regulatory_hint - hint to the wireless core a regulatory domain
- * @wiphy: if a driver is providing the hint this is the driver's very
- * 	own &struct wiphy
+ * @wiphy: if the hint comes from country information from an AP, this
+ *	is required to be set to the wiphy that received the information
  * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain
- * 	should be in. If @rd is set this should be NULL
- * @rd: a complete regulatory domain, if passed the caller need not worry
- * 	about freeing it
+ *	should be in.
  *
  * The Wireless subsystem can use this function to hint to the wireless core
  * what it believes should be the current regulatory domain by
  * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
- * domain should be in or by providing a completely build regulatory domain.
+ * domain should be in.
  *
- * Returns -EALREADY if *a regulatory domain* has already been set. Note that
- * this could be by another driver. It is safe for drivers to continue if
- * -EALREADY is returned, if drivers are not capable of world roaming they
- * should not register more channels than they support. Right now we only
- * support listening to the first driver hint. If the driver is capable
- * of world roaming but wants to respect its own EEPROM mappings for
- * specific regulatory domains it should register the @reg_notifier callback
- * on the &struct wiphy. Returns 0 if the hint went through fine or through an
- * intersection operation. Otherwise a standard error code is returned.
+ * Returns zero if all went fine, %-EALREADY if a regulatory domain had
+ * already been set or other standard error codes.
  *
  */
 extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
-		const char *alpha2, struct ieee80211_regdomain *rd);
+			     const char *alpha2);
 
 #endif  /* __NET_WIRELESS_REG_H */
-- 
cgit v1.2.3


From e25cf4a6945e0f859186231be7164ba565412e0a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 23 Oct 2008 08:51:20 +0200
Subject: mac80211: fix two kernel-doc warnings

One parameter wasn't described and one I forgot to update when
renaming it; also update TBDs in sta_info.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h  |  4 +++-
 net/mac80211/sta_info.h | 38 ++++++++++++++++++++------------------
 2 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index bba96a203885..0b983bed3829 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -287,7 +287,7 @@ enum mac80211_rate_control_flags {
  *
  * @idx: rate index to attempt to send with
  * @flags: rate control flags (&enum mac80211_rate_control_flags)
- * @limit: number of retries before fallback
+ * @count: number of tries in this rate before going to the next rate
  *
  * A value of -1 for @idx indicates an invalid rate and, if used
  * in an array of retry rates, that no more rates should be tried.
@@ -1902,6 +1902,8 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw,
  * @short_preamble: whether mac80211 will request short-preamble transmission
  *	if the selected rate supports it
  * @max_rate_idx: user-requested maximum rate (not MCS for now)
+ * @skb: the skb that will be transmitted, the control information in it needs
+ *	to be filled in
  */
 struct ieee80211_tx_rate_control {
 	struct ieee80211_hw *hw;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4ac372aa75ce..5ad9250b63ab 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -160,18 +160,20 @@ struct sta_ampdu_mlme {
  * @list: global linked list entry
  * @hnext: hash table linked list pointer
  * @local: pointer to the global information
- * @sdata: TBD
- * @key: TBD
- * @rate_ctrl: TBD
- * @rate_ctrl_priv: TBD
+ * @sdata: virtual interface this station belongs to
+ * @key: peer key negotiated with this station, if any
+ * @rate_ctrl: rate control algorithm reference
+ * @rate_ctrl_priv: rate control private per-STA pointer
+ * @last_tx_rate: rate used for last transmit, to report to userspace as
+ *	"the" transmit rate
  * @lock: used for locking all fields that require locking, see comments
  *	in the header file.
  * @flaglock: spinlock for flags accesses
  * @addr: MAC address of this STA
  * @aid: STA's unique AID (1..2007, 0 = not assigned yet),
  *	only used in AP (and IBSS?) mode
- * @listen_interval: TBD
- * @pin_status: TBD
+ * @listen_interval: listen interval of this station, when we're acting as AP
+ * @pin_status: used internally for pinning a STA struct into memory
  * @flags: STA flags, see &enum ieee80211_sta_info_flags
  * @ps_tx_buf: buffer of frames to transmit to this station
  *	when it leaves power saving state
@@ -180,8 +182,8 @@ struct sta_ampdu_mlme {
  *	power saving state
  * @rx_packets: Number of MSDUs received from this STA
  * @rx_bytes: Number of bytes received from this STA
- * @wep_weak_iv_count: TBD
- * @last_rx: TBD
+ * @wep_weak_iv_count: number of weak WEP IVs received from this station
+ * @last_rx: time (in jiffies) when last frame was received from this STA
  * @num_duplicates: number of duplicate frames received from this STA
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
@@ -189,26 +191,26 @@ struct sta_ampdu_mlme {
  * @last_qual: qual of last received frame from this STA
  * @last_noise: noise of last received frame from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
- * @tx_filtered_count: TBD
- * @tx_retry_failed: TBD
- * @tx_retry_count: TBD
+ * @tx_filtered_count: number of frames the hardware filtered for this STA
+ * @tx_retry_failed: number of frames that failed retry
+ * @tx_retry_count: total number of retries for frames to this STA
  * @fail_avg: moving percentage of failed MSDUs
  * @tx_packets: number of RX/TX MSDUs
- * @tx_bytes: TBD
+ * @tx_bytes: number of bytes transmitted to this STA
  * @tx_fragments: number of transmitted MPDUs
  * @last_txrate: description of the last used transmit rate
- * @tid_seq: TBD
- * @ampdu_mlme: TBD
+ * @tid_seq: per-TID sequence numbers for sending to this STA
+ * @ampdu_mlme: A-MPDU state machine state
  * @timer_to_tid: identity mapping to ID timers
  * @tid_to_tx_q: map tid to tx queue
  * @llid: Local link ID
  * @plid: Peer link ID
  * @reason: Cancel reason on PLINK_HOLDING state
  * @plink_retries: Retries in establishment
- * @ignore_plink_timer: TBD
- * @plink_state plink_state: TBD
- * @plink_timeout: TBD
- * @plink_timer: TBD
+ * @ignore_plink_timer: ignore the peer-link timer (used internally)
+ * @plink_state: peer link state
+ * @plink_timeout: timeout of peer link
+ * @plink_timer: peer link watch timer
  * @debugfs: debug filesystem info
  * @sta: station information we share with the driver
  */
-- 
cgit v1.2.3


From 127cafbb276266b1b8da967bfe25a062ab1d42ab Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 28 Oct 2008 10:51:53 +0800
Subject: tracepoint: introduce *_noupdate APIs.

Impact: add new tracepoint APIs to allow the batched registration of probes

new APIs separate tracepoint_probe_register(),
tracepoint_probe_unregister() into 2 steps. The first step of them
is just update tracepoint_entry, not connect or disconnect.

this patch introduces tracepoint_probe_update_all() for update all.

these APIs are very useful for registering lots of probes
but just updating once. Another very important thing is that
*_noupdate APIs do not require module_mutex.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h |   4 ++
 kernel/tracepoint.c        | 170 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 136 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c5bb39c7a770..63064e9403f2 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe);
  */
 extern int tracepoint_probe_unregister(const char *name, void *probe);
 
+extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
+extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
+extern void tracepoint_probe_update_all(void);
+
 struct tracepoint_iter {
 	struct module *module;
 	struct tracepoint *tracepoint;
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 3e22867184e3..e96590f17de1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -59,7 +59,10 @@ struct tracepoint_entry {
 };
 
 struct tp_probes {
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct list_head list;
+	} u;
 	void *probes[0];
 };
 
@@ -72,7 +75,7 @@ static inline void *allocate_probes(int count)
 
 static void rcu_free_old_probes(struct rcu_head *head)
 {
-	kfree(container_of(head, struct tp_probes, rcu));
+	kfree(container_of(head, struct tp_probes, u.rcu));
 }
 
 static inline void release_probes(void *old)
@@ -80,7 +83,7 @@ static inline void release_probes(void *old)
 	if (old) {
 		struct tp_probes *tp_probes = container_of(old,
 			struct tp_probes, probes[0]);
-		call_rcu(&tp_probes->rcu, rcu_free_old_probes);
+		call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
 	}
 }
 
@@ -299,6 +302,23 @@ static void tracepoint_update_probes(void)
 	module_update_tracepoints();
 }
 
+static void *tracepoint_add_probe(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	void *old;
+
+	entry = get_tracepoint(name);
+	if (!entry) {
+		entry = add_tracepoint(name);
+		if (IS_ERR(entry))
+			return entry;
+	}
+	old = tracepoint_entry_add_probe(entry, probe);
+	if (IS_ERR(old) && !entry->refcount)
+		remove_tracepoint(entry);
+	return old;
+}
+
 /**
  * tracepoint_probe_register -  Connect a probe to a tracepoint
  * @name: tracepoint name
@@ -309,36 +329,36 @@ static void tracepoint_update_probes(void)
  */
 int tracepoint_probe_register(const char *name, void *probe)
 {
-	struct tracepoint_entry *entry;
-	int ret = 0;
 	void *old;
 
 	mutex_lock(&tracepoints_mutex);
-	entry = get_tracepoint(name);
-	if (!entry) {
-		entry = add_tracepoint(name);
-		if (IS_ERR(entry)) {
-			ret = PTR_ERR(entry);
-			goto end;
-		}
-	}
-	old = tracepoint_entry_add_probe(entry, probe);
-	if (IS_ERR(old)) {
-		if (!entry->refcount)
-			remove_tracepoint(entry);
-		ret = PTR_ERR(old);
-		goto end;
-	}
+	old = tracepoint_add_probe(name, probe);
 	mutex_unlock(&tracepoints_mutex);
+	if (IS_ERR(old))
+		return PTR_ERR(old);
+
 	tracepoint_update_probes();		/* may update entry */
 	release_probes(old);
 	return 0;
-end:
-	mutex_unlock(&tracepoints_mutex);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_register);
 
+static void *tracepoint_remove_probe(const char *name, void *probe)
+{
+	struct tracepoint_entry *entry;
+	void *old;
+
+	entry = get_tracepoint(name);
+	if (!entry)
+		return ERR_PTR(-ENOENT);
+	old = tracepoint_entry_remove_probe(entry, probe);
+	if (IS_ERR(old))
+		return old;
+	if (!entry->refcount)
+		remove_tracepoint(entry);
+	return old;
+}
+
 /**
  * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
  * @name: tracepoint name
@@ -351,31 +371,105 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
  */
 int tracepoint_probe_unregister(const char *name, void *probe)
 {
-	struct tracepoint_entry *entry;
 	void *old;
-	int ret = -ENOENT;
 
 	mutex_lock(&tracepoints_mutex);
-	entry = get_tracepoint(name);
-	if (!entry)
-		goto end;
-	old = tracepoint_entry_remove_probe(entry, probe);
-	if (IS_ERR(old)) {
-		ret = PTR_ERR(old);
-		goto end;
-	}
-	if (!entry->refcount)
-		remove_tracepoint(entry);
+	old = tracepoint_remove_probe(name, probe);
 	mutex_unlock(&tracepoints_mutex);
+	if (IS_ERR(old))
+		return PTR_ERR(old);
+
 	tracepoint_update_probes();		/* may update entry */
 	release_probes(old);
 	return 0;
-end:
-	mutex_unlock(&tracepoints_mutex);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static LIST_HEAD(old_probes);
+static int need_update;
+
+static void tracepoint_add_old_probes(void *old)
+{
+	need_update = 1;
+	if (old) {
+		struct tp_probes *tp_probes = container_of(old,
+			struct tp_probes, probes[0]);
+		list_add(&tp_probes->u.list, &old_probes);
+	}
+}
+
+/**
+ * tracepoint_probe_register_noupdate -  register a probe but not connect
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_register_noupdate(const char *name, void *probe)
+{
+	void *old;
+
+	mutex_lock(&tracepoints_mutex);
+	old = tracepoint_add_probe(name, probe);
+	if (IS_ERR(old)) {
+		mutex_unlock(&tracepoints_mutex);
+		return PTR_ERR(old);
+	}
+	tracepoint_add_old_probes(old);
+	mutex_unlock(&tracepoints_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
+
+/**
+ * tracepoint_probe_unregister_noupdate -  remove a probe but not disconnect
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
+{
+	void *old;
+
+	mutex_lock(&tracepoints_mutex);
+	old = tracepoint_remove_probe(name, probe);
+	if (IS_ERR(old)) {
+		mutex_unlock(&tracepoints_mutex);
+		return PTR_ERR(old);
+	}
+	tracepoint_add_old_probes(old);
+	mutex_unlock(&tracepoints_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
+
+/**
+ * tracepoint_probe_update_all -  update tracepoints
+ */
+void tracepoint_probe_update_all(void)
+{
+	LIST_HEAD(release_probes);
+	struct tp_probes *pos, *next;
+
+	mutex_lock(&tracepoints_mutex);
+	if (!need_update) {
+		mutex_unlock(&tracepoints_mutex);
+		return;
+	}
+	if (!list_empty(&old_probes))
+		list_replace_init(&old_probes, &release_probes);
+	need_update = 0;
+	mutex_unlock(&tracepoints_mutex);
+
+	tracepoint_update_probes();
+	list_for_each_entry_safe(pos, next, &release_probes, u.list) {
+		list_del(&pos->u.list);
+		call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
+	}
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
+
 /**
  * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
  * @tracepoint: current tracepoints (in), next tracepoint (out)
-- 
cgit v1.2.3


From 7e5e26a3d8ac4bcadb380073dc9604c07a9a6198 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 31 Oct 2008 09:36:38 -0400
Subject: ftrace: fix hardirq header for non ftrace archs

Impact: build fix for non-ftrace architectures

Not all archs implement ftrace, and therefore do not have an asm/ftrace.h.
This patch corrects the problem.

The ftrace_nmi_enter/exit now must be defined for all archs that implement
dynamic ftrace. Currently, only x86 does.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/include/asm/ftrace.h     |  5 -----
 arch/powerpc/include/asm/ftrace.h |  5 -----
 arch/sh/include/asm/ftrace.h      |  5 -----
 arch/sparc/include/asm/ftrace.h   |  5 -----
 arch/x86/include/asm/ftrace.h     | 16 ----------------
 include/linux/ftrace.h            |  5 ++++-
 include/linux/hardirq.h           |  2 +-
 7 files changed, 5 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 3f3a1d1508ea..39c8bc1a006a 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -1,11 +1,6 @@
 #ifndef _ASM_ARM_FTRACE
 #define _ASM_ARM_FTRACE
 
-#ifndef __ASSEMBLY__
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
-#endif
-
 #ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 1cd72700fbc0..b298f7a631e6 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -1,11 +1,6 @@
 #ifndef _ASM_POWERPC_FTRACE
 #define _ASM_POWERPC_FTRACE
 
-#ifndef __ASSEMBLY__
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
-#endif
-
 #ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 31ada0370cb6..3aed362c9463 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -1,11 +1,6 @@
 #ifndef __ASM_SH_FTRACE_H
 #define __ASM_SH_FTRACE_H
 
-#ifndef __ASSEMBLY__
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
-#endif
-
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 #endif
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index 62055ac0496e..d27716cd38c1 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -1,11 +1,6 @@
 #ifndef _ASM_SPARC64_FTRACE
 #define _ASM_SPARC64_FTRACE
 
-#ifndef __ASSEMBLY__
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
-#endif
-
 #ifdef CONFIG_MCOUNT
 #define MCOUNT_ADDR		((long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a23468194b8c..f8173ed1c970 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,23 +17,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 	 */
 	return addr - 1;
 }
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-extern void ftrace_nmi_enter(void);
-extern void ftrace_nmi_exit(void);
-#else
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
-#endif
 #endif /* __ASSEMBLY__ */
-
-#else /* CONFIG_FUNCTION_TRACER */
-
-#ifndef __ASSEMBLY__
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
-#endif
-
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #endif /* _ASM_X86_FTRACE_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e46a7b34037c..0ad1b48aea69 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -44,7 +44,6 @@ static inline void ftrace_kill(void) { }
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-
 enum {
 	FTRACE_FL_FREE		= (1 << 0),
 	FTRACE_FL_FAILED	= (1 << 1),
@@ -105,6 +104,8 @@ extern void ftrace_release(void *start, unsigned long size);
 
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
+extern void ftrace_nmi_enter(void);
+extern void ftrace_nmi_exit(void);
 
 #else
 # define skip_trace(ip)				({ 0; })
@@ -113,6 +114,8 @@ extern void ftrace_enable_daemon(void);
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
 static inline void ftrace_release(void *start, unsigned long size) { }
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 0087cb43becf..ffc16ab5a878 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,8 +4,8 @@
 #include <linux/preempt.h>
 #include <linux/smp_lock.h>
 #include <linux/lockdep.h>
+#include <linux/ftrace.h>
 #include <asm/hardirq.h>
-#include <asm/ftrace.h>
 #include <asm/system.h>
 
 /*
-- 
cgit v1.2.3


From 29cbda77a67cf263d636feea65d3bbc9c7de2e24 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 3 Nov 2008 09:16:39 -0800
Subject: rcu: increase RCU stall-check timeouts

Impact: increase timeout of debug check feature

Increase RCU stall period timeouts to reduce the likelyhood of
false positives.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcuclassic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 5f89b62e6983..301dda829e37 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -41,7 +41,7 @@
 #include <linux/seqlock.h>
 
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK	( 3 * HZ) /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_CHECK	(10 * HZ) /* for rcp->jiffies_stall */
 #define RCU_SECONDS_TILL_STALL_RECHECK	(30 * HZ) /* for rcp->jiffies_stall */
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
-- 
cgit v1.2.3


From 48148938b494cd57029a43c758e9972307a31d2a Mon Sep 17 00:00:00 2001
From: Julius Volz <julius.volz@gmail.com>
Date: Mon, 3 Nov 2008 17:08:56 -0800
Subject: IPVS: Remove supports_ipv6 scheduler flag

Remove the 'supports_ipv6' scheduler flag since all schedulers now
support IPv6.

Signed-off-by: Julius Volz <julius.volz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_vs.h              |  3 ---
 net/netfilter/ipvs/ip_vs_ctl.c   | 24 ++++++------------------
 net/netfilter/ipvs/ip_vs_dh.c    |  3 ---
 net/netfilter/ipvs/ip_vs_lblc.c  |  3 ---
 net/netfilter/ipvs/ip_vs_lblcr.c |  3 ---
 net/netfilter/ipvs/ip_vs_lc.c    |  3 ---
 net/netfilter/ipvs/ip_vs_nq.c    |  3 ---
 net/netfilter/ipvs/ip_vs_rr.c    |  3 ---
 net/netfilter/ipvs/ip_vs_sed.c   |  3 ---
 net/netfilter/ipvs/ip_vs_sh.c    |  3 ---
 net/netfilter/ipvs/ip_vs_wlc.c   |  3 ---
 net/netfilter/ipvs/ip_vs_wrr.c   |  3 ---
 12 files changed, 6 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index fc63353779f0..8f6abf4883e3 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -503,9 +503,6 @@ struct ip_vs_scheduler {
 	char			*name;		/* scheduler name */
 	atomic_t		refcnt;		/* reference counter */
 	struct module		*module;	/* THIS_MODULE/NULL */
-#ifdef CONFIG_IP_VS_IPV6
-	int			supports_ipv6;	/* scheduler has IPv6 support */
-#endif
 
 	/* scheduler initializing service */
 	int (*init_service)(struct ip_vs_service *svc);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 98e0a65646a1..d0ccdaff4276 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1168,15 +1168,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 	}
 
 #ifdef CONFIG_IP_VS_IPV6
-	if (u->af == AF_INET6) {
-		if (!sched->supports_ipv6) {
-			ret = -EAFNOSUPPORT;
-			goto out_err;
-		}
-		if ((u->netmask < 1) || (u->netmask > 128)) {
-			ret = -EINVAL;
-			goto out_err;
-		}
+	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
+		ret = -EINVAL;
+		goto out_err;
 	}
 #endif
 
@@ -1272,15 +1266,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 	old_sched = sched;
 
 #ifdef CONFIG_IP_VS_IPV6
-	if (u->af == AF_INET6) {
-		if (!sched->supports_ipv6) {
-			ret = -EAFNOSUPPORT;
-			goto out;
-		}
-		if ((u->netmask < 1) || (u->netmask > 128)) {
-			ret = -EINVAL;
-			goto out;
-		}
+	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
+		ret = -EINVAL;
+		goto out;
 	}
 #endif
 
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index d8258e0cb58c..a9dac74bb13f 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -243,9 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.init_service =		ip_vs_dh_init_svc,
 	.done_service =		ip_vs_dh_done_svc,
 	.update_service =	ip_vs_dh_update_svc,
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 135a59454f1f..4256cfad8d31 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -534,9 +534,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.init_service =		ip_vs_lblc_init_svc,
 	.done_service =		ip_vs_lblc_done_svc,
 	.schedule =		ip_vs_lblc_schedule,
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 4d6534a71b8f..321b49fa41d8 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -735,9 +735,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.init_service =		ip_vs_lblcr_init_svc,
 	.done_service =		ip_vs_lblcr_done_svc,
 	.schedule =		ip_vs_lblcr_schedule,
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index b69f808ac461..51912cab777b 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -81,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.schedule =		ip_vs_lc_schedule,
 };
 
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 9a2d8033f08f..6758ad2ceaaf 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -116,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.schedule =		ip_vs_nq_schedule,
 };
 
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index a22195f68ac4..8fb51c169eb8 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -89,9 +89,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.init_service =		ip_vs_rr_init_svc,
 	.update_service =	ip_vs_rr_update_svc,
 	.schedule =		ip_vs_rr_schedule,
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 7d2f22f04b83..691a6a0086e1 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -118,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.schedule =		ip_vs_sed_schedule,
 };
 
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 4074ccf49208..0e53955ef139 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -240,9 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.init_service =		ip_vs_sh_init_svc,
 	.done_service =		ip_vs_sh_done_svc,
 	.update_service =	ip_vs_sh_update_svc,
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 8c596e712599..57b452bbb4ea 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -106,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.schedule =		ip_vs_wlc_schedule,
 };
 
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 7ea92fed50bf..2f618dc29c5b 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -213,9 +213,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
 	.refcnt =		ATOMIC_INIT(0),
 	.module =		THIS_MODULE,
 	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
-#ifdef CONFIG_IP_VS_IPV6
-	.supports_ipv6 =	1,
-#endif
 	.init_service =		ip_vs_wrr_init_svc,
 	.done_service =		ip_vs_wrr_done_svc,
 	.update_service =	ip_vs_wrr_update_svc,
-- 
cgit v1.2.3


From 6cf3f41e6c08bca6641a695449791c38a25f35ff Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Mon, 3 Nov 2008 18:16:50 -0800
Subject: bonding, net: Move last_rx update into bonding recv logic

	The only user of the net_device->last_rx field is bonding.
This patch adds a conditional update of last_rx to the bonding special
logic in skb_bond_should_drop, causing last_rx to only be updated when
the ARP monitor is running.

	This frees network device drivers from the necessity of
updating last_rx, which can have cache line thrash issues.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c  |  2 ++
 drivers/net/bonding/bond_sysfs.c |  3 +++
 include/linux/if.h               |  1 +
 include/linux/netdevice.h        | 32 ++++++++++++++++++--------------
 4 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 56c823c175fe..39575d764974 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4564,6 +4564,8 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 	bond_dev->tx_queue_len = 0;
 	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
 	bond_dev->priv_flags |= IFF_BONDING;
+	if (bond->params.arp_interval)
+		bond_dev->priv_flags |= IFF_MASTER_ARPMON;
 
 	/* At first, we block adding VLANs. That's the only way to
 	 * prevent problems that occur when adding VLANs over an
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 296a865b75d2..e400d7dfdfc8 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -620,6 +620,8 @@ static ssize_t bonding_store_arp_interval(struct device *d,
 	       ": %s: Setting ARP monitoring interval to %d.\n",
 	       bond->dev->name, new_value);
 	bond->params.arp_interval = new_value;
+	if (bond->params.arp_interval)
+		bond->dev->priv_flags |= IFF_MASTER_ARPMON;
 	if (bond->params.miimon) {
 		printk(KERN_INFO DRV_NAME
 		       ": %s: ARP monitoring cannot be used with MII monitoring. "
@@ -1039,6 +1041,7 @@ static ssize_t bonding_store_miimon(struct device *d,
 			       "ARP monitoring. Disabling ARP monitoring...\n",
 			       bond->dev->name);
 			bond->params.arp_interval = 0;
+			bond->dev->priv_flags &= ~IFF_MASTER_ARPMON;
 			if (bond->params.arp_validate) {
 				bond_unregister_arp(bond);
 				bond->params.arp_validate =
diff --git a/include/linux/if.h b/include/linux/if.h
index 65246846c844..2a6e29620a96 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -65,6 +65,7 @@
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
 #define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
 #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
+#define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9d77b1d7dca8..f1b0dbe58464 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1742,22 +1742,26 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	struct net_device *master = dev->master;
 
-	if (master &&
-	    (dev->priv_flags & IFF_SLAVE_INACTIVE)) {
-		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-		    skb->protocol == __constant_htons(ETH_P_ARP))
-			return 0;
-
-		if (master->priv_flags & IFF_MASTER_ALB) {
-			if (skb->pkt_type != PACKET_BROADCAST &&
-			    skb->pkt_type != PACKET_MULTICAST)
+	if (master) {
+		if (master->priv_flags & IFF_MASTER_ARPMON)
+			dev->last_rx = jiffies;
+
+		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+			    skb->protocol == __constant_htons(ETH_P_ARP))
 				return 0;
-		}
-		if (master->priv_flags & IFF_MASTER_8023AD &&
-		    skb->protocol == __constant_htons(ETH_P_SLOW))
-			return 0;
 
-		return 1;
+			if (master->priv_flags & IFF_MASTER_ALB) {
+				if (skb->pkt_type != PACKET_BROADCAST &&
+				    skb->pkt_type != PACKET_MULTICAST)
+					return 0;
+			}
+			if (master->priv_flags & IFF_MASTER_8023AD &&
+			    skb->protocol == __constant_htons(ETH_P_SLOW))
+				return 0;
+
+			return 1;
+		}
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 5f7340eff8f68f41b7e5c7ad47ec4cd1ea1afb40 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Tue, 4 Nov 2008 14:21:08 +0100
Subject: netfilter: xt_NFLOG: don't call nf_log_packet in NFLOG module.

This patch modifies xt_NFLOG to suppress the call to nf_log_packet()
function. The call of this wrapper in xt_NFLOG was causing NFLOG to
use the first initialized module. Thus, if ipt_ULOG is loaded before
nfnetlink_log all NFLOG rules are treated as plain LOG rules.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nfnetlink_log.h | 14 ++++++++++++++
 net/netfilter/nfnetlink_log.c         |  3 ++-
 net/netfilter/xt_NFLOG.c              |  5 +++--
 3 files changed, 19 insertions(+), 3 deletions(-)
 create mode 100644 include/net/netfilter/nfnetlink_log.h

(limited to 'include')

diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
new file mode 100644
index 000000000000..9b67f948a8d7
--- /dev/null
+++ b/include/net/netfilter/nfnetlink_log.h
@@ -0,0 +1,14 @@
+#ifndef _KER_NFNETLINK_LOG_H
+#define _KER_NFNETLINK_LOG_H
+
+void
+nfulnl_log_packet(unsigned int pf,
+		  unsigned int hooknum,
+		  const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct nf_loginfo *li_user,
+		  const char *prefix);
+
+#endif /* _KER_NFNETLINK_LOG_H */
+
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 41e0105d3828..a51892b3f01a 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -533,7 +533,7 @@ static struct nf_loginfo default_loginfo = {
 };
 
 /* log handler for internal netfilter logging api */
-static void
+void
 nfulnl_log_packet(u_int8_t pf,
 		  unsigned int hooknum,
 		  const struct sk_buff *skb,
@@ -648,6 +648,7 @@ alloc_failure:
 	/* FIXME: statistics */
 	goto unlock_and_release;
 }
+EXPORT_SYMBOL_GPL(nfulnl_log_packet);
 
 static int
 nfulnl_rcv_nl_event(struct notifier_block *this,
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 50e3a52d3b31..a57c5cf018ec 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -13,6 +13,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_NFLOG.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netfilter/nfnetlink_log.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
@@ -31,8 +32,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(par->family, par->hooknum, skb, par->in,
-	              par->out, &li, "%s", info->prefix);
+	nfulnl_log_packet(par->family, par->hooknum, skb, par->in,
+			  par->out, &li, info->prefix);
 	return XT_CONTINUE;
 }
 
-- 
cgit v1.2.3


From 511061e2dd1b84bb21bb97c9216a19606c29ac02 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 4 Nov 2008 14:22:55 +0100
Subject: netfilter: netns ebtables: part 1

* propagate netns from userspace, register table in passed netns
* remporarily register every ebt_table in init_net

P. S.: one needs to add ".netns_ok = 1" to igmp_protocol to test with
ebtables(8) in netns.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  2 +-
 net/bridge/netfilter/ebtable_broute.c     |  2 +-
 net/bridge/netfilter/ebtable_filter.c     |  2 +-
 net/bridge/netfilter/ebtable_nat.c        |  2 +-
 net/bridge/netfilter/ebtables.c           | 27 ++++++++++++++-------------
 5 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index d45e29cd1cfb..624e7883068c 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -300,7 +300,7 @@ struct ebt_table
 
 #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \
 		     ~(__alignof__(struct ebt_replace)-1))
-extern int ebt_register_table(struct ebt_table *table);
+extern int ebt_register_table(struct net *net, struct ebt_table *table);
 extern void ebt_unregister_table(struct ebt_table *table);
 extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 246626bb0c87..1731ce8f7479 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -66,7 +66,7 @@ static int __init ebtable_broute_init(void)
 {
 	int ret;
 
-	ret = ebt_register_table(&broute_table);
+	ret = ebt_register_table(&init_net, &broute_table);
 	if (ret < 0)
 		return ret;
 	/* see br_input.c */
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 1a58af51a2e2..af8953c9a57c 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -95,7 +95,7 @@ static int __init ebtable_filter_init(void)
 {
 	int ret;
 
-	ret = ebt_register_table(&frame_filter);
+	ret = ebt_register_table(&init_net, &frame_filter);
 	if (ret < 0)
 		return ret;
 	ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index f60c1e78e575..bafe16029bd7 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -102,7 +102,7 @@ static int __init ebtable_nat_init(void)
 {
 	int ret;
 
-	ret = ebt_register_table(&frame_nat);
+	ret = ebt_register_table(&init_net, &frame_nat);
 	if (ret < 0)
 		return ret;
 	ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0fa208e86405..c1a82b2826eb 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -55,7 +55,6 @@
 
 
 static DEFINE_MUTEX(ebt_mutex);
-static LIST_HEAD(ebt_tables);
 
 static struct xt_target ebt_standard_target = {
 	.name       = "standard",
@@ -315,9 +314,11 @@ find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
 }
 
 static inline struct ebt_table *
-find_table_lock(const char *name, int *error, struct mutex *mutex)
+find_table_lock(struct net *net, const char *name, int *error,
+		struct mutex *mutex)
 {
-	return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex);
+	return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name,
+				"ebtable_", error, mutex);
 }
 
 static inline int
@@ -944,7 +945,7 @@ static void get_counters(struct ebt_counter *oldcounters,
 }
 
 /* replace the table */
-static int do_replace(void __user *user, unsigned int len)
+static int do_replace(struct net *net, void __user *user, unsigned int len)
 {
 	int ret, i, countersize;
 	struct ebt_table_info *newinfo;
@@ -1016,7 +1017,7 @@ static int do_replace(void __user *user, unsigned int len)
 	if (ret != 0)
 		goto free_counterstmp;
 
-	t = find_table_lock(tmp.name, &ret, &ebt_mutex);
+	t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
 	if (!t) {
 		ret = -ENOENT;
 		goto free_iterate;
@@ -1097,7 +1098,7 @@ free_newinfo:
 	return ret;
 }
 
-int ebt_register_table(struct ebt_table *table)
+int ebt_register_table(struct net *net, struct ebt_table *table)
 {
 	struct ebt_table_info *newinfo;
 	struct ebt_table *t;
@@ -1157,7 +1158,7 @@ int ebt_register_table(struct ebt_table *table)
 	if (ret != 0)
 		goto free_chainstack;
 
-	list_for_each_entry(t, &ebt_tables, list) {
+	list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) {
 		if (strcmp(t->name, table->name) == 0) {
 			ret = -EEXIST;
 			BUGPRINT("Table name already exists\n");
@@ -1170,7 +1171,7 @@ int ebt_register_table(struct ebt_table *table)
 		ret = -ENOENT;
 		goto free_unlock;
 	}
-	list_add(&table->list, &ebt_tables);
+	list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
 	mutex_unlock(&ebt_mutex);
 	return 0;
 free_unlock:
@@ -1208,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table)
 }
 
 /* userspace just supplied us with counters */
-static int update_counters(void __user *user, unsigned int len)
+static int update_counters(struct net *net, void __user *user, unsigned int len)
 {
 	int i, ret;
 	struct ebt_counter *tmp;
@@ -1228,7 +1229,7 @@ static int update_counters(void __user *user, unsigned int len)
 		return -ENOMEM;
 	}
 
-	t = find_table_lock(hlp.name, &ret, &ebt_mutex);
+	t = find_table_lock(net, hlp.name, &ret, &ebt_mutex);
 	if (!t)
 		goto free_tmp;
 
@@ -1386,10 +1387,10 @@ static int do_ebt_set_ctl(struct sock *sk,
 
 	switch(cmd) {
 	case EBT_SO_SET_ENTRIES:
-		ret = do_replace(user, len);
+		ret = do_replace(sock_net(sk), user, len);
 		break;
 	case EBT_SO_SET_COUNTERS:
-		ret = update_counters(user, len);
+		ret = update_counters(sock_net(sk), user, len);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1406,7 +1407,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	if (copy_from_user(&tmp, user, sizeof(tmp)))
 		return -EFAULT;
 
-	t = find_table_lock(tmp.name, &ret, &ebt_mutex);
+	t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex);
 	if (!t)
 		return ret;
 
-- 
cgit v1.2.3


From 6beceee5aa2cb94c4ae9f0784c7d3135d343f5b5 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 4 Nov 2008 14:27:15 +0100
Subject: netfilter: netns ebtables: part 2

* return ebt_table from ebt_register_table(), module code will save it into
  per-netns data for unregistration
* duplicate ebt_table at the very beginning of registration -- it's added into
  list, so one ebt_table wouldn't end up in many lists (and each netns has
  different one)
* introduce underscored tables in individial modules, this is temporary to not
  break bisection.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge/ebtables.h |  3 ++-
 net/bridge/netfilter/ebtable_broute.c     | 19 +++++++++----------
 net/bridge/netfilter/ebtable_filter.c     | 17 +++++++++--------
 net/bridge/netfilter/ebtable_nat.c        | 19 ++++++++++---------
 net/bridge/netfilter/ebtables.c           | 23 +++++++++++++++++------
 5 files changed, 47 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 624e7883068c..e40ddb94b1af 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -300,7 +300,8 @@ struct ebt_table
 
 #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \
 		     ~(__alignof__(struct ebt_replace)-1))
-extern int ebt_register_table(struct net *net, struct ebt_table *table);
+extern struct ebt_table *ebt_register_table(struct net *net,
+					    struct ebt_table *table);
 extern void ebt_unregister_table(struct ebt_table *table);
 extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 1731ce8f7479..3277d682dfcf 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -41,22 +41,23 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table broute_table =
+static struct ebt_table __broute_table =
 {
 	.name		= "broute",
 	.table		= &initial_table,
 	.valid_hooks	= 1 << NF_BR_BROUTING,
-	.lock		= __RW_LOCK_UNLOCKED(broute_table.lock),
+	.lock		= __RW_LOCK_UNLOCKED(__broute_table.lock),
 	.check		= check,
 	.me		= THIS_MODULE,
 };
+static struct ebt_table *broute_table;
 
 static int ebt_broute(struct sk_buff *skb)
 {
 	int ret;
 
 	ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
-	   &broute_table);
+	   broute_table);
 	if (ret == NF_DROP)
 		return 1; /* route it */
 	return 0; /* bridge it */
@@ -64,21 +65,19 @@ static int ebt_broute(struct sk_buff *skb)
 
 static int __init ebtable_broute_init(void)
 {
-	int ret;
-
-	ret = ebt_register_table(&init_net, &broute_table);
-	if (ret < 0)
-		return ret;
+	broute_table = ebt_register_table(&init_net, &__broute_table);
+	if (IS_ERR(broute_table))
+		return PTR_ERR(broute_table);
 	/* see br_input.c */
 	rcu_assign_pointer(br_should_route_hook, ebt_broute);
-	return ret;
+	return 0;
 }
 
 static void __exit ebtable_broute_fini(void)
 {
 	rcu_assign_pointer(br_should_route_hook, NULL);
 	synchronize_net();
-	ebt_unregister_table(&broute_table);
+	ebt_unregister_table(broute_table);
 }
 
 module_init(ebtable_broute_init);
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index af8953c9a57c..596564c7aa58 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -50,21 +50,22 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table frame_filter =
+static struct ebt_table __frame_filter =
 {
 	.name		= "filter",
 	.table		= &initial_table,
 	.valid_hooks	= FILTER_VALID_HOOKS,
-	.lock		= __RW_LOCK_UNLOCKED(frame_filter.lock),
+	.lock		= __RW_LOCK_UNLOCKED(__frame_filter.lock),
 	.check		= check,
 	.me		= THIS_MODULE,
 };
+static struct ebt_table *frame_filter;
 
 static unsigned int
 ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, &frame_filter);
+	return ebt_do_table(hook, skb, in, out, frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
@@ -95,19 +96,19 @@ static int __init ebtable_filter_init(void)
 {
 	int ret;
 
-	ret = ebt_register_table(&init_net, &frame_filter);
-	if (ret < 0)
-		return ret;
+	frame_filter = ebt_register_table(&init_net, &__frame_filter);
+	if (IS_ERR(frame_filter))
+		return PTR_ERR(frame_filter);
 	ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
 	if (ret < 0)
-		ebt_unregister_table(&frame_filter);
+		ebt_unregister_table(frame_filter);
 	return ret;
 }
 
 static void __exit ebtable_filter_fini(void)
 {
 	nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
-	ebt_unregister_table(&frame_filter);
+	ebt_unregister_table(frame_filter);
 }
 
 module_init(ebtable_filter_init);
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index bafe16029bd7..0d8fc5bcddd1 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -50,28 +50,29 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table frame_nat =
+static struct ebt_table __frame_nat =
 {
 	.name		= "nat",
 	.table		= &initial_table,
 	.valid_hooks	= NAT_VALID_HOOKS,
-	.lock		= __RW_LOCK_UNLOCKED(frame_nat.lock),
+	.lock		= __RW_LOCK_UNLOCKED(__frame_nat.lock),
 	.check		= check,
 	.me		= THIS_MODULE,
 };
+static struct ebt_table *frame_nat;
 
 static unsigned int
 ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, &frame_nat);
+	return ebt_do_table(hook, skb, in, out, frame_nat);
 }
 
 static unsigned int
 ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, &frame_nat);
+	return ebt_do_table(hook, skb, in, out, frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
@@ -102,19 +103,19 @@ static int __init ebtable_nat_init(void)
 {
 	int ret;
 
-	ret = ebt_register_table(&init_net, &frame_nat);
-	if (ret < 0)
-		return ret;
+	frame_nat = ebt_register_table(&init_net, &__frame_nat);
+	if (IS_ERR(frame_nat))
+		return PTR_ERR(frame_nat);
 	ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
 	if (ret < 0)
-		ebt_unregister_table(&frame_nat);
+		ebt_unregister_table(frame_nat);
 	return ret;
 }
 
 static void __exit ebtable_nat_fini(void)
 {
 	nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
-	ebt_unregister_table(&frame_nat);
+	ebt_unregister_table(frame_nat);
 }
 
 module_init(ebtable_nat_init);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index c1a82b2826eb..82e17527e21e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1098,7 +1098,7 @@ free_newinfo:
 	return ret;
 }
 
-int ebt_register_table(struct net *net, struct ebt_table *table)
+struct ebt_table *ebt_register_table(struct net *net, struct ebt_table *table)
 {
 	struct ebt_table_info *newinfo;
 	struct ebt_table *t;
@@ -1110,14 +1110,21 @@ int ebt_register_table(struct net *net, struct ebt_table *table)
 	    repl->entries_size == 0 ||
 	    repl->counters || table->private) {
 		BUGPRINT("Bad table data for ebt_register_table!!!\n");
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Don't add one table to multiple lists. */
+	table = kmemdup(table, sizeof(struct ebt_table), GFP_KERNEL);
+	if (!table) {
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	countersize = COUNTER_OFFSET(repl->nentries) * nr_cpu_ids;
 	newinfo = vmalloc(sizeof(*newinfo) + countersize);
 	ret = -ENOMEM;
 	if (!newinfo)
-		return -ENOMEM;
+		goto free_table;
 
 	p = vmalloc(repl->entries_size);
 	if (!p)
@@ -1149,7 +1156,7 @@ int ebt_register_table(struct net *net, struct ebt_table *table)
 
 	if (table->check && table->check(newinfo, table->valid_hooks)) {
 		BUGPRINT("The table doesn't like its own initial data, lol\n");
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	table->private = newinfo;
@@ -1173,7 +1180,7 @@ int ebt_register_table(struct net *net, struct ebt_table *table)
 	}
 	list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
 	mutex_unlock(&ebt_mutex);
-	return 0;
+	return table;
 free_unlock:
 	mutex_unlock(&ebt_mutex);
 free_chainstack:
@@ -1185,7 +1192,10 @@ free_chainstack:
 	vfree(newinfo->entries);
 free_newinfo:
 	vfree(newinfo);
-	return ret;
+free_table:
+	kfree(table);
+out:
+	return ERR_PTR(ret);
 }
 
 void ebt_unregister_table(struct ebt_table *table)
@@ -1206,6 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table)
 		vfree(table->private->chainstack);
 	}
 	vfree(table->private);
+	kfree(table);
 }
 
 /* userspace just supplied us with counters */
-- 
cgit v1.2.3


From 8157e6d16af86e4a8d31a035db7be02a8a171c26 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 4 Nov 2008 14:29:03 +0100
Subject: netfilter: netns ebtables: ebtable_broute in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/x_tables.h          |  3 +++
 net/bridge/netfilter/ebtable_broute.c | 35 +++++++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index b8093971ccb4..055e684d29b6 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -4,7 +4,10 @@
 #include <linux/list.h>
 #include <linux/netfilter.h>
 
+struct ebt_table;
+
 struct netns_xt {
 	struct list_head tables[NFPROTO_NUMPROTO];
+	struct ebt_table *broute_table;
 };
 #endif
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 3277d682dfcf..8604dfc1fc3b 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -41,33 +41,52 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table __broute_table =
+static struct ebt_table broute_table =
 {
 	.name		= "broute",
 	.table		= &initial_table,
 	.valid_hooks	= 1 << NF_BR_BROUTING,
-	.lock		= __RW_LOCK_UNLOCKED(__broute_table.lock),
+	.lock		= __RW_LOCK_UNLOCKED(broute_table.lock),
 	.check		= check,
 	.me		= THIS_MODULE,
 };
-static struct ebt_table *broute_table;
 
 static int ebt_broute(struct sk_buff *skb)
 {
 	int ret;
 
 	ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
-	   broute_table);
+			   dev_net(skb->dev)->xt.broute_table);
 	if (ret == NF_DROP)
 		return 1; /* route it */
 	return 0; /* bridge it */
 }
 
+static int __net_init broute_net_init(struct net *net)
+{
+	net->xt.broute_table = ebt_register_table(net, &broute_table);
+	if (IS_ERR(net->xt.broute_table))
+		return PTR_ERR(net->xt.broute_table);
+	return 0;
+}
+
+static void __net_exit broute_net_exit(struct net *net)
+{
+	ebt_unregister_table(net->xt.broute_table);
+}
+
+static struct pernet_operations broute_net_ops = {
+	.init = broute_net_init,
+	.exit = broute_net_exit,
+};
+
 static int __init ebtable_broute_init(void)
 {
-	broute_table = ebt_register_table(&init_net, &__broute_table);
-	if (IS_ERR(broute_table))
-		return PTR_ERR(broute_table);
+	int ret;
+
+	ret = register_pernet_subsys(&broute_net_ops);
+	if (ret < 0)
+		return ret;
 	/* see br_input.c */
 	rcu_assign_pointer(br_should_route_hook, ebt_broute);
 	return 0;
@@ -77,7 +96,7 @@ static void __exit ebtable_broute_fini(void)
 {
 	rcu_assign_pointer(br_should_route_hook, NULL);
 	synchronize_net();
-	ebt_unregister_table(broute_table);
+	unregister_pernet_subsys(&broute_net_ops);
 }
 
 module_init(ebtable_broute_init);
-- 
cgit v1.2.3


From 4aad10938d4e4e8364b664cd5420c3bfeb9b679b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 4 Nov 2008 14:29:58 +0100
Subject: netfilter: netns ebtables: ebtable_filter in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/x_tables.h          |  1 +
 net/bridge/netfilter/ebtable_filter.c | 50 ++++++++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 055e684d29b6..d258e16c894e 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -9,5 +9,6 @@ struct ebt_table;
 struct netns_xt {
 	struct list_head tables[NFPROTO_NUMPROTO];
 	struct ebt_table *broute_table;
+	struct ebt_table *frame_filter;
 };
 #endif
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 596564c7aa58..2b2e8040a9c6 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -50,41 +50,47 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table __frame_filter =
+static struct ebt_table frame_filter =
 {
 	.name		= "filter",
 	.table		= &initial_table,
 	.valid_hooks	= FILTER_VALID_HOOKS,
-	.lock		= __RW_LOCK_UNLOCKED(__frame_filter.lock),
+	.lock		= __RW_LOCK_UNLOCKED(frame_filter.lock),
 	.check		= check,
 	.me		= THIS_MODULE,
 };
-static struct ebt_table *frame_filter;
 
 static unsigned int
-ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
+ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, frame_filter);
+	return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter);
+}
+
+static unsigned int
+ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
+   const struct net_device *out, int (*okfn)(struct sk_buff *))
+{
+	return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 	{
-		.hook		= ebt_hook,
+		.hook		= ebt_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_IN,
 		.priority	= NF_BR_PRI_FILTER_BRIDGED,
 	},
 	{
-		.hook		= ebt_hook,
+		.hook		= ebt_in_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_BRIDGE,
 		.hooknum	= NF_BR_FORWARD,
 		.priority	= NF_BR_PRI_FILTER_BRIDGED,
 	},
 	{
-		.hook		= ebt_hook,
+		.hook		= ebt_out_hook,
 		.owner		= THIS_MODULE,
 		.pf		= PF_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_OUT,
@@ -92,23 +98,41 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 	},
 };
 
+static int __net_init frame_filter_net_init(struct net *net)
+{
+	net->xt.frame_filter = ebt_register_table(net, &frame_filter);
+	if (IS_ERR(net->xt.frame_filter))
+		return PTR_ERR(net->xt.frame_filter);
+	return 0;
+}
+
+static void __net_exit frame_filter_net_exit(struct net *net)
+{
+	ebt_unregister_table(net->xt.frame_filter);
+}
+
+static struct pernet_operations frame_filter_net_ops = {
+	.init = frame_filter_net_init,
+	.exit = frame_filter_net_exit,
+};
+
 static int __init ebtable_filter_init(void)
 {
 	int ret;
 
-	frame_filter = ebt_register_table(&init_net, &__frame_filter);
-	if (IS_ERR(frame_filter))
-		return PTR_ERR(frame_filter);
+	ret = register_pernet_subsys(&frame_filter_net_ops);
+	if (ret < 0)
+		return ret;
 	ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
 	if (ret < 0)
-		ebt_unregister_table(frame_filter);
+		unregister_pernet_subsys(&frame_filter_net_ops);
 	return ret;
 }
 
 static void __exit ebtable_filter_fini(void)
 {
 	nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter));
-	ebt_unregister_table(frame_filter);
+	unregister_pernet_subsys(&frame_filter_net_ops);
 }
 
 module_init(ebtable_filter_init);
-- 
cgit v1.2.3


From b71b30a626fd0e43c825a05036e7a2c3f377a563 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 4 Nov 2008 14:30:46 +0100
Subject: netfilter: netns ebtables: ebtable_nat in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/x_tables.h       |  1 +
 net/bridge/netfilter/ebtable_nat.c | 47 ++++++++++++++++++++++++++------------
 2 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index d258e16c894e..9554a644a8f8 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -10,5 +10,6 @@ struct netns_xt {
 	struct list_head tables[NFPROTO_NUMPROTO];
 	struct ebt_table *broute_table;
 	struct ebt_table *frame_filter;
+	struct ebt_table *frame_nat;
 };
 #endif
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 0d8fc5bcddd1..3fe1ae87e35f 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -50,48 +50,47 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table __frame_nat =
+static struct ebt_table frame_nat =
 {
 	.name		= "nat",
 	.table		= &initial_table,
 	.valid_hooks	= NAT_VALID_HOOKS,
-	.lock		= __RW_LOCK_UNLOCKED(__frame_nat.lock),
+	.lock		= __RW_LOCK_UNLOCKED(frame_nat.lock),
 	.check		= check,
 	.me		= THIS_MODULE,
 };
-static struct ebt_table *frame_nat;
 
 static unsigned int
-ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in
+ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, frame_nat);
+	return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat);
 }
 
 static unsigned int
-ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
+ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, skb, in, out, frame_nat);
+	return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 	{
-		.hook		= ebt_nat_dst,
+		.hook		= ebt_nat_out,
 		.owner		= THIS_MODULE,
 		.pf		= PF_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_OUT,
 		.priority	= NF_BR_PRI_NAT_DST_OTHER,
 	},
 	{
-		.hook		= ebt_nat_src,
+		.hook		= ebt_nat_out,
 		.owner		= THIS_MODULE,
 		.pf		= PF_BRIDGE,
 		.hooknum	= NF_BR_POST_ROUTING,
 		.priority	= NF_BR_PRI_NAT_SRC,
 	},
 	{
-		.hook		= ebt_nat_dst,
+		.hook		= ebt_nat_in,
 		.owner		= THIS_MODULE,
 		.pf		= PF_BRIDGE,
 		.hooknum	= NF_BR_PRE_ROUTING,
@@ -99,23 +98,41 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 	},
 };
 
+static int __net_init frame_nat_net_init(struct net *net)
+{
+	net->xt.frame_nat = ebt_register_table(net, &frame_nat);
+	if (IS_ERR(net->xt.frame_nat))
+		return PTR_ERR(net->xt.frame_nat);
+	return 0;
+}
+
+static void __net_exit frame_nat_net_exit(struct net *net)
+{
+	ebt_unregister_table(net->xt.frame_nat);
+}
+
+static struct pernet_operations frame_nat_net_ops = {
+	.init = frame_nat_net_init,
+	.exit = frame_nat_net_exit,
+};
+
 static int __init ebtable_nat_init(void)
 {
 	int ret;
 
-	frame_nat = ebt_register_table(&init_net, &__frame_nat);
-	if (IS_ERR(frame_nat))
-		return PTR_ERR(frame_nat);
+	ret = register_pernet_subsys(&frame_nat_net_ops);
+	if (ret < 0)
+		return ret;
 	ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
 	if (ret < 0)
-		ebt_unregister_table(frame_nat);
+		unregister_pernet_subsys(&frame_nat_net_ops);
 	return ret;
 }
 
 static void __exit ebtable_nat_fini(void)
 {
 	nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat));
-	ebt_unregister_table(frame_nat);
+	unregister_pernet_subsys(&frame_nat_net_ops);
 }
 
 module_init(ebtable_nat_init);
-- 
cgit v1.2.3


From 71566a0d161edec70361b7f90f6e54af6a6d5d05 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 31 Oct 2008 12:57:20 +0100
Subject: tracing/fastboot: Enable boot tracing only during initcalls

Impact: modify boot tracer

We used to disable the initcall tracing at a specified time (IE: end
of builtin initcalls). But we don't need it anymore. It will be
stopped when initcalls are finished.

However we want two things:

_Start this tracing only after pre-smp initcalls are finished.

_Since we are planning to trace sched_switches at the same time, we
want to enable them only during the initcall execution.

For this purpose, this patch introduce two functions to enable/disable
the sched_switch tracing during boot.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h    | 24 ++++++++++++++++++++++--
 init/main.c               |  4 +++-
 kernel/trace/trace_boot.c | 28 ++++++++++++++++------------
 3 files changed, 41 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e46a7b34037c..4642959e5bda 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -234,6 +234,11 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 #endif
 
 
+/*
+ * Structure which defines the trace of an initcall.
+ * You don't have to fill the func field since it is
+ * only used internally by the tracer.
+ */
 struct boot_trace {
 	pid_t			caller;
 	char			func[KSYM_NAME_LEN];
@@ -244,13 +249,28 @@ struct boot_trace {
 };
 
 #ifdef CONFIG_BOOT_TRACER
+/* Append the trace on the ring-buffer */
 extern void trace_boot(struct boot_trace *it, initcall_t fn);
+
+/* Tells the tracer that smp_pre_initcall is finished.
+ * So we can start the tracing
+ */
 extern void start_boot_trace(void);
-extern void stop_boot_trace(void);
+
+/* Resume the tracing of other necessary events
+ * such as sched switches
+ */
+extern void enable_boot_trace(void);
+
+/* Suspend this tracing. Actually, only sched_switches tracing have
+ * to be suspended. Initcalls doesn't need it.)
+ */
+extern void disable_boot_trace(void);
 #else
 static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
 static inline void start_boot_trace(void) { }
-static inline void stop_boot_trace(void) { }
+static inline void enable_boot_trace(void) { }
+static inline void disable_boot_trace(void) { }
 #endif
 
 
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..4b03cd5656ca 100644
--- a/init/main.c
+++ b/init/main.c
@@ -711,6 +711,7 @@ int do_one_initcall(initcall_t fn)
 		it.caller = task_pid_nr(current);
 		printk("calling  %pF @ %i\n", fn, it.caller);
 		it.calltime = ktime_get();
+		enable_boot_trace();
 	}
 
 	it.result = fn();
@@ -722,6 +723,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned %d after %Ld usecs\n", fn,
 			it.result, it.duration);
 		trace_boot(&it, fn);
+		disable_boot_trace();
 	}
 
 	msgbuf[0] = 0;
@@ -882,7 +884,7 @@ static int __init kernel_init(void * unused)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
-	stop_boot_trace();
+
 	init_post();
 	return 0;
 }
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..d104d5b46413 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,23 +13,29 @@
 #include "trace.h"
 
 static struct trace_array *boot_trace;
-static int trace_boot_enabled;
+static bool pre_initcalls_finished;
 
-
-/* Should be started after do_pre_smp_initcalls() in init/main.c */
+/* Tells the boot tracer that the pre_smp_initcalls are finished.
+ * So we are ready .
+ * It doesn't enable sched events tracing however.
+ * You have to call enable_boot_trace to do so.
+ */
 void start_boot_trace(void)
 {
-	trace_boot_enabled = 1;
+	pre_initcalls_finished = true;
+}
+
+void enable_boot_trace(void)
+{
 }
 
-void stop_boot_trace(void)
+void disable_boot_trace(void)
 {
-	trace_boot_enabled = 0;
 }
 
 void reset_boot_trace(struct trace_array *tr)
 {
-	stop_boot_trace();
+	disable_boot_trace();
 }
 
 static void boot_trace_init(struct trace_array *tr)
@@ -37,8 +43,6 @@ static void boot_trace_init(struct trace_array *tr)
 	int cpu;
 	boot_trace = tr;
 
-	trace_boot_enabled = 0;
-
 	for_each_cpu_mask(cpu, cpu_possible_map)
 		tracing_reset(tr, cpu);
 }
@@ -46,9 +50,9 @@ static void boot_trace_init(struct trace_array *tr)
 static void boot_trace_ctrl_update(struct trace_array *tr)
 {
 	if (tr->ctrl)
-		start_boot_trace();
+		enable_boot_trace();
 	else
-		stop_boot_trace();
+		disable_boot_trace();
 }
 
 static enum print_line_t initcall_print_line(struct trace_iterator *iter)
@@ -99,7 +103,7 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
 	unsigned long irq_flags;
 	struct trace_array *tr = boot_trace;
 
-	if (!trace_boot_enabled)
+	if (!pre_initcalls_finished)
 		return;
 
 	/* Get its name now since this function could
-- 
cgit v1.2.3


From fd8cd7e1919fc1c27fe2fdccd2a1cd32f791ef0f Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Mon, 3 Nov 2008 15:50:38 -0800
Subject: x86: vmware: look for DMI string in the product serial key

Impact: Should permit VMware detection on older platforms where the
vendor is changed.  Could theoretically cause a regression if some
weird serial number scheme contains the string "VMware" by pure
chance.  Seems unlikely, especially with the mixed case.

In some user configured cases, VMware may choose not to put a VMware specific
DMI string, but the product serial key is always there and is VMware specific.
Add a interface to check the serial key, when checking for VMware in the DMI
information.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/vmware.c |  7 ++++++-
 drivers/firmware/dmi_scan.c  | 11 +++++++++++
 include/linux/dmi.h          |  2 ++
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index a0905ecfe7d2..c034bda842d9 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -61,6 +61,11 @@ static unsigned long __vmware_get_tsc_khz(void)
         return tsc_hz;
 }
 
+/*
+ * While checking the dmi string infomation, just checking the product
+ * serial key should be enough, as this will always have a VMware
+ * specific string when running under VMware hypervisor.
+ */
 int vmware_platform(void)
 {
 	if (cpu_has_hypervisor) {
@@ -74,7 +79,7 @@ int vmware_platform(void)
 		hyper_vendor_id[12] = '\0';
 		if (!strcmp(hyper_vendor_id, "VMwareVMware"))
 			return 1;
-	} else if (dmi_available && dmi_name_in_vendors("VMware") &&
+	} else if (dmi_available && dmi_name_in_serial("VMware") &&
 		   __vmware_platform())
 		return 1;
 
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 3e526b6d00cb..d66d41282907 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -467,6 +467,17 @@ const char *dmi_get_system_info(int field)
 }
 EXPORT_SYMBOL(dmi_get_system_info);
 
+/**
+ *	dmi_name_in_serial - 	Check if string is in the DMI product serial
+ *				information.
+ */
+int dmi_name_in_serial(const char *str)
+{
+	int f = DMI_PRODUCT_SERIAL;
+	if (dmi_ident[f] && strstr(dmi_ident[f], str))
+		return 1;
+	return 0;
+}
 
 /**
  *	dmi_name_in_vendors - Check if string is anywhere in the DMI vendor information.
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index e5084eb5943a..2bfda178f274 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -44,6 +44,7 @@ extern const struct dmi_device * dmi_find_device(int type, const char *name,
 extern void dmi_scan_machine(void);
 extern int dmi_get_year(int field);
 extern int dmi_name_in_vendors(const char *str);
+extern int dmi_name_in_serial(const char *str);
 extern int dmi_available;
 extern int dmi_walk(void (*decode)(const struct dmi_header *));
 
@@ -56,6 +57,7 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na
 static inline void dmi_scan_machine(void) { return; }
 static inline int dmi_get_year(int year) { return 0; }
 static inline int dmi_name_in_vendors(const char *s) { return 0; }
+static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *))
 	{ return -1; }
-- 
cgit v1.2.3


From d5f642384e9da75393160350f75bbb9a527f7c58 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 4 Nov 2008 14:45:58 -0800
Subject: net: #ifdef ->sk_security

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 941ad7c830a3..08291c1be41e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -271,7 +271,9 @@ struct sock {
 	struct sk_buff		*sk_send_head;
 	__u32			sk_sndmsg_off;
 	int			sk_write_pending;
+#ifdef CONFIG_SECURITY
 	void			*sk_security;
+#endif
 	__u32			sk_mark;
 	/* XXX 4 bytes hole on 64 bit */
 	void			(*sk_state_change)(struct sock *sk);
-- 
cgit v1.2.3


From 7d43d1a0f2cf535167ec7247f110a1f85cecac43 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 4 Nov 2008 23:43:47 -0800
Subject: dccp: Implement lookup table for feature-negotiation information

A lookup table for feature-negotiation information, extracted from RFC
4340/42, is provided by this patch. All currently known features can
be found in this table, along with their feature location, their
default value, and type.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  9 +++++----
 net/dccp/feat.c      | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 6080449fbec9..3978aff197d9 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -176,19 +176,20 @@ enum {
 };
 
 /* DCCP features (RFC 4340 section 6.4) */
-enum {
+enum dccp_feature_numbers {
 	DCCPF_RESERVED = 0,
 	DCCPF_CCID = 1,
-	DCCPF_SHORT_SEQNOS = 2,		/* XXX: not yet implemented */
+	DCCPF_SHORT_SEQNOS = 2,
 	DCCPF_SEQUENCE_WINDOW = 3,
-	DCCPF_ECN_INCAPABLE = 4,	/* XXX: not yet implemented */
+	DCCPF_ECN_INCAPABLE = 4,
 	DCCPF_ACK_RATIO = 5,
 	DCCPF_SEND_ACK_VECTOR = 6,
 	DCCPF_SEND_NDP_COUNT = 7,
 	DCCPF_MIN_CSUM_COVER = 8,
-	DCCPF_DATA_CHECKSUM = 9,	/* XXX: not yet implemented */
+	DCCPF_DATA_CHECKSUM = 9,
 	/* 10-127 reserved */
 	DCCPF_MIN_CCID_SPECIFIC = 128,
+	DCCPF_SEND_LEV_RATE = 192,	/* RFC 4342, sec. 8.4 */
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 933a0ecf8d46..45e36fc7943b 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -23,6 +23,43 @@
 
 #define DCCP_FEAT_SP_NOAGREE (-123)
 
+static const struct {
+	u8			feat_num;		/* DCCPF_xxx */
+	enum dccp_feat_type	rxtx;			/* RX or TX  */
+	enum dccp_feat_type	reconciliation;		/* SP or NN  */
+	u8			default_value;		/* as in 6.4 */
+/*
+ *    Lookup table for location and type of features (from RFC 4340/4342)
+ *  +--------------------------+----+-----+----+----+---------+-----------+
+ *  | Feature                  | Location | Reconc. | Initial |  Section  |
+ *  |                          | RX | TX  | SP | NN |  Value  | Reference |
+ *  +--------------------------+----+-----+----+----+---------+-----------+
+ *  | DCCPF_CCID               |    |  X  | X  |    |   2     | 10        |
+ *  | DCCPF_SHORT_SEQNOS       |    |  X  | X  |    |   0     |  7.6.1    |
+ *  | DCCPF_SEQUENCE_WINDOW    |    |  X  |    | X  | 100     |  7.5.2    |
+ *  | DCCPF_ECN_INCAPABLE      | X  |     | X  |    |   0     | 12.1      |
+ *  | DCCPF_ACK_RATIO          |    |  X  |    | X  |   2     | 11.3      |
+ *  | DCCPF_SEND_ACK_VECTOR    | X  |     | X  |    |   0     | 11.5      |
+ *  | DCCPF_SEND_NDP_COUNT     |    |  X  | X  |    |   0     |  7.7.2    |
+ *  | DCCPF_MIN_CSUM_COVER     | X  |     | X  |    |   0     |  9.2.1    |
+ *  | DCCPF_DATA_CHECKSUM      | X  |     | X  |    |   0     |  9.3.1    |
+ *  | DCCPF_SEND_LEV_RATE      | X  |     | X  |    |   0     | 4342/8.4  |
+ *  +--------------------------+----+-----+----+----+---------+-----------+
+ */
+} dccp_feat_table[] = {
+	{ DCCPF_CCID,		 FEAT_AT_TX, FEAT_SP, 2 },
+	{ DCCPF_SHORT_SEQNOS,	 FEAT_AT_TX, FEAT_SP, 0 },
+	{ DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100 },
+	{ DCCPF_ECN_INCAPABLE,	 FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_ACK_RATIO,	 FEAT_AT_TX, FEAT_NN, 2 },
+	{ DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_SEND_NDP_COUNT,  FEAT_AT_TX, FEAT_SP, 0 },
+	{ DCCPF_MIN_CSUM_COVER,  FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_DATA_CHECKSUM,	 FEAT_AT_RX, FEAT_SP, 0 },
+	{ DCCPF_SEND_LEV_RATE,	 FEAT_AT_RX, FEAT_SP, 0 },
+};
+#define DCCP_FEAT_SUPPORTED_MAX		ARRAY_SIZE(dccp_feat_table)
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
@@ -639,6 +676,8 @@ const char *dccp_feat_name(const u8 feat)
 	if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
 		return feature_names[DCCPF_RESERVED];
 
+	if (feat ==  DCCPF_SEND_LEV_RATE)
+		return "Send Loss Event Rate";
 	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
 		return "CCID-specific";
 
-- 
cgit v1.2.3


From ac75773c2742d82cbcb078708df406e9017224b7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 4 Nov 2008 23:55:49 -0800
Subject: dccp: Per-socket initialisation of feature negotiation

This provides feature-negotiation initialisation for both DCCP sockets
and DCCP request_sockets, to support feature negotiation during
connection setup.

It also resolves a FIXME regarding the congestion control
initialisation.

Thanks to Wei Yongjun for help with the IPv6 side of this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  4 ++++
 net/dccp/dccp.h      |  3 ++-
 net/dccp/feat.c      | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dccp/feat.h      |  1 +
 net/dccp/input.c     |  2 --
 net/dccp/ipv4.c      |  3 ++-
 net/dccp/ipv6.c      |  3 ++-
 net/dccp/minisocks.c |  7 ++++++-
 net/dccp/proto.c     |  1 +
 9 files changed, 73 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 3978aff197d9..484b8a1fb023 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk);
  * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1)
  * @dreq_isr: initial sequence number received on the Request
  * @dreq_service: service code present on the Request (there is just one)
+ * @dreq_featneg: feature negotiation options for this connection
  * The following two fields are analogous to the ones in dccp_sock:
  * @dreq_timestamp_echo: last received timestamp to echo (13.1)
  * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
@@ -421,6 +422,7 @@ struct dccp_request_sock {
 	__u64			 dreq_iss;
 	__u64			 dreq_isr;
 	__be32			 dreq_service;
+	struct list_head	 dreq_featneg;
 	__u32			 dreq_timestamp_echo;
 	__u32			 dreq_timestamp_time;
 };
@@ -498,6 +500,7 @@ struct dccp_ackvec;
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
  * @dccps_minisock - associated minisock (accessed via dccp_msk)
+ * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
  * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
@@ -535,6 +538,7 @@ struct dccp_sock {
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
+	struct list_head		dccps_featneg;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index d6fed595a3ff..dee4a90886d6 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -252,7 +252,8 @@ extern const char *dccp_state_name(const int state);
 extern void dccp_set_state(struct sock *sk, const int state);
 extern void dccp_done(struct sock *sk);
 
-extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
+extern int  dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+			    struct sk_buff const *skb);
 
 extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 9a37b6ce3aca..069d8ffe4c6f 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -90,6 +90,20 @@ static u8 dccp_feat_type(u8 feat_num)
 	return dccp_feat_table[idx].reconciliation;
 }
 
+/* copy constructor, fval must not already contain allocated memory */
+static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
+{
+	fval->sp.len = len;
+	if (fval->sp.len > 0) {
+		fval->sp.vec = kmemdup(val, len, gfp_any());
+		if (fval->sp.vec == NULL) {
+			fval->sp.len = 0;
+			return -ENOBUFS;
+		}
+	}
+	return 0;
+}
+
 static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
 {
 	if (unlikely(val == NULL))
@@ -99,6 +113,28 @@ static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
 	memset(val, 0, sizeof(*val));
 }
 
+static struct dccp_feat_entry *
+	      dccp_feat_clone_entry(struct dccp_feat_entry const *original)
+{
+	struct dccp_feat_entry *new;
+	u8 type = dccp_feat_type(original->feat_num);
+
+	if (type == FEAT_UNKNOWN)
+		return NULL;
+
+	new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any());
+	if (new == NULL)
+		return NULL;
+
+	if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
+						      original->val.sp.vec,
+						      original->val.sp.len)) {
+		kfree(new);
+		return NULL;
+	}
+	return new;
+}
+
 static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
 {
 	if (entry != NULL) {
@@ -133,6 +169,25 @@ void dccp_feat_list_purge(struct list_head *fn_list)
 }
 EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
 
+/* generate @to as full clone of @from - @to must not contain any nodes */
+int dccp_feat_clone_list(struct list_head const *from, struct list_head *to)
+{
+	struct dccp_feat_entry *entry, *new;
+
+	INIT_LIST_HEAD(to);
+	list_for_each_entry(entry, from, node) {
+		new = dccp_feat_clone_entry(entry);
+		if (new == NULL)
+			goto cloning_failed;
+		list_add_tail(&new->node, to);
+	}
+	return 0;
+
+cloning_failed:
+	dccp_feat_list_purge(to);
+	return -ENOMEM;
+}
+
 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
 		     u8 *val, u8 len, gfp_t gfp)
 {
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 56df82ceef09..f5e99b39712a 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -96,6 +96,7 @@ extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
 				   u8 *val, u8 len);
 extern void dccp_feat_clean(struct dccp_minisock *dmsk);
 extern int  dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
+extern int  dccp_feat_clone_list(struct list_head const *, struct list_head *);
 extern int  dccp_feat_init(struct dccp_minisock *dmsk);
 
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 779d0ed9ae94..3070015edc75 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -590,8 +590,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
 								    skb) < 0)
 				return 1;
-
-			/* FIXME: do congestion control initialization */
 			goto discard;
 		}
 		if (dh->dccph_type == DCCP_PKT_RESET)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 01e3e0206254..cbf522dfecc4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -595,7 +595,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	dccp_reqsk_init(req, skb);
+	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+		goto drop_and_free;
 
 	dreq = dccp_rsk(req);
 	if (dccp_parse_options(sk, dreq, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d4ce1224e008..4e172ccfd76c 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -426,7 +426,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (req == NULL)
 		goto drop;
 
-	dccp_reqsk_init(req, skb);
+	if (dccp_reqsk_init(req, dccp_sk(sk), skb))
+		goto drop_and_free;
 
 	dreq = dccp_rsk(req);
 	if (dccp_parse_options(sk, dreq, skb))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index e6bf99e3e41a..afdacbb94d75 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -125,6 +125,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
 		newicsk->icsk_rto	    = DCCP_TIMEOUT_INIT;
 
+		INIT_LIST_HEAD(&newdp->dccps_featneg);
 		if (dccp_feat_clone(sk, newsk))
 			goto out_free;
 
@@ -304,7 +305,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
 
-void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
+int dccp_reqsk_init(struct request_sock *req,
+		    struct dccp_sock const *dp, struct sk_buff const *skb)
 {
 	struct dccp_request_sock *dreq = dccp_rsk(req);
 
@@ -313,6 +315,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
 	inet_rsk(req)->acked	  = 0;
 	req->rcv_wnd		  = sysctl_dccp_feat_sequence_window;
 	dreq->dreq_timestamp_echo = 0;
+
+	/* inherit feature negotiation options from listening socket */
+	return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
 }
 
 EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d0bd34819761..1cdf4ae99605 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -193,6 +193,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 
 	dccp_init_xmit_timers(sk);
 
+	INIT_LIST_HEAD(&dp->dccps_featneg);
 	/*
 	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
 	 * the listening (master) sock get CCID control blocks, which is not
-- 
cgit v1.2.3


From ea913940c39a61214c799cc7093d7b20fe11a94c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 5 Nov 2008 11:13:21 +0000
Subject: ASoC: Remove core version number

Rather than try to remember to keep the core version number updated
(which hasn't been happening) just remove it.  It was much more useful
when ASoC was out of tree.

Signed-off-by: Mark brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  | 2 --
 sound/soc/soc-core.c | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index da0040b69c2d..fa1b99b45893 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -21,8 +21,6 @@
 #include <sound/control.h>
 #include <sound/ac97_codec.h>
 
-#define SND_SOC_VERSION "0.13.2"
-
 /*
  * Convenience kcontrol builders
  */
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 1a173682965a..9feaa7b6dc34 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1965,7 +1965,6 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute);
 
 static int __devinit snd_soc_init(void)
 {
-	printk(KERN_INFO "ASoC version %s\n", SND_SOC_VERSION);
 	return platform_driver_register(&soc_driver);
 }
 
-- 
cgit v1.2.3


From 1f29fae29709b4668979e244c09b2fa78ff1ad59 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Wed, 5 Nov 2008 16:08:52 -0600
Subject: file capabilities: add no_file_caps switch (v4)

Add a no_file_caps boot option when file capabilities are
compiled into the kernel (CONFIG_SECURITY_FILE_CAPABILITIES=y).

This allows distributions to ship a kernel with file capabilities
compiled in, without forcing users to use (and understand and
trust) them.

When no_file_caps is specified at boot, then when a process executes
a file, any file capabilities stored with that file will not be
used in the calculation of the process' new capability sets.

This means that booting with the no_file_caps boot option will
not be the same as booting a kernel with file capabilities
compiled out - in particular a task with  CAP_SETPCAP will not
have any chance of passing capabilities to another task (which
isn't "really" possible anyway, and which may soon by killed
altogether by David Howells in any case), and it will instead
be able to put new capabilities in its pI.  However since fI
will always be empty and pI is masked with fI, it gains the
task nothing.

We also support the extra prctl options, setting securebits and
dropping capabilities from the per-process bounding set.

The other remaining difference is that killpriv, task_setscheduler,
setioprio, and setnice will continue to be hooked.  That will
be noticable in the case where a root task changed its uid
while keeping some caps, and another task owned by the new uid
tries to change settings for the more privileged task.

Changelog:
	Nov 05 2008: (v4) trivial port on top of always-start-\
		with-clear-caps patch
	Sep 23 2008: nixed file_caps_enabled when file caps are
		not compiled in as it isn't used.
		Document no_file_caps in kernel-parameters.txt.

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 Documentation/kernel-parameters.txt |  4 ++++
 include/linux/capability.h          |  3 +++
 kernel/capability.c                 | 11 +++++++++++
 security/commoncap.c                |  3 +++
 4 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1bbcaa8982b6..784443acca9c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1459,6 +1459,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			instruction doesn't work correctly and not to
 			use it.
 
+	no_file_caps	Tells the kernel not to honor file capabilities.  The
+			only way then for a file to be executed with privilege
+			is to be setuid root or executed by root.
+
 	nohalt		[IA-64] Tells the kernel not to use the power saving
 			function PAL_HALT_LIGHT when idle. This increases
 			power-consumption. On the positive side, it reduces
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 9d1fe30b6f6c..5bc145bd759a 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -68,6 +68,9 @@ typedef struct __user_cap_data_struct {
 #define VFS_CAP_U32             VFS_CAP_U32_2
 #define VFS_CAP_REVISION	VFS_CAP_REVISION_2
 
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+extern int file_caps_enabled;
+#endif
 
 struct vfs_cap_data {
 	__le32 magic_etc;            /* Little endian */
diff --git a/kernel/capability.c b/kernel/capability.c
index 33e51e78c2d8..e13a68535ad5 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -33,6 +33,17 @@ EXPORT_SYMBOL(__cap_empty_set);
 EXPORT_SYMBOL(__cap_full_set);
 EXPORT_SYMBOL(__cap_init_eff_set);
 
+#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
+int file_caps_enabled = 1;
+
+static int __init file_caps_disable(char *str)
+{
+	file_caps_enabled = 0;
+	return 1;
+}
+__setup("no_file_caps", file_caps_disable);
+#endif
+
 /*
  * More recent versions of libcap are available from:
  *
diff --git a/security/commoncap.c b/security/commoncap.c
index 3976613db829..f88119cb2bc2 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -281,6 +281,9 @@ static int get_file_caps(struct linux_binprm *bprm)
 
 	bprm_clear_caps(bprm);
 
+	if (!file_caps_enabled)
+		return 0;
+
 	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
 		return 0;
 
-- 
cgit v1.2.3


From ae33bc40c0d96d02f51a996482ea7e41c5152695 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 5 Nov 2008 16:00:02 -0800
Subject: net: Guaranetee the proper ordering of the loopback device.

I was recently hunting a bug that occurred in network namespace
cleanup.  In looking at the code it became apparrent that we have
and will continue to have cases where if we have anything going
on in a network namespace there will be assumptions that the
loopback device is present.   Things like sending igmp unsubscribe
messages when we bring down network devices invokes the routing
code which assumes that at least the loopback driver is present.

Therefore to avoid magic initcall ordering hackery that is hard
to follow and hard to get right insert a call to register the
loopback device directly from net_dev_init().    This guarantes
that the loopback device is the first device registered and
the last network device to go away.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c    | 13 ++-----------
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 12 ++++++++++++
 3 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 91d08585a6d8..c4516b580ba5 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net)
 	unregister_netdev(dev);
 }
 
-static struct pernet_operations __net_initdata loopback_net_ops = {
+/* Registered in net/core/dev.c */
+struct pernet_operations __net_initdata loopback_net_ops = {
        .init = loopback_net_init,
        .exit = loopback_net_exit,
 };
-
-static int __init loopback_init(void)
-{
-	return register_pernet_device(&loopback_net_ops);
-}
-
-/* Loopback is special. It should be initialized before any other network
- * device and network subsystem.
- */
-fs_initcall(loopback_init);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f1b0dbe58464..12d7f4469dc9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 	return 0;
 }
 
+extern struct pernet_operations __net_initdata loopback_net_ops;
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 9475f3e624a8..811507c39805 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4904,6 +4904,18 @@ static int __init net_dev_init(void)
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
+	/* The loopback device is special if any other network devices
+	 * is present in a network namespace the loopback device must
+	 * be present. Since we now dynamically allocate and free the
+	 * loopback device ensure this invariant is maintained by
+	 * keeping the loopback device as the first device on the
+	 * list of network devices.  Ensuring the loopback devices
+	 * is the first device that appears and the last network device
+	 * that disappears.
+	 */
+	if (register_pernet_device(&loopback_net_ops))
+		goto out;
+
 	if (register_pernet_device(&default_device_ops))
 		goto out;
 
-- 
cgit v1.2.3


From 61c9eaf90081cbe6dc4f389e0056bff76eca19ec Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 5 Nov 2008 16:02:34 -0800
Subject: pkt_sched: Fix qdisc len in qdisc_peek_dequeued()

A packet dequeued and stored as gso_skb in qdisc_peek_dequeued() should
be seen as part of the queue for sch->q.qlen queries until it's really
dequeued with qdisc_dequeue_peeked(), so qlen needs additional updating
in these functions. (Updating qstats.backlog shouldn't matter here.)

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9dcb5bfe094a..64ae1ba9f554 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -442,8 +442,12 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
 static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
 {
 	/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
-	if (!sch->gso_skb)
+	if (!sch->gso_skb) {
 		sch->gso_skb = sch->dequeue(sch);
+		if (sch->gso_skb)
+			/* it's still part of the queue */
+			sch->q.qlen++;
+	}
 
 	return sch->gso_skb;
 }
@@ -453,10 +457,12 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 {
 	struct sk_buff *skb = sch->gso_skb;
 
-	if (skb)
+	if (skb) {
 		sch->gso_skb = NULL;
-	else
+		sch->q.qlen--;
+	} else {
 		skb = sch->dequeue(sch);
+	}
 
 	return skb;
 }
-- 
cgit v1.2.3


From 305d552accae6afb859c493ebc7d98ca3371dae2 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Tue, 4 Nov 2008 17:51:14 -0800
Subject: bonding: send IPv6 neighbor advertisement on failover

This patch adds better IPv6 failover support for bonding devices,
especially when in active-backup mode and there are only IPv6 addresses
configured, as reported by Alex Sidorenko.

- Creates a new file, net/drivers/bonding/bond_ipv6.c, for the
   IPv6-specific routines.  Both regular bonds and VLANs over bonds
   are supported.

- Adds a new tunable, num_unsol_na, to limit the number of unsolicited
   IPv6 Neighbor Advertisements that are sent on a failover event.
   Default is 1.

- Creates two new IPv6 neighbor discovery functions:

   ndisc_build_skb()
   ndisc_send_skb()

   These were required to support VLANs since we have to be able to
   add the VLAN id to the skb since ndisc_send_na() and friends
   shouldn't be asked to do this.  These two routines are basically
   __ndisc_send() split into two pieces, in a slightly different order.

- Updates Documentation/networking/bonding.txt and bumps the rev of bond
   support to 3.4.0.

On failover, this new code will generate one packet:

- An unsolicited IPv6 Neighbor Advertisement, which helps the switch
   learn that the address has moved to the new slave.

Testing has shown that sending just the NA results in pretty good
behavior when in active-back mode, I saw no lost ping packets for example.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 Documentation/networking/bonding.txt |  10 ++
 drivers/net/Kconfig                  |   1 +
 drivers/net/bonding/Makefile         |   3 +
 drivers/net/bonding/bond_ipv6.c      | 218 +++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_main.c      |  33 +++++-
 drivers/net/bonding/bond_sysfs.c     |  42 +++++++
 drivers/net/bonding/bonding.h        |  34 +++++-
 include/net/ndisc.h                  |  14 +++
 net/ipv6/ndisc.c                     |  92 ++++++++++-----
 9 files changed, 416 insertions(+), 31 deletions(-)
 create mode 100644 drivers/net/bonding/bond_ipv6.c

(limited to 'include')

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index d733a428eff6..3f4d0fae7081 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -551,6 +551,16 @@ num_grat_arp
 	affects only the active-backup mode.  This option was added for
 	bonding version 3.3.0.
 
+num_unsol_na
+
+	Specifies the number of unsolicited IPv6 Neighbor Advertisements
+	to be issued after a failover event.  One unsolicited NA is issued
+	immediately after the failover.
+
+	The valid range is 0 - 255; the default value is 1.  This option
+	affects only the active-backup mode.  This option was added for
+	bonding version 3.4.0.
+
 primary
 
 	A string (eth0, eth2, etc) specifying which slave is the
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0f3e6b2d2808..f1d0a1371695 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -61,6 +61,7 @@ config DUMMY
 config BONDING
 	tristate "Bonding driver support"
 	depends on INET
+	depends on IPV6 || IPV6=n
 	---help---
 	  Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet
 	  Channels together. This is called 'Etherchannel' by Cisco,
diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 5cdae2bc055a..6f9c6faef24c 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -6,3 +6,6 @@ obj-$(CONFIG_BONDING) += bonding.o
 
 bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o
 
+ipv6-$(subst m,y,$(CONFIG_IPV6)) += bond_ipv6.o
+bonding-objs += $(ipv6-y)
+
diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
new file mode 100644
index 000000000000..7c78b7bf671c
--- /dev/null
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright(c) 2008 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ */
+
+//#define BONDING_DEBUG 1
+
+#include <linux/types.h>
+#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include "bonding.h"
+
+/*
+ * Assign bond->master_ipv6 to the next IPv6 address in the list, or
+ * zero it out if there are none.
+ */
+static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+
+	if (!dev)
+		return;
+
+	idev = in6_dev_get(dev);
+	if (!idev)
+		return;
+
+	read_lock_bh(&idev->lock);
+	ifa = idev->addr_list;
+	if (ifa)
+		ipv6_addr_copy(addr, &ifa->addr);
+	else
+		ipv6_addr_set(addr, 0, 0, 0, 0);
+
+	read_unlock_bh(&idev->lock);
+
+	in6_dev_put(idev);
+}
+
+static void bond_na_send(struct net_device *slave_dev,
+			 struct in6_addr *daddr,
+			 int router,
+			 unsigned short vlan_id)
+{
+	struct in6_addr mcaddr;
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+	};
+	struct sk_buff *skb;
+
+	icmp6h.icmp6_router = router;
+	icmp6h.icmp6_solicited = 0;
+	icmp6h.icmp6_override = 1;
+
+	addrconf_addr_solict_mult(daddr, &mcaddr);
+
+	dprintk("ipv6 na on slave %s: dest %pI6, src %pI6\n",
+	       slave->name, &mcaddr, daddr);
+
+	skb = ndisc_build_skb(slave_dev, &mcaddr, daddr, &icmp6h, daddr,
+			      ND_OPT_TARGET_LL_ADDR);
+
+	if (!skb) {
+		printk(KERN_ERR DRV_NAME ": NA packet allocation failed\n");
+		return;
+	}
+
+	if (vlan_id) {
+		skb = vlan_put_tag(skb, vlan_id);
+		if (!skb) {
+			printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n");
+			return;
+		}
+	}
+
+	ndisc_send_skb(skb, slave_dev, NULL, &mcaddr, daddr, &icmp6h);
+}
+
+/*
+ * Kick out an unsolicited Neighbor Advertisement for an IPv6 address on
+ * the bonding master.  This will help the switch learn our address
+ * if in active-backup mode.
+ *
+ * Caller must hold curr_slave_lock for read or better
+ */
+void bond_send_unsolicited_na(struct bonding *bond)
+{
+	struct slave *slave = bond->curr_active_slave;
+	struct vlan_entry *vlan;
+	struct inet6_dev *idev;
+	int is_router;
+
+	dprintk("bond_send_unsol_na: bond %s slave %s\n", bond->dev->name,
+				slave ? slave->dev->name : "NULL");
+
+	if (!slave || !bond->send_unsol_na ||
+	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
+		return;
+
+	bond->send_unsol_na--;
+
+	idev = in6_dev_get(bond->dev);
+	if (!idev)
+		return;
+
+	is_router = !!idev->cnf.forwarding;
+
+	in6_dev_put(idev);
+
+	if (!ipv6_addr_any(&bond->master_ipv6))
+		bond_na_send(slave->dev, &bond->master_ipv6, is_router, 0);
+
+	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+		if (!ipv6_addr_any(&vlan->vlan_ipv6)) {
+			bond_na_send(slave->dev, &vlan->vlan_ipv6, is_router,
+				     vlan->vlan_id);
+		}
+	}
+}
+
+/*
+ * bond_inet6addr_event: handle inet6addr notifier chain events.
+ *
+ * We keep track of device IPv6 addresses primarily to use as source
+ * addresses in NS probes.
+ *
+ * We track one IPv6 for the main device (if it has one).
+ */
+static int bond_inet6addr_event(struct notifier_block *this,
+				unsigned long event,
+				void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+	struct net_device *vlan_dev, *event_dev = ifa->idev->dev;
+	struct bonding *bond;
+	struct vlan_entry *vlan;
+
+	if (dev_net(event_dev) != &init_net)
+		return NOTIFY_DONE;
+
+	list_for_each_entry(bond, &bond_dev_list, bond_list) {
+		if (bond->dev == event_dev) {
+			switch (event) {
+			case NETDEV_UP:
+				if (ipv6_addr_any(&bond->master_ipv6))
+					ipv6_addr_copy(&bond->master_ipv6,
+						       &ifa->addr);
+				return NOTIFY_OK;
+			case NETDEV_DOWN:
+				if (ipv6_addr_equal(&bond->master_ipv6,
+						    &ifa->addr))
+					bond_glean_dev_ipv6(bond->dev,
+							    &bond->master_ipv6);
+				return NOTIFY_OK;
+			default:
+				return NOTIFY_DONE;
+			}
+		}
+
+		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			vlan_dev = vlan_group_get_device(bond->vlgrp,
+							 vlan->vlan_id);
+			if (vlan_dev == event_dev) {
+				switch (event) {
+				case NETDEV_UP:
+					if (ipv6_addr_any(&vlan->vlan_ipv6))
+						ipv6_addr_copy(&vlan->vlan_ipv6,
+							       &ifa->addr);
+					return NOTIFY_OK;
+				case NETDEV_DOWN:
+					if (ipv6_addr_equal(&vlan->vlan_ipv6,
+							    &ifa->addr))
+						bond_glean_dev_ipv6(vlan_dev,
+								    &vlan->vlan_ipv6);
+					return NOTIFY_OK;
+				default:
+					return NOTIFY_DONE;
+				}
+			}
+		}
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block bond_inet6addr_notifier = {
+	.notifier_call = bond_inet6addr_event,
+};
+
+void bond_register_ipv6_notifier(void)
+{
+	register_inet6addr_notifier(&bond_inet6addr_notifier);
+}
+
+void bond_unregister_ipv6_notifier(void)
+{
+	unregister_inet6addr_notifier(&bond_inet6addr_notifier);
+}
+
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 39575d764974..798d98ce2d97 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -89,6 +89,7 @@
 
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
 static int num_grat_arp = 1;
+static int num_unsol_na = 1;
 static int miimon	= BOND_LINK_MON_INTERV;
 static int updelay	= 0;
 static int downdelay	= 0;
@@ -107,6 +108,8 @@ module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
 module_param(num_grat_arp, int, 0644);
 MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event");
+module_param(num_unsol_na, int, 0644);
+MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event");
 module_param(miimon, int, 0);
 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
 module_param(updelay, int, 0);
@@ -242,14 +245,13 @@ static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
 	dprintk("bond: %s, vlan id %d\n",
 		(bond ? bond->dev->name: "None"), vlan_id);
 
-	vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL);
+	vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
 	if (!vlan) {
 		return -ENOMEM;
 	}
 
 	INIT_LIST_HEAD(&vlan->vlan_list);
 	vlan->vlan_id = vlan_id;
-	vlan->vlan_ip = 0;
 
 	write_lock_bh(&bond->lock);
 
@@ -1208,6 +1210,9 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 			bond->send_grat_arp = bond->params.num_grat_arp;
 			bond_send_gratuitous_arp(bond);
 
+			bond->send_unsol_na = bond->params.num_unsol_na;
+			bond_send_unsolicited_na(bond);
+
 			write_unlock_bh(&bond->curr_slave_lock);
 			read_unlock(&bond->lock);
 
@@ -2463,6 +2468,12 @@ void bond_mii_monitor(struct work_struct *work)
 		read_unlock(&bond->curr_slave_lock);
 	}
 
+	if (bond->send_unsol_na) {
+		read_lock(&bond->curr_slave_lock);
+		bond_send_unsolicited_na(bond);
+		read_unlock(&bond->curr_slave_lock);
+	}
+
 	if (bond_miimon_inspect(bond)) {
 		read_unlock(&bond->lock);
 		rtnl_lock();
@@ -3158,6 +3169,12 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 		read_unlock(&bond->curr_slave_lock);
 	}
 
+	if (bond->send_unsol_na) {
+		read_lock(&bond->curr_slave_lock);
+		bond_send_unsolicited_na(bond);
+		read_unlock(&bond->curr_slave_lock);
+	}
+
 	if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
 		read_unlock(&bond->lock);
 		rtnl_lock();
@@ -3827,6 +3844,7 @@ static int bond_close(struct net_device *bond_dev)
 	write_lock_bh(&bond->lock);
 
 	bond->send_grat_arp = 0;
+	bond->send_unsol_na = 0;
 
 	/* signal timers not to re-arm */
 	bond->kill_timers = 1;
@@ -4542,6 +4560,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 	bond->primary_slave = NULL;
 	bond->dev = bond_dev;
 	bond->send_grat_arp = 0;
+	bond->send_unsol_na = 0;
 	bond->setup_by_slave = 0;
 	INIT_LIST_HEAD(&bond->vlan_list);
 
@@ -4791,6 +4810,13 @@ static int bond_check_params(struct bond_params *params)
 		num_grat_arp = 1;
 	}
 
+	if (num_unsol_na < 0 || num_unsol_na > 255) {
+		printk(KERN_WARNING DRV_NAME
+		       ": Warning: num_unsol_na (%d) not in range 0-255 so it "
+		       "was reset to 1 \n", num_unsol_na);
+		num_unsol_na = 1;
+	}
+
 	/* reset values for 802.3ad */
 	if (bond_mode == BOND_MODE_8023AD) {
 		if (!miimon) {
@@ -4992,6 +5018,7 @@ static int bond_check_params(struct bond_params *params)
 	params->xmit_policy = xmit_hashtype;
 	params->miimon = miimon;
 	params->num_grat_arp = num_grat_arp;
+	params->num_unsol_na = num_unsol_na;
 	params->arp_interval = arp_interval;
 	params->arp_validate = arp_validate_value;
 	params->updelay = updelay;
@@ -5144,6 +5171,7 @@ static int __init bonding_init(void)
 
 	register_netdevice_notifier(&bond_netdev_notifier);
 	register_inetaddr_notifier(&bond_inetaddr_notifier);
+	bond_register_ipv6_notifier();
 
 	goto out;
 err:
@@ -5166,6 +5194,7 @@ static void __exit bonding_exit(void)
 {
 	unregister_netdevice_notifier(&bond_netdev_notifier);
 	unregister_inetaddr_notifier(&bond_inetaddr_notifier);
+	bond_unregister_ipv6_notifier();
 
 	bond_destroy_sysfs();
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e400d7dfdfc8..8788e3e33852 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -983,6 +983,47 @@ out:
 	return ret;
 }
 static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR, bonding_show_n_grat_arp, bonding_store_n_grat_arp);
+
+/*
+ * Show and set the number of unsolicted NA's to send after a failover event.
+ */
+static ssize_t bonding_show_n_unsol_na(struct device *d,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n", bond->params.num_unsol_na);
+}
+
+static ssize_t bonding_store_n_unsol_na(struct device *d,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	int new_value, ret = count;
+	struct bonding *bond = to_bond(d);
+
+	if (sscanf(buf, "%d", &new_value) != 1) {
+		printk(KERN_ERR DRV_NAME
+		       ": %s: no num_unsol_na value specified.\n",
+		       bond->dev->name);
+		ret = -EINVAL;
+		goto out;
+	}
+	if (new_value < 0 || new_value > 255) {
+		printk(KERN_ERR DRV_NAME
+		       ": %s: Invalid num_unsol_na value %d not in range 0-255; rejected.\n",
+		       bond->dev->name, new_value);
+		ret = -EINVAL;
+		goto out;
+	} else {
+		bond->params.num_unsol_na = new_value;
+	}
+out:
+	return ret;
+}
+static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR, bonding_show_n_unsol_na, bonding_store_n_unsol_na);
+
 /*
  * Show and set the MII monitor interval.  There are two tricky bits
  * here.  First, if MII monitoring is activated, then we must disable
@@ -1420,6 +1461,7 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_lacp_rate.attr,
 	&dev_attr_xmit_hash_policy.attr,
 	&dev_attr_num_grat_arp.attr,
+	&dev_attr_num_unsol_na.attr,
 	&dev_attr_miimon.attr,
 	&dev_attr_primary.attr,
 	&dev_attr_use_carrier.attr,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index ffb668dd6d3b..0491c7c2645b 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -19,16 +19,19 @@
 #include <linux/proc_fs.h>
 #include <linux/if_bonding.h>
 #include <linux/kobject.h>
+#include <linux/in6.h>
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
-#define DRV_VERSION	"3.3.0"
-#define DRV_RELDATE	"June 10, 2008"
+#define DRV_VERSION	"3.4.0"
+#define DRV_RELDATE	"October 7, 2008"
 #define DRV_NAME	"bonding"
 #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"
 
 #define BOND_MAX_ARP_TARGETS	16
 
+extern struct list_head bond_dev_list;
+
 #ifdef BONDING_DEBUG
 #define dprintk(fmt, args...) \
 	printk(KERN_DEBUG     \
@@ -126,6 +129,7 @@ struct bond_params {
 	int xmit_policy;
 	int miimon;
 	int num_grat_arp;
+	int num_unsol_na;
 	int arp_interval;
 	int arp_validate;
 	int use_carrier;
@@ -148,6 +152,9 @@ struct vlan_entry {
 	struct list_head vlan_list;
 	__be32 vlan_ip;
 	unsigned short vlan_id;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct in6_addr vlan_ipv6;
+#endif
 };
 
 struct slave {
@@ -195,6 +202,7 @@ struct bonding {
 	rwlock_t curr_slave_lock;
 	s8       kill_timers;
 	s8	 send_grat_arp;
+	s8	 send_unsol_na;
 	s8	 setup_by_slave;
 	struct   net_device_stats stats;
 #ifdef CONFIG_PROC_FS
@@ -218,6 +226,9 @@ struct bonding {
 	struct   delayed_work arp_work;
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct   in6_addr master_ipv6;
+#endif
 };
 
 /**
@@ -341,5 +352,24 @@ extern struct bond_parm_tbl xmit_hashtype_tbl[];
 extern struct bond_parm_tbl arp_validate_tbl[];
 extern struct bond_parm_tbl fail_over_mac_tbl[];
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+void bond_send_unsolicited_na(struct bonding *bond);
+void bond_register_ipv6_notifier(void);
+void bond_unregister_ipv6_notifier(void);
+#else
+static inline void bond_send_unsolicited_na(struct bonding *bond)
+{
+	return;
+}
+static inline void bond_register_ipv6_notifier(void)
+{
+	return;
+}
+static inline void bond_unregister_ipv6_notifier(void)
+{
+	return;
+}
+#endif
+
 #endif /* _LINUX_BONDING_H */
 
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 11dd0137c6a5..ce532f2222ce 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -108,6 +108,20 @@ extern void			ndisc_send_redirect(struct sk_buff *skb,
 
 extern int			ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir);
 
+extern struct sk_buff		*ndisc_build_skb(struct net_device *dev,
+						 const struct in6_addr *daddr,
+						 const struct in6_addr *saddr,
+						 struct icmp6hdr *icmp6h,
+						 const struct in6_addr *target,
+						 int llinfo);
+
+extern void			ndisc_send_skb(struct sk_buff *skb,
+					       struct net_device *dev,
+					       struct neighbour *neigh,
+					       const struct in6_addr *daddr,
+					       const struct in6_addr *saddr,
+					       struct icmp6hdr *icmp6h);
+
 
 
 /*
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2a6752dae09d..fbf451c0d77a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -437,38 +437,20 @@ static void pndisc_destructor(struct pneigh_entry *n)
 	ipv6_dev_mc_dec(dev, &maddr);
 }
 
-/*
- *	Send a Neighbour Advertisement
- */
-static void __ndisc_send(struct net_device *dev,
-			 struct neighbour *neigh,
-			 const struct in6_addr *daddr,
-			 const struct in6_addr *saddr,
-			 struct icmp6hdr *icmp6h, const struct in6_addr *target,
-			 int llinfo)
+struct sk_buff *ndisc_build_skb(struct net_device *dev,
+				const struct in6_addr *daddr,
+				const struct in6_addr *saddr,
+				struct icmp6hdr *icmp6h,
+				const struct in6_addr *target,
+				int llinfo)
 {
-	struct flowi fl;
-	struct dst_entry *dst;
 	struct net *net = dev_net(dev);
 	struct sock *sk = net->ipv6.ndisc_sk;
 	struct sk_buff *skb;
 	struct icmp6hdr *hdr;
-	struct inet6_dev *idev;
 	int len;
 	int err;
-	u8 *opt, type;
-
-	type = icmp6h->icmp6_type;
-
-	icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
-
-	dst = icmp6_dst_alloc(dev, neigh, daddr);
-	if (!dst)
-		return;
-
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
-		return;
+	u8 *opt;
 
 	if (!dev->addr_len)
 		llinfo = 0;
@@ -485,8 +467,7 @@ static void __ndisc_send(struct net_device *dev,
 		ND_PRINTK0(KERN_ERR
 			   "ICMPv6 ND: %s() failed to allocate an skb.\n",
 			   __func__);
-		dst_release(dst);
-		return;
+		return NULL;
 	}
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -513,6 +494,42 @@ static void __ndisc_send(struct net_device *dev,
 					   csum_partial((__u8 *) hdr,
 							len, 0));
 
+	return skb;
+}
+
+EXPORT_SYMBOL(ndisc_build_skb);
+
+void ndisc_send_skb(struct sk_buff *skb,
+		    struct net_device *dev,
+		    struct neighbour *neigh,
+		    const struct in6_addr *daddr,
+		    const struct in6_addr *saddr,
+		    struct icmp6hdr *icmp6h)
+{
+	struct flowi fl;
+	struct dst_entry *dst;
+	struct net *net = dev_net(dev);
+	struct sock *sk = net->ipv6.ndisc_sk;
+	struct inet6_dev *idev;
+	int err;
+	u8 type;
+
+	type = icmp6h->icmp6_type;
+
+	icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
+
+	dst = icmp6_dst_alloc(dev, neigh, daddr);
+	if (!dst) {
+		kfree_skb(skb);
+		return;
+	}
+
+	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		return;
+	}
+
 	skb->dst = dst;
 
 	idev = in6_dev_get(dst->dev);
@@ -529,6 +546,27 @@ static void __ndisc_send(struct net_device *dev,
 		in6_dev_put(idev);
 }
 
+EXPORT_SYMBOL(ndisc_send_skb);
+
+/*
+ *	Send a Neighbour Discover packet
+ */
+static void __ndisc_send(struct net_device *dev,
+			 struct neighbour *neigh,
+			 const struct in6_addr *daddr,
+			 const struct in6_addr *saddr,
+			 struct icmp6hdr *icmp6h, const struct in6_addr *target,
+			 int llinfo)
+{
+	struct sk_buff *skb;
+
+	skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo);
+	if (!skb)
+		return;
+
+	ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
+}
+
 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 			  const struct in6_addr *daddr,
 			  const struct in6_addr *solicited_addr,
-- 
cgit v1.2.3


From fd9abb3d97c2ab883e4732ec1214fe64190236e7 Mon Sep 17 00:00:00 2001
From: Steve Glendinning <steve.glendinning@smsc.com>
Date: Wed, 5 Nov 2008 00:35:37 +0000
Subject: SMSC LAN911x and LAN921x vendor driver

Attached is a driver for SMSC's LAN911x and LAN921x families of embedded
ethernet controllers.

There is an existing smc911x driver in the tree; this is intended to
replace it.  Dustin McIntire (the author of the smc911x driver) has
expressed his support for switching to this driver.

This driver contains workarounds for all known hardware issues, and has
been tested on all flavours of the chip on multiple architectures.

This driver now uses phylib, so this patch also adds support for the
device's internal phy

Signed-off-by: Steve Glendinning <steve.glendinning@smsc.com>
Signed-off-by: Bahadir Balban <Bahadir.Balban@arm.com>
Signed-off-by: Dustin Mcintire <dustin@sensoria.com>
Signed-off-by: Bill Gatliff <bgat@billgatliff.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 MAINTAINERS              |    6 +
 drivers/net/Kconfig      |   14 +
 drivers/net/Makefile     |    1 +
 drivers/net/phy/smsc.c   |   28 +
 drivers/net/smsc911x.c   | 2091 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/smsc911x.h   |  394 +++++++++
 include/linux/smsc911x.h |   42 +
 7 files changed, 2576 insertions(+)
 create mode 100644 drivers/net/smsc911x.c
 create mode 100644 drivers/net/smsc911x.h
 create mode 100644 include/linux/smsc911x.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 129117fe6490..5d951f3db018 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3853,6 +3853,12 @@ M:	mhoffman@lightlink.com
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 
+SMSC911x ETHERNET DRIVER
+P:	Steve Glendinning
+M:	steve.glendinning@smsc.com
+L:	netdev@vger.kernel.org
+S:	Supported
+
 SMX UIO Interface
 P:	Ben Nizette
 M:	bn@niasdigital.com
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f1d0a1371695..74a18a7f8f40 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -979,6 +979,20 @@ config SMC911X
 	  called smc911x.  If you want to compile it as a module, say M 
 	  here and read <file:Documentation/kbuild/modules.txt>
 
+config SMSC911X
+	tristate "SMSC LAN911x/LAN921x families embedded ethernet support"
+	depends on ARM || SUPERH
+	select CRC32
+	select MII
+	select PHYLIB
+	---help---
+	  Say Y here if you want support for SMSC LAN911x and LAN921x families
+	  of ethernet controllers.
+
+	  To compile this driver as a module, choose M here and read
+	  <file:Documentation/networking/net-modules.txt>. The module
+	  will be called smsc911x.
+
 config NET_VENDOR_RACAL
 	bool "Racal-Interlan (Micom) NI cards"
 	depends on ISA
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 657c47b1a6b6..e06829aa75b0 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -219,6 +219,7 @@ obj-$(CONFIG_S2IO) += s2io.o
 obj-$(CONFIG_MYRI10GE) += myri10ge/
 obj-$(CONFIG_SMC91X) += smc91x.o
 obj-$(CONFIG_SMC911X) += smc911x.o
+obj-$(CONFIG_SMSC911X) += smsc911x.o
 obj-$(CONFIG_BFIN_MAC) += bfin_mac.o
 obj-$(CONFIG_DM9000) += dm9000.o
 obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 73baa7a3bb0e..c05d38d46350 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -126,6 +126,27 @@ static struct phy_driver lan8700_driver = {
 	.driver		= { .owner = THIS_MODULE, }
 };
 
+static struct phy_driver lan911x_int_driver = {
+	.phy_id		= 0x0007c0d0, /* OUI=0x00800f, Model#=0x0d */
+	.phy_id_mask	= 0xfffffff0,
+	.name		= "SMSC LAN911x Internal PHY",
+
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause
+				| SUPPORTED_Asym_Pause),
+	.flags		= PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG,
+
+	/* basic functions */
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.config_init	= smsc_phy_config_init,
+
+	/* IRQ related */
+	.ack_interrupt	= smsc_phy_ack_interrupt,
+	.config_intr	= smsc_phy_config_intr,
+
+	.driver		= { .owner = THIS_MODULE, }
+};
+
 static int __init smsc_init(void)
 {
 	int ret;
@@ -142,8 +163,14 @@ static int __init smsc_init(void)
 	if (ret)
 		goto err3;
 
+	ret = phy_driver_register (&lan911x_int_driver);
+	if (ret)
+		goto err4;
+
 	return 0;
 
+err4:
+	phy_driver_unregister (&lan8700_driver);
 err3:
 	phy_driver_unregister (&lan8187_driver);
 err2:
@@ -154,6 +181,7 @@ err1:
 
 static void __exit smsc_exit(void)
 {
+	phy_driver_unregister (&lan911x_int_driver);
 	phy_driver_unregister (&lan8700_driver);
 	phy_driver_unregister (&lan8187_driver);
 	phy_driver_unregister (&lan83c185_driver);
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
new file mode 100644
index 000000000000..fe517880fc97
--- /dev/null
+++ b/drivers/net/smsc911x.c
@@ -0,0 +1,2091 @@
+/***************************************************************************
+ *
+ * Copyright (C) 2004-2008 SMSC
+ * Copyright (C) 2005-2008 ARM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ ***************************************************************************
+ * Rewritten, heavily based on smsc911x simple driver by SMSC.
+ * Partly uses io macros from smc91x.c by Nicolas Pitre
+ *
+ * Supported devices:
+ *   LAN9115, LAN9116, LAN9117, LAN9118
+ *   LAN9215, LAN9216, LAN9217, LAN9218
+ *   LAN9210, LAN9211
+ *   LAN9220, LAN9221
+ *
+ */
+
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/version.h>
+#include <linux/bug.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/phy.h>
+#include <linux/smsc911x.h>
+#include "smsc911x.h"
+
+#define SMSC_CHIPNAME		"smsc911x"
+#define SMSC_MDIONAME		"smsc911x-mdio"
+#define SMSC_DRV_VERSION	"2008-10-21"
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(SMSC_DRV_VERSION);
+
+#if USE_DEBUG > 0
+static int debug = 16;
+#else
+static int debug = 3;
+#endif
+
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+struct smsc911x_data {
+	void __iomem *ioaddr;
+
+	unsigned int idrev;
+
+	/* used to decide which workarounds apply */
+	unsigned int generation;
+
+	/* device configuration (copied from platform_data during probe) */
+	unsigned int irq_polarity;
+	unsigned int irq_type;
+	phy_interface_t phy_interface;
+
+	/* This needs to be acquired before calling any of below:
+	 * smsc911x_mac_read(), smsc911x_mac_write()
+	 */
+	spinlock_t mac_lock;
+
+#if (!SMSC_CAN_USE_32BIT)
+	/* spinlock to ensure 16-bit accesses are serialised */
+	spinlock_t dev_lock;
+#endif
+
+	struct phy_device *phy_dev;
+	struct mii_bus *mii_bus;
+	int phy_irq[PHY_MAX_ADDR];
+	unsigned int using_extphy;
+	int last_duplex;
+	int last_carrier;
+
+	u32 msg_enable;
+	unsigned int gpio_setting;
+	unsigned int gpio_orig_setting;
+	struct net_device *dev;
+	struct napi_struct napi;
+
+	unsigned int software_irq_signal;
+
+#ifdef USE_PHY_WORK_AROUND
+#define MIN_PACKET_SIZE (64)
+	char loopback_tx_pkt[MIN_PACKET_SIZE];
+	char loopback_rx_pkt[MIN_PACKET_SIZE];
+	unsigned int resetcount;
+#endif
+
+	/* Members for Multicast filter workaround */
+	unsigned int multicast_update_pending;
+	unsigned int set_bits_mask;
+	unsigned int clear_bits_mask;
+	unsigned int hashhi;
+	unsigned int hashlo;
+};
+
+#if SMSC_CAN_USE_32BIT
+
+static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
+{
+	return readl(pdata->ioaddr + reg);
+}
+
+static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
+				      u32 val)
+{
+	writel(val, pdata->ioaddr + reg);
+}
+
+/* Writes a packet to the TX_DATA_FIFO */
+static inline void
+smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
+		      unsigned int wordcount)
+{
+	writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount);
+}
+
+/* Reads a packet out of the RX_DATA_FIFO */
+static inline void
+smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
+		     unsigned int wordcount)
+{
+	readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount);
+}
+
+#else				/* SMSC_CAN_USE_32BIT */
+
+/* These 16-bit access functions are significantly slower, due to the locking
+ * necessary.  If your bus hardware can be configured to do this for you
+ * (in response to a single 32-bit operation from software), you should use
+ * the 32-bit access functions instead. */
+
+static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
+{
+	unsigned long flags;
+	u32 data;
+
+	/* these two 16-bit reads must be performed consecutively, so must
+	 * not be interrupted by our own ISR (which would start another
+	 * read operation) */
+	spin_lock_irqsave(&pdata->dev_lock, flags);
+	data = ((readw(pdata->ioaddr + reg) & 0xFFFF) |
+		((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16));
+	spin_unlock_irqrestore(&pdata->dev_lock, flags);
+
+	return data;
+}
+
+static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
+				      u32 val)
+{
+	unsigned long flags;
+
+	/* these two 16-bit writes must be performed consecutively, so must
+	 * not be interrupted by our own ISR (which would start another
+	 * read operation) */
+	spin_lock_irqsave(&pdata->dev_lock, flags);
+	writew(val & 0xFFFF, pdata->ioaddr + reg);
+	writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2);
+	spin_unlock_irqrestore(&pdata->dev_lock, flags);
+}
+
+/* Writes a packet to the TX_DATA_FIFO */
+static inline void
+smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
+		      unsigned int wordcount)
+{
+	while (wordcount--)
+		smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
+}
+
+/* Reads a packet out of the RX_DATA_FIFO */
+static inline void
+smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
+		     unsigned int wordcount)
+{
+	while (wordcount--)
+		*buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO);
+}
+
+#endif				/* SMSC_CAN_USE_32BIT */
+
+/* waits for MAC not busy, with timeout.  Only called by smsc911x_mac_read
+ * and smsc911x_mac_write, so assumes mac_lock is held */
+static int smsc911x_mac_complete(struct smsc911x_data *pdata)
+{
+	int i;
+	u32 val;
+
+	SMSC_ASSERT_MAC_LOCK(pdata);
+
+	for (i = 0; i < 40; i++) {
+		val = smsc911x_reg_read(pdata, MAC_CSR_CMD);
+		if (!(val & MAC_CSR_CMD_CSR_BUSY_))
+			return 0;
+	}
+	SMSC_WARNING(HW, "Timed out waiting for MAC not BUSY. "
+		"MAC_CSR_CMD: 0x%08X", val);
+	return -EIO;
+}
+
+/* Fetches a MAC register value. Assumes mac_lock is acquired */
+static u32 smsc911x_mac_read(struct smsc911x_data *pdata, unsigned int offset)
+{
+	unsigned int temp;
+
+	SMSC_ASSERT_MAC_LOCK(pdata);
+
+	temp = smsc911x_reg_read(pdata, MAC_CSR_CMD);
+	if (unlikely(temp & MAC_CSR_CMD_CSR_BUSY_)) {
+		SMSC_WARNING(HW, "MAC busy at entry");
+		return 0xFFFFFFFF;
+	}
+
+	/* Send the MAC cmd */
+	smsc911x_reg_write(pdata, MAC_CSR_CMD, ((offset & 0xFF) |
+		MAC_CSR_CMD_CSR_BUSY_ | MAC_CSR_CMD_R_NOT_W_));
+
+	/* Workaround for hardware read-after-write restriction */
+	temp = smsc911x_reg_read(pdata, BYTE_TEST);
+
+	/* Wait for the read to complete */
+	if (likely(smsc911x_mac_complete(pdata) == 0))
+		return smsc911x_reg_read(pdata, MAC_CSR_DATA);
+
+	SMSC_WARNING(HW, "MAC busy after read");
+	return 0xFFFFFFFF;
+}
+
+/* Set a mac register, mac_lock must be acquired before calling */
+static void smsc911x_mac_write(struct smsc911x_data *pdata,
+			       unsigned int offset, u32 val)
+{
+	unsigned int temp;
+
+	SMSC_ASSERT_MAC_LOCK(pdata);
+
+	temp = smsc911x_reg_read(pdata, MAC_CSR_CMD);
+	if (unlikely(temp & MAC_CSR_CMD_CSR_BUSY_)) {
+		SMSC_WARNING(HW,
+			"smsc911x_mac_write failed, MAC busy at entry");
+		return;
+	}
+
+	/* Send data to write */
+	smsc911x_reg_write(pdata, MAC_CSR_DATA, val);
+
+	/* Write the actual data */
+	smsc911x_reg_write(pdata, MAC_CSR_CMD, ((offset & 0xFF) |
+		MAC_CSR_CMD_CSR_BUSY_));
+
+	/* Workaround for hardware read-after-write restriction */
+	temp = smsc911x_reg_read(pdata, BYTE_TEST);
+
+	/* Wait for the write to complete */
+	if (likely(smsc911x_mac_complete(pdata) == 0))
+		return;
+
+	SMSC_WARNING(HW,
+		"smsc911x_mac_write failed, MAC busy after write");
+}
+
+/* Get a phy register */
+static int smsc911x_mii_read(struct mii_bus *bus, int phyaddr, int regidx)
+{
+	struct smsc911x_data *pdata = (struct smsc911x_data *)bus->priv;
+	unsigned long flags;
+	unsigned int addr;
+	int i, reg;
+
+	spin_lock_irqsave(&pdata->mac_lock, flags);
+
+	/* Confirm MII not busy */
+	if (unlikely(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) {
+		SMSC_WARNING(HW,
+			"MII is busy in smsc911x_mii_read???");
+		reg = -EIO;
+		goto out;
+	}
+
+	/* Set the address, index & direction (read from PHY) */
+	addr = ((phyaddr & 0x1F) << 11) | ((regidx & 0x1F) << 6);
+	smsc911x_mac_write(pdata, MII_ACC, addr);
+
+	/* Wait for read to complete w/ timeout */
+	for (i = 0; i < 100; i++)
+		if (!(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) {
+			reg = smsc911x_mac_read(pdata, MII_DATA);
+			goto out;
+		}
+
+	SMSC_WARNING(HW, "Timed out waiting for MII write to finish");
+	reg = -EIO;
+
+out:
+	spin_unlock_irqrestore(&pdata->mac_lock, flags);
+	return reg;
+}
+
+/* Set a phy register */
+static int smsc911x_mii_write(struct mii_bus *bus, int phyaddr, int regidx,
+			   u16 val)
+{
+	struct smsc911x_data *pdata = (struct smsc911x_data *)bus->priv;
+	unsigned long flags;
+	unsigned int addr;
+	int i, reg;
+
+	spin_lock_irqsave(&pdata->mac_lock, flags);
+
+	/* Confirm MII not busy */
+	if (unlikely(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) {
+		SMSC_WARNING(HW,
+			"MII is busy in smsc911x_mii_write???");
+		reg = -EIO;
+		goto out;
+	}
+
+	/* Put the data to write in the MAC */
+	smsc911x_mac_write(pdata, MII_DATA, val);
+
+	/* Set the address, index & direction (write to PHY) */
+	addr = ((phyaddr & 0x1F) << 11) | ((regidx & 0x1F) << 6) |
+		MII_ACC_MII_WRITE_;
+	smsc911x_mac_write(pdata, MII_ACC, addr);
+
+	/* Wait for write to complete w/ timeout */
+	for (i = 0; i < 100; i++)
+		if (!(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) {
+			reg = 0;
+			goto out;
+		}
+
+	SMSC_WARNING(HW, "Timed out waiting for MII write to finish");
+	reg = -EIO;
+
+out:
+	spin_unlock_irqrestore(&pdata->mac_lock, flags);
+	return reg;
+}
+
+/* Autodetects and initialises external phy for SMSC9115 and SMSC9117 flavors.
+ * If something goes wrong, returns -ENODEV to revert back to internal phy.
+ * Performed at initialisation only, so interrupts are enabled */
+static int smsc911x_phy_initialise_external(struct smsc911x_data *pdata)
+{
+	unsigned int hwcfg = smsc911x_reg_read(pdata, HW_CFG);
+
+	/* External phy is requested, supported, and detected */
+	if (hwcfg & HW_CFG_EXT_PHY_DET_) {
+
+		/* Switch to external phy. Assuming tx and rx are stopped
+		 * because smsc911x_phy_initialise is called before
+		 * smsc911x_rx_initialise and tx_initialise. */
+
+		/* Disable phy clocks to the MAC */
+		hwcfg &= (~HW_CFG_PHY_CLK_SEL_);
+		hwcfg |= HW_CFG_PHY_CLK_SEL_CLK_DIS_;
+		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+		udelay(10);	/* Enough time for clocks to stop */
+
+		/* Switch to external phy */
+		hwcfg |= HW_CFG_EXT_PHY_EN_;
+		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+
+		/* Enable phy clocks to the MAC */
+		hwcfg &= (~HW_CFG_PHY_CLK_SEL_);
+		hwcfg |= HW_CFG_PHY_CLK_SEL_EXT_PHY_;
+		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+		udelay(10);	/* Enough time for clocks to restart */
+
+		hwcfg |= HW_CFG_SMI_SEL_;
+		smsc911x_reg_write(pdata, HW_CFG, hwcfg);
+
+		SMSC_TRACE(HW, "Successfully switched to external PHY");
+		pdata->using_extphy = 1;
+	} else {
+		SMSC_WARNING(HW, "No external PHY detected, "
+			"Using internal PHY instead.");
+		/* Use internal phy */
+		return -ENODEV;
+	}
+	return 0;
+}
+
+/* Fetches a tx status out of the status fifo */
+static unsigned int smsc911x_tx_get_txstatus(struct smsc911x_data *pdata)
+{
+	unsigned int result =
+	    smsc911x_reg_read(pdata, TX_FIFO_INF) & TX_FIFO_INF_TSUSED_;
+
+	if (result != 0)
+		result = smsc911x_reg_read(pdata, TX_STATUS_FIFO);
+
+	return result;
+}
+
+/* Fetches the next rx status */
+static unsigned int smsc911x_rx_get_rxstatus(struct smsc911x_data *pdata)
+{
+	unsigned int result =
+	    smsc911x_reg_read(pdata, RX_FIFO_INF) & RX_FIFO_INF_RXSUSED_;
+
+	if (result != 0)
+		result = smsc911x_reg_read(pdata, RX_STATUS_FIFO);
+
+	return result;
+}
+
+#ifdef USE_PHY_WORK_AROUND
+static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata)
+{
+	unsigned int tries;
+	u32 wrsz;
+	u32 rdsz;
+	ulong bufp;
+
+	for (tries = 0; tries < 10; tries++) {
+		unsigned int txcmd_a;
+		unsigned int txcmd_b;
+		unsigned int status;
+		unsigned int pktlength;
+		unsigned int i;
+
+		/* Zero-out rx packet memory */
+		memset(pdata->loopback_rx_pkt, 0, MIN_PACKET_SIZE);
+
+		/* Write tx packet to 118 */
+		txcmd_a = (u32)((ulong)pdata->loopback_tx_pkt & 0x03) << 16;
+		txcmd_a |= TX_CMD_A_FIRST_SEG_ | TX_CMD_A_LAST_SEG_;
+		txcmd_a |= MIN_PACKET_SIZE;
+
+		txcmd_b = MIN_PACKET_SIZE << 16 | MIN_PACKET_SIZE;
+
+		smsc911x_reg_write(pdata, TX_DATA_FIFO, txcmd_a);
+		smsc911x_reg_write(pdata, TX_DATA_FIFO, txcmd_b);
+
+		bufp = (ulong)pdata->loopback_tx_pkt & (~0x3);
+		wrsz = MIN_PACKET_SIZE + 3;
+		wrsz += (u32)((ulong)pdata->loopback_tx_pkt & 0x3);
+		wrsz >>= 2;
+
+		smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz);
+
+		/* Wait till transmit is done */
+		i = 60;
+		do {
+			udelay(5);
+			status = smsc911x_tx_get_txstatus(pdata);
+		} while ((i--) && (!status));
+
+		if (!status) {
+			SMSC_WARNING(HW, "Failed to transmit "
+				"during loopback test");
+			continue;
+		}
+		if (status & TX_STS_ES_) {
+			SMSC_WARNING(HW, "Transmit encountered "
+				"errors during loopback test");
+			continue;
+		}
+
+		/* Wait till receive is done */
+		i = 60;
+		do {
+			udelay(5);
+			status = smsc911x_rx_get_rxstatus(pdata);
+		} while ((i--) && (!status));
+
+		if (!status) {
+			SMSC_WARNING(HW,
+				"Failed to receive during loopback test");
+			continue;
+		}
+		if (status & RX_STS_ES_) {
+			SMSC_WARNING(HW, "Receive encountered "
+				"errors during loopback test");
+			continue;
+		}
+
+		pktlength = ((status & 0x3FFF0000UL) >> 16);
+		bufp = (ulong)pdata->loopback_rx_pkt;
+		rdsz = pktlength + 3;
+		rdsz += (u32)((ulong)pdata->loopback_rx_pkt & 0x3);
+		rdsz >>= 2;
+
+		smsc911x_rx_readfifo(pdata, (unsigned int *)bufp, rdsz);
+
+		if (pktlength != (MIN_PACKET_SIZE + 4)) {
+			SMSC_WARNING(HW, "Unexpected packet size "
+				"during loop back test, size=%d, will retry",
+				pktlength);
+		} else {
+			unsigned int j;
+			int mismatch = 0;
+			for (j = 0; j < MIN_PACKET_SIZE; j++) {
+				if (pdata->loopback_tx_pkt[j]
+				    != pdata->loopback_rx_pkt[j]) {
+					mismatch = 1;
+					break;
+				}
+			}
+			if (!mismatch) {
+				SMSC_TRACE(HW, "Successfully verified "
+					   "loopback packet");
+				return 0;
+			} else {
+				SMSC_WARNING(HW, "Data mismatch "
+					"during loop back test, will retry");
+			}
+		}
+	}
+
+	return -EIO;
+}
+
+static int smsc911x_phy_reset(struct smsc911x_data *pdata)
+{
+	struct phy_device *phy_dev = pdata->phy_dev;
+	unsigned int temp;
+	unsigned int i = 100000;
+
+	BUG_ON(!phy_dev);
+	BUG_ON(!phy_dev->bus);
+
+	SMSC_TRACE(HW, "Performing PHY BCR Reset");
+	smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, BMCR_RESET);
+	do {
+		msleep(1);
+		temp = smsc911x_mii_read(phy_dev->bus, phy_dev->addr,
+			MII_BMCR);
+	} while ((i--) && (temp & BMCR_RESET));
+
+	if (temp & BMCR_RESET) {
+		SMSC_WARNING(HW, "PHY reset failed to complete.");
+		return -EIO;
+	}
+	/* Extra delay required because the phy may not be completed with
+	* its reset when BMCR_RESET is cleared. Specs say 256 uS is
+	* enough delay but using 1ms here to be safe */
+	msleep(1);
+
+	return 0;
+}
+
+static int smsc911x_phy_loopbacktest(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	struct phy_device *phy_dev = pdata->phy_dev;
+	int result = -EIO;
+	unsigned int i, val;
+	unsigned long flags;
+
+	/* Initialise tx packet using broadcast destination address */
+	memset(pdata->loopback_tx_pkt, 0xff, ETH_ALEN);
+
+	/* Use incrementing source address */
+	for (i = 6; i < 12; i++)
+		pdata->loopback_tx_pkt[i] = (char)i;
+
+	/* Set length type field */
+	pdata->loopback_tx_pkt[12] = 0x00;
+	pdata->loopback_tx_pkt[13] = 0x00;
+
+	for (i = 14; i < MIN_PACKET_SIZE; i++)
+		pdata->loopback_tx_pkt[i] = (char)i;
+
+	val = smsc911x_reg_read(pdata, HW_CFG);
+	val &= HW_CFG_TX_FIF_SZ_;
+	val |= HW_CFG_SF_;
+	smsc911x_reg_write(pdata, HW_CFG, val);
+
+	smsc911x_reg_write(pdata, TX_CFG, TX_CFG_TX_ON_);
+	smsc911x_reg_write(pdata, RX_CFG,
+		(u32)((ulong)pdata->loopback_rx_pkt & 0x03) << 8);
+
+	for (i = 0; i < 10; i++) {
+		/* Set PHY to 10/FD, no ANEG, and loopback mode */
+		smsc911x_mii_write(phy_dev->bus, phy_dev->addr,	MII_BMCR,
+			BMCR_LOOPBACK | BMCR_FULLDPLX);
+
+		/* Enable MAC tx/rx, FD */
+		spin_lock_irqsave(&pdata->mac_lock, flags);
+		smsc911x_mac_write(pdata, MAC_CR, MAC_CR_FDPX_
+				   | MAC_CR_TXEN_ | MAC_CR_RXEN_);
+		spin_unlock_irqrestore(&pdata->mac_lock, flags);
+
+		if (smsc911x_phy_check_loopbackpkt(pdata) == 0) {
+			result = 0;
+			break;
+		}
+		pdata->resetcount++;
+
+		/* Disable MAC rx */
+		spin_lock_irqsave(&pdata->mac_lock, flags);
+		smsc911x_mac_write(pdata, MAC_CR, 0);
+		spin_unlock_irqrestore(&pdata->mac_lock, flags);
+
+		smsc911x_phy_reset(pdata);
+	}
+
+	/* Disable MAC */
+	spin_lock_irqsave(&pdata->mac_lock, flags);
+	smsc911x_mac_write(pdata, MAC_CR, 0);
+	spin_unlock_irqrestore(&pdata->mac_lock, flags);
+
+	/* Cancel PHY loopback mode */
+	smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, 0);
+
+	smsc911x_reg_write(pdata, TX_CFG, 0);
+	smsc911x_reg_write(pdata, RX_CFG, 0);
+
+	return result;
+}
+#endif				/* USE_PHY_WORK_AROUND */
+
+static u8 smsc95xx_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv)
+{
+	u8 cap = 0;
+
+	if (lcladv & ADVERTISE_PAUSE_CAP) {
+		if (lcladv & ADVERTISE_PAUSE_ASYM) {
+			if (rmtadv & LPA_PAUSE_CAP)
+				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
+			else if (rmtadv & LPA_PAUSE_ASYM)
+				cap = FLOW_CTRL_RX;
+		} else {
+			if (rmtadv & LPA_PAUSE_CAP)
+				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
+		}
+	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
+		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
+			cap = FLOW_CTRL_TX;
+	}
+
+	return cap;
+}
+
+static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata)
+{
+	struct phy_device *phy_dev = pdata->phy_dev;
+	u32 afc = smsc911x_reg_read(pdata, AFC_CFG);
+	u32 flow;
+	unsigned long flags;
+
+	if (phy_dev->duplex == DUPLEX_FULL) {
+		u16 lcladv = phy_read(phy_dev, MII_ADVERTISE);
+		u16 rmtadv = phy_read(phy_dev, MII_LPA);
+		u8 cap = smsc95xx_resolve_flowctrl_fulldplx(lcladv, rmtadv);
+
+		if (cap & FLOW_CTRL_RX)
+			flow = 0xFFFF0002;
+		else
+			flow = 0;
+
+		if (cap & FLOW_CTRL_TX)
+			afc |= 0xF;
+		else
+			afc &= ~0xF;
+
+		SMSC_TRACE(HW, "rx pause %s, tx pause %s",
+			(cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
+			(cap & FLOW_CTRL_TX ? "enabled" : "disabled"));
+	} else {
+		SMSC_TRACE(HW, "half duplex");
+		flow = 0;
+		afc |= 0xF;
+	}
+
+	spin_lock_irqsave(&pdata->mac_lock, flags);
+	smsc911x_mac_write(pdata, FLOW, flow);
+	spin_unlock_irqrestore(&pdata->mac_lock, flags);
+
+	smsc911x_reg_write(pdata, AFC_CFG, afc);
+}
+
+/* Update link mode if anything has changed.  Called periodically when the
+ * PHY is in polling mode, even if nothing has changed. */
+static void smsc911x_phy_adjust_link(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	struct phy_device *phy_dev = pdata->phy_dev;
+	unsigned long flags;
+	int carrier;
+
+	if (phy_dev->duplex != pdata->last_duplex) {
+		unsigned int mac_cr;
+		SMSC_TRACE(HW, "duplex state has changed");
+
+		spin_lock_irqsave(&pdata->mac_lock, flags);
+		mac_cr = smsc911x_mac_read(pdata, MAC_CR);
+		if (phy_dev->duplex) {
+			SMSC_TRACE(HW,
+				"configuring for full duplex mode");
+			mac_cr |= MAC_CR_FDPX_;
+		} else {
+			SMSC_TRACE(HW,
+				"configuring for half duplex mode");
+			mac_cr &= ~MAC_CR_FDPX_;
+		}
+		smsc911x_mac_write(pdata, MAC_CR, mac_cr);
+		spin_unlock_irqrestore(&pdata->mac_lock, flags);
+
+		smsc911x_phy_update_flowcontrol(pdata);
+		pdata->last_duplex = phy_dev->duplex;
+	}
+
+	carrier = netif_carrier_ok(dev);
+	if (carrier != pdata->last_carrier) {
+		SMSC_TRACE(HW, "carrier state has changed");
+		if (carrier) {
+			SMSC_TRACE(HW, "configuring for carrier OK");
+			if ((pdata->gpio_orig_setting & GPIO_CFG_LED1_EN_) &&
+			    (!pdata->using_extphy)) {
+				/* Restore orginal GPIO configuration */
+				pdata->gpio_setting = pdata->gpio_orig_setting;
+				smsc911x_reg_write(pdata, GPIO_CFG,
+					pdata->gpio_setting);
+			}
+		} else {
+			SMSC_TRACE(HW, "configuring for no carrier");
+			/* Check global setting that LED1
+			 * usage is 10/100 indicator */
+			pdata->gpio_setting = smsc911x_reg_read(pdata,
+				GPIO_CFG);
+			if ((pdata->gpio_setting & GPIO_CFG_LED1_EN_)
+			    && (!pdata->using_extphy)) {
+				/* Force 10/100 LED off, after saving
+				 * orginal GPIO configuration */
+				pdata->gpio_orig_setting = pdata->gpio_setting;
+
+				pdata->gpio_setting &= ~GPIO_CFG_LED1_EN_;
+				pdata->gpio_setting |= (GPIO_CFG_GPIOBUF0_
+							| GPIO_CFG_GPIODIR0_
+							| GPIO_CFG_GPIOD0_);
+				smsc911x_reg_write(pdata, GPIO_CFG,
+					pdata->gpio_setting);
+			}
+		}
+		pdata->last_carrier = carrier;
+	}
+}
+
+static int smsc911x_mii_probe(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	struct phy_device *phydev = NULL;
+	int phy_addr;
+
+	/* find the first phy */
+	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
+		if (pdata->mii_bus->phy_map[phy_addr]) {
+			phydev = pdata->mii_bus->phy_map[phy_addr];
+			SMSC_TRACE(PROBE, "PHY %d: addr %d, phy_id 0x%08X",
+				phy_addr, phydev->addr, phydev->phy_id);
+			break;
+		}
+	}
+
+	if (!phydev) {
+		pr_err("%s: no PHY found\n", dev->name);
+		return -ENODEV;
+	}
+
+	phydev = phy_connect(dev, phydev->dev.bus_id,
+		&smsc911x_phy_adjust_link, 0, pdata->phy_interface);
+
+	if (IS_ERR(phydev)) {
+		pr_err("%s: Could not attach to PHY\n", dev->name);
+		return PTR_ERR(phydev);
+	}
+
+	pr_info("%s: attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n",
+		dev->name, phydev->drv->name, phydev->dev.bus_id, phydev->irq);
+
+	/* mask with MAC supported features */
+	phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
+			      SUPPORTED_Asym_Pause);
+	phydev->advertising = phydev->supported;
+
+	pdata->phy_dev = phydev;
+	pdata->last_duplex = -1;
+	pdata->last_carrier = -1;
+
+#ifdef USE_PHY_WORK_AROUND
+	if (smsc911x_phy_loopbacktest(dev) < 0) {
+		SMSC_WARNING(HW, "Failed Loop Back Test");
+		return -ENODEV;
+	}
+	SMSC_TRACE(HW, "Passed Loop Back Test");
+#endif				/* USE_PHY_WORK_AROUND */
+
+	SMSC_TRACE(HW, "phy initialised succesfully");
+	return 0;
+}
+
+static int __devinit smsc911x_mii_init(struct platform_device *pdev,
+				       struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	int err = -ENXIO, i;
+
+	pdata->mii_bus = mdiobus_alloc();
+	if (!pdata->mii_bus) {
+		err = -ENOMEM;
+		goto err_out_1;
+	}
+
+	pdata->mii_bus->name = SMSC_MDIONAME;
+	snprintf(pdata->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id);
+	pdata->mii_bus->priv = pdata;
+	pdata->mii_bus->read = smsc911x_mii_read;
+	pdata->mii_bus->write = smsc911x_mii_write;
+	pdata->mii_bus->irq = pdata->phy_irq;
+	for (i = 0; i < PHY_MAX_ADDR; ++i)
+		pdata->mii_bus->irq[i] = PHY_POLL;
+
+	pdata->mii_bus->parent = &pdev->dev;
+	dev_set_drvdata(&pdev->dev, &pdata->mii_bus);
+
+	pdata->using_extphy = 0;
+
+	switch (pdata->idrev & 0xFFFF0000) {
+	case 0x01170000:
+	case 0x01150000:
+	case 0x117A0000:
+	case 0x115A0000:
+		/* External PHY supported, try to autodetect */
+		if (smsc911x_phy_initialise_external(pdata) < 0) {
+			SMSC_TRACE(HW, "No external PHY detected, "
+				"using internal PHY");
+		}
+		break;
+	default:
+		SMSC_TRACE(HW, "External PHY is not supported, "
+			"using internal PHY");
+		break;
+	}
+
+	if (!pdata->using_extphy) {
+		/* Mask all PHYs except ID 1 (internal) */
+		pdata->mii_bus->phy_mask = ~(1 << 1);
+	}
+
+	if (mdiobus_register(pdata->mii_bus)) {
+		SMSC_WARNING(PROBE, "Error registering mii bus");
+		goto err_out_free_bus_2;
+	}
+
+	if (smsc911x_mii_probe(dev) < 0) {
+		SMSC_WARNING(PROBE, "Error registering mii bus");
+		goto err_out_unregister_bus_3;
+	}
+
+	return 0;
+
+err_out_unregister_bus_3:
+	mdiobus_unregister(pdata->mii_bus);
+err_out_free_bus_2:
+	mdiobus_free(pdata->mii_bus);
+err_out_1:
+	return err;
+}
+
+/* Gets the number of tx statuses in the fifo */
+static unsigned int smsc911x_tx_get_txstatcount(struct smsc911x_data *pdata)
+{
+	return (smsc911x_reg_read(pdata, TX_FIFO_INF)
+		& TX_FIFO_INF_TSUSED_) >> 16;
+}
+
+/* Reads tx statuses and increments counters where necessary */
+static void smsc911x_tx_update_txcounters(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	unsigned int tx_stat;
+
+	while ((tx_stat = smsc911x_tx_get_txstatus(pdata)) != 0) {
+		if (unlikely(tx_stat & 0x80000000)) {
+			/* In this driver the packet tag is used as the packet
+			 * length. Since a packet length can never reach the
+			 * size of 0x8000, this bit is reserved. It is worth
+			 * noting that the "reserved bit" in the warning above
+			 * does not reference a hardware defined reserved bit
+			 * but rather a driver defined one.
+			 */
+			SMSC_WARNING(HW,
+				"Packet tag reserved bit is high");
+		} else {
+			if (unlikely(tx_stat & 0x00008000)) {
+				dev->stats.tx_errors++;
+			} else {
+				dev->stats.tx_packets++;
+				dev->stats.tx_bytes += (tx_stat >> 16);
+			}
+			if (unlikely(tx_stat & 0x00000100)) {
+				dev->stats.collisions += 16;
+				dev->stats.tx_aborted_errors += 1;
+			} else {
+				dev->stats.collisions +=
+				    ((tx_stat >> 3) & 0xF);
+			}
+			if (unlikely(tx_stat & 0x00000800))
+				dev->stats.tx_carrier_errors += 1;
+			if (unlikely(tx_stat & 0x00000200)) {
+				dev->stats.collisions++;
+				dev->stats.tx_aborted_errors++;
+			}
+		}
+	}
+}
+
+/* Increments the Rx error counters */
+static void
+smsc911x_rx_counterrors(struct net_device *dev, unsigned int rxstat)
+{
+	int crc_err = 0;
+
+	if (unlikely(rxstat & 0x00008000)) {
+		dev->stats.rx_errors++;
+		if (unlikely(rxstat & 0x00000002)) {
+			dev->stats.rx_crc_errors++;
+			crc_err = 1;
+		}
+	}
+	if (likely(!crc_err)) {
+		if (unlikely((rxstat & 0x00001020) == 0x00001020)) {
+			/* Frame type indicates length,
+			 * and length error is set */
+			dev->stats.rx_length_errors++;
+		}
+		if (rxstat & RX_STS_MCAST_)
+			dev->stats.multicast++;
+	}
+}
+
+/* Quickly dumps bad packets */
+static void
+smsc911x_rx_fastforward(struct smsc911x_data *pdata, unsigned int pktbytes)
+{
+	unsigned int pktwords = (pktbytes + NET_IP_ALIGN + 3) >> 2;
+
+	if (likely(pktwords >= 4)) {
+		unsigned int timeout = 500;
+		unsigned int val;
+		smsc911x_reg_write(pdata, RX_DP_CTRL, RX_DP_CTRL_RX_FFWD_);
+		do {
+			udelay(1);
+			val = smsc911x_reg_read(pdata, RX_DP_CTRL);
+		} while (timeout-- && (val & RX_DP_CTRL_RX_FFWD_));
+
+		if (unlikely(timeout == 0))
+			SMSC_WARNING(HW, "Timed out waiting for "
+				"RX FFWD to finish, RX_DP_CTRL: 0x%08X", val);
+	} else {
+		unsigned int temp;
+		while (pktwords--)
+			temp = smsc911x_reg_read(pdata, RX_DATA_FIFO);
+	}
+}
+
+/* NAPI poll function */
+static int smsc911x_poll(struct napi_struct *napi, int budget)
+{
+	struct smsc911x_data *pdata =
+		container_of(napi, struct smsc911x_data, napi);
+	struct net_device *dev = pdata->dev;
+	int npackets = 0;
+
+	while (likely(netif_running(dev)) && (npackets < budget)) {
+		unsigned int pktlength;
+		unsigned int pktwords;
+		struct sk_buff *skb;
+		unsigned int rxstat = smsc911x_rx_get_rxstatus(pdata);
+
+		if (!rxstat) {
+			unsigned int temp;
+			/* We processed all packets available.  Tell NAPI it can
+			 * stop polling then re-enable rx interrupts */
+			smsc911x_reg_write(pdata, INT_STS, INT_STS_RSFL_);
+			netif_rx_complete(dev, napi);
+			temp = smsc911x_reg_read(pdata, INT_EN);
+			temp |= INT_EN_RSFL_EN_;
+			smsc911x_reg_write(pdata, INT_EN, temp);
+			break;
+		}
+
+		/* Count packet for NAPI scheduling, even if it has an error.
+		 * Error packets still require cycles to discard */
+		npackets++;
+
+		pktlength = ((rxstat & 0x3FFF0000) >> 16);
+		pktwords = (pktlength + NET_IP_ALIGN + 3) >> 2;
+		smsc911x_rx_counterrors(dev, rxstat);
+
+		if (unlikely(rxstat & RX_STS_ES_)) {
+			SMSC_WARNING(RX_ERR,
+				"Discarding packet with error bit set");
+			/* Packet has an error, discard it and continue with
+			 * the next */
+			smsc911x_rx_fastforward(pdata, pktwords);
+			dev->stats.rx_dropped++;
+			continue;
+		}
+
+		skb = netdev_alloc_skb(dev, pktlength + NET_IP_ALIGN);
+		if (unlikely(!skb)) {
+			SMSC_WARNING(RX_ERR,
+				"Unable to allocate skb for rx packet");
+			/* Drop the packet and stop this polling iteration */
+			smsc911x_rx_fastforward(pdata, pktwords);
+			dev->stats.rx_dropped++;
+			break;
+		}
+
+		skb->data = skb->head;
+		skb_reset_tail_pointer(skb);
+
+		/* Align IP on 16B boundary */
+		skb_reserve(skb, NET_IP_ALIGN);
+		skb_put(skb, pktlength - 4);
+		smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head,
+				     pktwords);
+		skb->protocol = eth_type_trans(skb, dev);
+		skb->ip_summed = CHECKSUM_NONE;
+		netif_receive_skb(skb);
+
+		/* Update counters */
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += (pktlength - 4);
+		dev->last_rx = jiffies;
+	}
+
+	/* Return total received packets */
+	return npackets;
+}
+
+/* Returns hash bit number for given MAC address
+ * Example:
+ * 01 00 5E 00 00 01 -> returns bit number 31 */
+static unsigned int smsc911x_hash(char addr[ETH_ALEN])
+{
+	return (ether_crc(ETH_ALEN, addr) >> 26) & 0x3f;
+}
+
+static void smsc911x_rx_multicast_update(struct smsc911x_data *pdata)
+{
+	/* Performs the multicast & mac_cr update.  This is called when
+	 * safe on the current hardware, and with the mac_lock held */
+	unsigned int mac_cr;
+
+	SMSC_ASSERT_MAC_LOCK(pdata);
+
+	mac_cr = smsc911x_mac_read(pdata, MAC_CR);
+	mac_cr |= pdata->set_bits_mask;
+	mac_cr &= ~(pdata->clear_bits_mask);
+	smsc911x_mac_write(pdata, MAC_CR, mac_cr);
+	smsc911x_mac_write(pdata, HASHH, pdata->hashhi);
+	smsc911x_mac_write(pdata, HASHL, pdata->hashlo);
+	SMSC_TRACE(HW, "maccr 0x%08X, HASHH 0x%08X, HASHL 0x%08X",
+		mac_cr, pdata->hashhi, pdata->hashlo);
+}
+
+static void smsc911x_rx_multicast_update_workaround(struct smsc911x_data *pdata)
+{
+	unsigned int mac_cr;
+
+	/* This function is only called for older LAN911x devices
+	 * (revA or revB), where MAC_CR, HASHH and HASHL should not
+	 * be modified during Rx - newer devices immediately update the
+	 * registers.
+	 *
+	 * This is called from interrupt context */
+
+	spin_lock(&pdata->mac_lock);
+
+	/* Check Rx has stopped */
+	if (smsc911x_mac_read(pdata, MAC_CR) & MAC_CR_RXEN_)
+		SMSC_WARNING(DRV, "Rx not stopped");
+
+	/* Perform the update - safe to do now Rx has stopped */
+	smsc911x_rx_multicast_update(pdata);
+
+	/* Re-enable Rx */
+	mac_cr = smsc911x_mac_read(pdata, MAC_CR);
+	mac_cr |= MAC_CR_RXEN_;
+	smsc911x_mac_write(pdata, MAC_CR, mac_cr);
+
+	pdata->multicast_update_pending = 0;
+
+	spin_unlock(&pdata->mac_lock);
+}
+
+static int smsc911x_soft_reset(struct smsc911x_data *pdata)
+{
+	unsigned int timeout;
+	unsigned int temp;
+
+	/* Reset the LAN911x */
+	smsc911x_reg_write(pdata, HW_CFG, HW_CFG_SRST_);
+	timeout = 10;
+	do {
+		udelay(10);
+		temp = smsc911x_reg_read(pdata, HW_CFG);
+	} while ((--timeout) && (temp & HW_CFG_SRST_));
+
+	if (unlikely(temp & HW_CFG_SRST_)) {
+		SMSC_WARNING(DRV, "Failed to complete reset");
+		return -EIO;
+	}
+	return 0;
+}
+
+/* Sets the device MAC address to dev_addr, called with mac_lock held */
+static void
+smsc911x_set_mac_address(struct smsc911x_data *pdata, u8 dev_addr[6])
+{
+	u32 mac_high16 = (dev_addr[5] << 8) | dev_addr[4];
+	u32 mac_low32 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
+	    (dev_addr[1] << 8) | dev_addr[0];
+
+	SMSC_ASSERT_MAC_LOCK(pdata);
+
+	smsc911x_mac_write(pdata, ADDRH, mac_high16);
+	smsc911x_mac_write(pdata, ADDRL, mac_low32);
+}
+
+static int smsc911x_open(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	unsigned int timeout;
+	unsigned int temp;
+	unsigned int intcfg;
+
+	/* if the phy is not yet registered, retry later*/
+	if (!pdata->phy_dev) {
+		SMSC_WARNING(HW, "phy_dev is NULL");
+		return -EAGAIN;
+	}
+
+	if (!is_valid_ether_addr(dev->dev_addr)) {
+		SMSC_WARNING(HW, "dev_addr is not a valid MAC address");
+		return -EADDRNOTAVAIL;
+	}
+
+	/* Reset the LAN911x */
+	if (smsc911x_soft_reset(pdata)) {
+		SMSC_WARNING(HW, "soft reset failed");
+		return -EIO;
+	}
+
+	smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
+	smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740);
+
+	/* Make sure EEPROM has finished loading before setting GPIO_CFG */
+	timeout = 50;
+	while ((timeout--) &&
+	       (smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_)) {
+		udelay(10);
+	}
+
+	if (unlikely(timeout == 0))
+		SMSC_WARNING(IFUP,
+			"Timed out waiting for EEPROM busy bit to clear");
+
+	smsc911x_reg_write(pdata, GPIO_CFG, 0x70070000);
+
+	/* The soft reset above cleared the device's MAC address,
+	 * restore it from local copy (set in probe) */
+	spin_lock_irq(&pdata->mac_lock);
+	smsc911x_set_mac_address(pdata, dev->dev_addr);
+	spin_unlock_irq(&pdata->mac_lock);
+
+	/* Initialise irqs, but leave all sources disabled */
+	smsc911x_reg_write(pdata, INT_EN, 0);
+	smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF);
+
+	/* Set interrupt deassertion to 100uS */
+	intcfg = ((10 << 24) | INT_CFG_IRQ_EN_);
+
+	if (pdata->irq_polarity) {
+		SMSC_TRACE(IFUP, "irq polarity: active high");
+		intcfg |= INT_CFG_IRQ_POL_;
+	} else {
+		SMSC_TRACE(IFUP, "irq polarity: active low");
+	}
+
+	if (pdata->irq_type) {
+		SMSC_TRACE(IFUP, "irq type: push-pull");
+		intcfg |= INT_CFG_IRQ_TYPE_;
+	} else {
+		SMSC_TRACE(IFUP, "irq type: open drain");
+	}
+
+	smsc911x_reg_write(pdata, INT_CFG, intcfg);
+
+	SMSC_TRACE(IFUP, "Testing irq handler using IRQ %d", dev->irq);
+	pdata->software_irq_signal = 0;
+	smp_wmb();
+
+	temp = smsc911x_reg_read(pdata, INT_EN);
+	temp |= INT_EN_SW_INT_EN_;
+	smsc911x_reg_write(pdata, INT_EN, temp);
+
+	timeout = 1000;
+	while (timeout--) {
+		if (pdata->software_irq_signal)
+			break;
+		msleep(1);
+	}
+
+	if (!pdata->software_irq_signal) {
+		dev_warn(&dev->dev, "ISR failed signaling test (IRQ %d)\n",
+			 dev->irq);
+		return -ENODEV;
+	}
+	SMSC_TRACE(IFUP, "IRQ handler passed test using IRQ %d", dev->irq);
+
+	dev_info(&dev->dev, "SMSC911x/921x identified at %#08lx, IRQ: %d\n",
+		 (unsigned long)pdata->ioaddr, dev->irq);
+
+	/* Bring the PHY up */
+	phy_start(pdata->phy_dev);
+
+	temp = smsc911x_reg_read(pdata, HW_CFG);
+	/* Preserve TX FIFO size and external PHY configuration */
+	temp &= (HW_CFG_TX_FIF_SZ_|0x00000FFF);
+	temp |= HW_CFG_SF_;
+	smsc911x_reg_write(pdata, HW_CFG, temp);
+
+	temp = smsc911x_reg_read(pdata, FIFO_INT);
+	temp |= FIFO_INT_TX_AVAIL_LEVEL_;
+	temp &= ~(FIFO_INT_RX_STS_LEVEL_);
+	smsc911x_reg_write(pdata, FIFO_INT, temp);
+
+	/* set RX Data offset to 2 bytes for alignment */
+	smsc911x_reg_write(pdata, RX_CFG, (2 << 8));
+
+	/* enable NAPI polling before enabling RX interrupts */
+	napi_enable(&pdata->napi);
+
+	temp = smsc911x_reg_read(pdata, INT_EN);
+	temp |= (INT_EN_TDFA_EN_ | INT_EN_RSFL_EN_);
+	smsc911x_reg_write(pdata, INT_EN, temp);
+
+	spin_lock_irq(&pdata->mac_lock);
+	temp = smsc911x_mac_read(pdata, MAC_CR);
+	temp |= (MAC_CR_TXEN_ | MAC_CR_RXEN_ | MAC_CR_HBDIS_);
+	smsc911x_mac_write(pdata, MAC_CR, temp);
+	spin_unlock_irq(&pdata->mac_lock);
+
+	smsc911x_reg_write(pdata, TX_CFG, TX_CFG_TX_ON_);
+
+	netif_start_queue(dev);
+	return 0;
+}
+
+/* Entry point for stopping the interface */
+static int smsc911x_stop(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	unsigned int temp;
+
+	BUG_ON(!pdata->phy_dev);
+
+	/* Disable all device interrupts */
+	temp = smsc911x_reg_read(pdata, INT_CFG);
+	temp &= ~INT_CFG_IRQ_EN_;
+	smsc911x_reg_write(pdata, INT_CFG, temp);
+
+	/* Stop Tx and Rx polling */
+	netif_stop_queue(dev);
+	napi_disable(&pdata->napi);
+
+	/* At this point all Rx and Tx activity is stopped */
+	dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP);
+	smsc911x_tx_update_txcounters(dev);
+
+	/* Bring the PHY down */
+	phy_stop(pdata->phy_dev);
+
+	SMSC_TRACE(IFDOWN, "Interface stopped");
+	return 0;
+}
+
+/* Entry point for transmitting a packet */
+static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	unsigned int freespace;
+	unsigned int tx_cmd_a;
+	unsigned int tx_cmd_b;
+	unsigned int temp;
+	u32 wrsz;
+	ulong bufp;
+
+	freespace = smsc911x_reg_read(pdata, TX_FIFO_INF) & TX_FIFO_INF_TDFREE_;
+
+	if (unlikely(freespace < TX_FIFO_LOW_THRESHOLD))
+		SMSC_WARNING(TX_ERR,
+			"Tx data fifo low, space available: %d", freespace);
+
+	/* Word alignment adjustment */
+	tx_cmd_a = (u32)((ulong)skb->data & 0x03) << 16;
+	tx_cmd_a |= TX_CMD_A_FIRST_SEG_ | TX_CMD_A_LAST_SEG_;
+	tx_cmd_a |= (unsigned int)skb->len;
+
+	tx_cmd_b = ((unsigned int)skb->len) << 16;
+	tx_cmd_b |= (unsigned int)skb->len;
+
+	smsc911x_reg_write(pdata, TX_DATA_FIFO, tx_cmd_a);
+	smsc911x_reg_write(pdata, TX_DATA_FIFO, tx_cmd_b);
+
+	bufp = (ulong)skb->data & (~0x3);
+	wrsz = (u32)skb->len + 3;
+	wrsz += (u32)((ulong)skb->data & 0x3);
+	wrsz >>= 2;
+
+	smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz);
+	freespace -= (skb->len + 32);
+	dev_kfree_skb(skb);
+	dev->trans_start = jiffies;
+
+	if (unlikely(smsc911x_tx_get_txstatcount(pdata) >= 30))
+		smsc911x_tx_update_txcounters(dev);
+
+	if (freespace < TX_FIFO_LOW_THRESHOLD) {
+		netif_stop_queue(dev);
+		temp = smsc911x_reg_read(pdata, FIFO_INT);
+		temp &= 0x00FFFFFF;
+		temp |= 0x32000000;
+		smsc911x_reg_write(pdata, FIFO_INT, temp);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+/* Entry point for getting status counters */
+static struct net_device_stats *smsc911x_get_stats(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	smsc911x_tx_update_txcounters(dev);
+	dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP);
+	return &dev->stats;
+}
+
+/* Entry point for setting addressing modes */
+static void smsc911x_set_multicast_list(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	unsigned long flags;
+
+	if (dev->flags & IFF_PROMISC) {
+		/* Enabling promiscuous mode */
+		pdata->set_bits_mask = MAC_CR_PRMS_;
+		pdata->clear_bits_mask = (MAC_CR_MCPAS_ | MAC_CR_HPFILT_);
+		pdata->hashhi = 0;
+		pdata->hashlo = 0;
+	} else if (dev->flags & IFF_ALLMULTI) {
+		/* Enabling all multicast mode */
+		pdata->set_bits_mask = MAC_CR_MCPAS_;
+		pdata->clear_bits_mask = (MAC_CR_PRMS_ | MAC_CR_HPFILT_);
+		pdata->hashhi = 0;
+		pdata->hashlo = 0;
+	} else if (dev->mc_count > 0) {
+		/* Enabling specific multicast addresses */
+		unsigned int hash_high = 0;
+		unsigned int hash_low = 0;
+		unsigned int count = 0;
+		struct dev_mc_list *mc_list = dev->mc_list;
+
+		pdata->set_bits_mask = MAC_CR_HPFILT_;
+		pdata->clear_bits_mask = (MAC_CR_PRMS_ | MAC_CR_MCPAS_);
+
+		while (mc_list) {
+			count++;
+			if ((mc_list->dmi_addrlen) == ETH_ALEN) {
+				unsigned int bitnum =
+				    smsc911x_hash(mc_list->dmi_addr);
+				unsigned int mask = 0x01 << (bitnum & 0x1F);
+				if (bitnum & 0x20)
+					hash_high |= mask;
+				else
+					hash_low |= mask;
+			} else {
+				SMSC_WARNING(DRV, "dmi_addrlen != 6");
+			}
+			mc_list = mc_list->next;
+		}
+		if (count != (unsigned int)dev->mc_count)
+			SMSC_WARNING(DRV, "mc_count != dev->mc_count");
+
+		pdata->hashhi = hash_high;
+		pdata->hashlo = hash_low;
+	} else {
+		/* Enabling local MAC address only */
+		pdata->set_bits_mask = 0;
+		pdata->clear_bits_mask =
+		    (MAC_CR_PRMS_ | MAC_CR_MCPAS_ | MAC_CR_HPFILT_);
+		pdata->hashhi = 0;
+		pdata->hashlo = 0;
+	}
+
+	spin_lock_irqsave(&pdata->mac_lock, flags);
+
+	if (pdata->generation <= 1) {
+		/* Older hardware revision - cannot change these flags while
+		 * receiving data */
+		if (!pdata->multicast_update_pending) {
+			unsigned int temp;
+			SMSC_TRACE(HW, "scheduling mcast update");
+			pdata->multicast_update_pending = 1;
+
+			/* Request the hardware to stop, then perform the
+			 * update when we get an RX_STOP interrupt */
+			smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
+			temp = smsc911x_reg_read(pdata, INT_EN);
+			temp |= INT_EN_RXSTOP_INT_EN_;
+			smsc911x_reg_write(pdata, INT_EN, temp);
+
+			temp = smsc911x_mac_read(pdata, MAC_CR);
+			temp &= ~(MAC_CR_RXEN_);
+			smsc911x_mac_write(pdata, MAC_CR, temp);
+		} else {
+			/* There is another update pending, this should now
+			 * use the newer values */
+		}
+	} else {
+		/* Newer hardware revision - can write immediately */
+		smsc911x_rx_multicast_update(pdata);
+	}
+
+	spin_unlock_irqrestore(&pdata->mac_lock, flags);
+}
+
+static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	u32 intsts = smsc911x_reg_read(pdata, INT_STS);
+	u32 inten = smsc911x_reg_read(pdata, INT_EN);
+	int serviced = IRQ_NONE;
+	u32 temp;
+
+	if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
+		temp = smsc911x_reg_read(pdata, INT_EN);
+		temp &= (~INT_EN_SW_INT_EN_);
+		smsc911x_reg_write(pdata, INT_EN, temp);
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
+		pdata->software_irq_signal = 1;
+		smp_wmb();
+		serviced = IRQ_HANDLED;
+	}
+
+	if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
+		/* Called when there is a multicast update scheduled and
+		 * it is now safe to complete the update */
+		SMSC_TRACE(INTR, "RX Stop interrupt");
+		temp = smsc911x_reg_read(pdata, INT_EN);
+		temp &= (~INT_EN_RXSTOP_INT_EN_);
+		smsc911x_reg_write(pdata, INT_EN, temp);
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
+		smsc911x_rx_multicast_update_workaround(pdata);
+		serviced = IRQ_HANDLED;
+	}
+
+	if (intsts & inten & INT_STS_TDFA_) {
+		temp = smsc911x_reg_read(pdata, FIFO_INT);
+		temp |= FIFO_INT_TX_AVAIL_LEVEL_;
+		smsc911x_reg_write(pdata, FIFO_INT, temp);
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
+		netif_wake_queue(dev);
+		serviced = IRQ_HANDLED;
+	}
+
+	if (unlikely(intsts & inten & INT_STS_RXE_)) {
+		SMSC_TRACE(INTR, "RX Error interrupt");
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
+		serviced = IRQ_HANDLED;
+	}
+
+	if (likely(intsts & inten & INT_STS_RSFL_)) {
+		if (likely(netif_rx_schedule_prep(dev, &pdata->napi))) {
+			/* Disable Rx interrupts */
+			temp = smsc911x_reg_read(pdata, INT_EN);
+			temp &= (~INT_EN_RSFL_EN_);
+			smsc911x_reg_write(pdata, INT_EN, temp);
+			/* Schedule a NAPI poll */
+			__netif_rx_schedule(dev, &pdata->napi);
+		} else {
+			SMSC_WARNING(RX_ERR,
+				"netif_rx_schedule_prep failed");
+		}
+		serviced = IRQ_HANDLED;
+	}
+
+	return serviced;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+void smsc911x_poll_controller(struct net_device *dev)
+{
+	disable_irq(dev->irq);
+	smsc911x_irqhandler(0, dev);
+	enable_irq(dev->irq);
+}
+#endif				/* CONFIG_NET_POLL_CONTROLLER */
+
+/* Standard ioctls for mii-tool */
+static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+
+	if (!netif_running(dev) || !pdata->phy_dev)
+		return -EINVAL;
+
+	return phy_mii_ioctl(pdata->phy_dev, if_mii(ifr), cmd);
+}
+
+static int
+smsc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+
+	cmd->maxtxpkt = 1;
+	cmd->maxrxpkt = 1;
+	return phy_ethtool_gset(pdata->phy_dev, cmd);
+}
+
+static int
+smsc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+
+	return phy_ethtool_sset(pdata->phy_dev, cmd);
+}
+
+static void smsc911x_ethtool_getdrvinfo(struct net_device *dev,
+					struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, SMSC_CHIPNAME, sizeof(info->driver));
+	strlcpy(info->version, SMSC_DRV_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, dev->dev.parent->bus_id,
+		sizeof(info->bus_info));
+}
+
+static int smsc911x_ethtool_nwayreset(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+
+	return phy_start_aneg(pdata->phy_dev);
+}
+
+static u32 smsc911x_ethtool_getmsglevel(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	return pdata->msg_enable;
+}
+
+static void smsc911x_ethtool_setmsglevel(struct net_device *dev, u32 level)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	pdata->msg_enable = level;
+}
+
+static int smsc911x_ethtool_getregslen(struct net_device *dev)
+{
+	return (((E2P_DATA - ID_REV) / 4 + 1) + (WUCSR - MAC_CR) + 1 + 32) *
+	    sizeof(u32);
+}
+
+static void
+smsc911x_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs,
+			 void *buf)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	struct phy_device *phy_dev = pdata->phy_dev;
+	unsigned long flags;
+	unsigned int i;
+	unsigned int j = 0;
+	u32 *data = buf;
+
+	regs->version = pdata->idrev;
+	for (i = ID_REV; i <= E2P_DATA; i += (sizeof(u32)))
+		data[j++] = smsc911x_reg_read(pdata, i);
+
+	for (i = MAC_CR; i <= WUCSR; i++) {
+		spin_lock_irqsave(&pdata->mac_lock, flags);
+		data[j++] = smsc911x_mac_read(pdata, i);
+		spin_unlock_irqrestore(&pdata->mac_lock, flags);
+	}
+
+	for (i = 0; i <= 31; i++)
+		data[j++] = smsc911x_mii_read(phy_dev->bus, phy_dev->addr, i);
+}
+
+static void smsc911x_eeprom_enable_access(struct smsc911x_data *pdata)
+{
+	unsigned int temp = smsc911x_reg_read(pdata, GPIO_CFG);
+	temp &= ~GPIO_CFG_EEPR_EN_;
+	smsc911x_reg_write(pdata, GPIO_CFG, temp);
+	msleep(1);
+}
+
+static int smsc911x_eeprom_send_cmd(struct smsc911x_data *pdata, u32 op)
+{
+	int timeout = 100;
+	u32 e2cmd;
+
+	SMSC_TRACE(DRV, "op 0x%08x", op);
+	if (smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) {
+		SMSC_WARNING(DRV, "Busy at start");
+		return -EBUSY;
+	}
+
+	e2cmd = op | E2P_CMD_EPC_BUSY_;
+	smsc911x_reg_write(pdata, E2P_CMD, e2cmd);
+
+	do {
+		msleep(1);
+		e2cmd = smsc911x_reg_read(pdata, E2P_CMD);
+	} while ((e2cmd & E2P_CMD_EPC_BUSY_) && (timeout--));
+
+	if (!timeout) {
+		SMSC_TRACE(DRV, "TIMED OUT");
+		return -EAGAIN;
+	}
+
+	if (e2cmd & E2P_CMD_EPC_TIMEOUT_) {
+		SMSC_TRACE(DRV, "Error occured during eeprom operation");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int smsc911x_eeprom_read_location(struct smsc911x_data *pdata,
+					 u8 address, u8 *data)
+{
+	u32 op = E2P_CMD_EPC_CMD_READ_ | address;
+	int ret;
+
+	SMSC_TRACE(DRV, "address 0x%x", address);
+	ret = smsc911x_eeprom_send_cmd(pdata, op);
+
+	if (!ret)
+		data[address] = smsc911x_reg_read(pdata, E2P_DATA);
+
+	return ret;
+}
+
+static int smsc911x_eeprom_write_location(struct smsc911x_data *pdata,
+					  u8 address, u8 data)
+{
+	u32 op = E2P_CMD_EPC_CMD_ERASE_ | address;
+	int ret;
+
+	SMSC_TRACE(DRV, "address 0x%x, data 0x%x", address, data);
+	ret = smsc911x_eeprom_send_cmd(pdata, op);
+
+	if (!ret) {
+		op = E2P_CMD_EPC_CMD_WRITE_ | address;
+		smsc911x_reg_write(pdata, E2P_DATA, (u32)data);
+		ret = smsc911x_eeprom_send_cmd(pdata, op);
+	}
+
+	return ret;
+}
+
+static int smsc911x_ethtool_get_eeprom_len(struct net_device *dev)
+{
+	return SMSC911X_EEPROM_SIZE;
+}
+
+static int smsc911x_ethtool_get_eeprom(struct net_device *dev,
+				       struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	u8 eeprom_data[SMSC911X_EEPROM_SIZE];
+	int len;
+	int i;
+
+	smsc911x_eeprom_enable_access(pdata);
+
+	len = min(eeprom->len, SMSC911X_EEPROM_SIZE);
+	for (i = 0; i < len; i++) {
+		int ret = smsc911x_eeprom_read_location(pdata, i, eeprom_data);
+		if (ret < 0) {
+			eeprom->len = 0;
+			return ret;
+		}
+	}
+
+	memcpy(data, &eeprom_data[eeprom->offset], len);
+	eeprom->len = len;
+	return 0;
+}
+
+static int smsc911x_ethtool_set_eeprom(struct net_device *dev,
+				       struct ethtool_eeprom *eeprom, u8 *data)
+{
+	int ret;
+	struct smsc911x_data *pdata = netdev_priv(dev);
+
+	smsc911x_eeprom_enable_access(pdata);
+	smsc911x_eeprom_send_cmd(pdata, E2P_CMD_EPC_CMD_EWEN_);
+	ret = smsc911x_eeprom_write_location(pdata, eeprom->offset, *data);
+	smsc911x_eeprom_send_cmd(pdata, E2P_CMD_EPC_CMD_EWDS_);
+
+	/* Single byte write, according to man page */
+	eeprom->len = 1;
+
+	return ret;
+}
+
+static struct ethtool_ops smsc911x_ethtool_ops = {
+	.get_settings = smsc911x_ethtool_getsettings,
+	.set_settings = smsc911x_ethtool_setsettings,
+	.get_link = ethtool_op_get_link,
+	.get_drvinfo = smsc911x_ethtool_getdrvinfo,
+	.nway_reset = smsc911x_ethtool_nwayreset,
+	.get_msglevel = smsc911x_ethtool_getmsglevel,
+	.set_msglevel = smsc911x_ethtool_setmsglevel,
+	.get_regs_len = smsc911x_ethtool_getregslen,
+	.get_regs = smsc911x_ethtool_getregs,
+	.get_eeprom_len = smsc911x_ethtool_get_eeprom_len,
+	.get_eeprom = smsc911x_ethtool_get_eeprom,
+	.set_eeprom = smsc911x_ethtool_set_eeprom,
+};
+
+/* Initializing private device structures, only called from probe */
+static int __devinit smsc911x_init(struct net_device *dev)
+{
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	unsigned int byte_test;
+
+	SMSC_TRACE(PROBE, "Driver Parameters:");
+	SMSC_TRACE(PROBE, "LAN base: 0x%08lX",
+		(unsigned long)pdata->ioaddr);
+	SMSC_TRACE(PROBE, "IRQ: %d", dev->irq);
+	SMSC_TRACE(PROBE, "PHY will be autodetected.");
+
+#if (!SMSC_CAN_USE_32BIT)
+	spin_lock_init(&pdata->dev_lock);
+#endif
+
+	if (pdata->ioaddr == 0) {
+		SMSC_WARNING(PROBE, "pdata->ioaddr: 0x00000000");
+		return -ENODEV;
+	}
+
+	/* Check byte ordering */
+	byte_test = smsc911x_reg_read(pdata, BYTE_TEST);
+	SMSC_TRACE(PROBE, "BYTE_TEST: 0x%08X", byte_test);
+	if (byte_test == 0x43218765) {
+		SMSC_TRACE(PROBE, "BYTE_TEST looks swapped, "
+			"applying WORD_SWAP");
+		smsc911x_reg_write(pdata, WORD_SWAP, 0xffffffff);
+
+		/* 1 dummy read of BYTE_TEST is needed after a write to
+		 * WORD_SWAP before its contents are valid */
+		byte_test = smsc911x_reg_read(pdata, BYTE_TEST);
+
+		byte_test = smsc911x_reg_read(pdata, BYTE_TEST);
+	}
+
+	if (byte_test != 0x87654321) {
+		SMSC_WARNING(DRV, "BYTE_TEST: 0x%08X", byte_test);
+		if (((byte_test >> 16) & 0xFFFF) == (byte_test & 0xFFFF)) {
+			SMSC_WARNING(PROBE,
+				"top 16 bits equal to bottom 16 bits");
+			SMSC_TRACE(PROBE, "This may mean the chip is set "
+				"for 32 bit while the bus is reading 16 bit");
+		}
+		return -ENODEV;
+	}
+
+	/* Default generation to zero (all workarounds apply) */
+	pdata->generation = 0;
+
+	pdata->idrev = smsc911x_reg_read(pdata, ID_REV);
+	switch (pdata->idrev & 0xFFFF0000) {
+	case 0x01180000:
+	case 0x01170000:
+	case 0x01160000:
+	case 0x01150000:
+		/* LAN911[5678] family */
+		pdata->generation = pdata->idrev & 0x0000FFFF;
+		break;
+
+	case 0x118A0000:
+	case 0x117A0000:
+	case 0x116A0000:
+	case 0x115A0000:
+		/* LAN921[5678] family */
+		pdata->generation = 3;
+		break;
+
+	case 0x92100000:
+	case 0x92110000:
+	case 0x92200000:
+	case 0x92210000:
+		/* LAN9210/LAN9211/LAN9220/LAN9221 */
+		pdata->generation = 4;
+		break;
+
+	default:
+		SMSC_WARNING(PROBE, "LAN911x not identified, idrev: 0x%08X",
+			pdata->idrev);
+		return -ENODEV;
+	}
+
+	SMSC_TRACE(PROBE, "LAN911x identified, idrev: 0x%08X, generation: %d",
+		pdata->idrev, pdata->generation);
+
+	if (pdata->generation == 0)
+		SMSC_WARNING(PROBE,
+			"This driver is not intended for this chip revision");
+
+	/* Reset the LAN911x */
+	if (smsc911x_soft_reset(pdata))
+		return -ENODEV;
+
+	/* Disable all interrupt sources until we bring the device up */
+	smsc911x_reg_write(pdata, INT_EN, 0);
+
+	ether_setup(dev);
+	dev->open = smsc911x_open;
+	dev->stop = smsc911x_stop;
+	dev->hard_start_xmit = smsc911x_hard_start_xmit;
+	dev->get_stats = smsc911x_get_stats;
+	dev->set_multicast_list = smsc911x_set_multicast_list;
+	dev->flags |= IFF_MULTICAST;
+	dev->do_ioctl = smsc911x_do_ioctl;
+	netif_napi_add(dev, &pdata->napi, smsc911x_poll, SMSC_NAPI_WEIGHT);
+	dev->ethtool_ops = &smsc911x_ethtool_ops;
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	dev->poll_controller = smsc911x_poll_controller;
+#endif				/* CONFIG_NET_POLL_CONTROLLER */
+
+	return 0;
+}
+
+static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
+{
+	struct net_device *dev;
+	struct smsc911x_data *pdata;
+	struct resource *res;
+
+	dev = platform_get_drvdata(pdev);
+	BUG_ON(!dev);
+	pdata = netdev_priv(dev);
+	BUG_ON(!pdata);
+	BUG_ON(!pdata->ioaddr);
+	BUG_ON(!pdata->phy_dev);
+
+	SMSC_TRACE(IFDOWN, "Stopping driver.");
+
+	phy_disconnect(pdata->phy_dev);
+	pdata->phy_dev = NULL;
+	mdiobus_unregister(pdata->mii_bus);
+	mdiobus_free(pdata->mii_bus);
+
+	platform_set_drvdata(pdev, NULL);
+	unregister_netdev(dev);
+	free_irq(dev->irq, dev);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "smsc911x-memory");
+	if (!res)
+		platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	release_mem_region(res->start, res->end - res->start);
+
+	iounmap(pdata->ioaddr);
+
+	free_netdev(dev);
+
+	return 0;
+}
+
+static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
+{
+	struct net_device *dev;
+	struct smsc911x_data *pdata;
+	struct resource *res;
+	unsigned int intcfg = 0;
+	int res_size;
+	int retval;
+	DECLARE_MAC_BUF(mac);
+
+	pr_info("%s: Driver version %s.\n", SMSC_CHIPNAME, SMSC_DRV_VERSION);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "smsc911x-memory");
+	if (!res)
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		pr_warning("%s: Could not allocate resource.\n",
+			SMSC_CHIPNAME);
+		retval = -ENODEV;
+		goto out_0;
+	}
+	res_size = res->end - res->start;
+
+	if (!request_mem_region(res->start, res_size, SMSC_CHIPNAME)) {
+		retval = -EBUSY;
+		goto out_0;
+	}
+
+	dev = alloc_etherdev(sizeof(struct smsc911x_data));
+	if (!dev) {
+		pr_warning("%s: Could not allocate device.\n", SMSC_CHIPNAME);
+		retval = -ENOMEM;
+		goto out_release_io_1;
+	}
+
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	pdata = netdev_priv(dev);
+
+	dev->irq = platform_get_irq(pdev, 0);
+	pdata->ioaddr = ioremap_nocache(res->start, res_size);
+
+	/* copy config parameters across if present, otherwise pdata
+	 * defaults to zeros */
+	if (pdev->dev.platform_data) {
+		struct smsc911x_platform_config *config =
+			pdev->dev.platform_data;
+		pdata->irq_polarity = config->irq_polarity;
+		pdata->irq_type  = config->irq_type;
+		pdata->phy_interface = config->phy_interface;
+	}
+
+	pdata->dev = dev;
+	pdata->msg_enable = ((1 << debug) - 1);
+
+	if (pdata->ioaddr == NULL) {
+		SMSC_WARNING(PROBE,
+			"Error smsc911x base address invalid");
+		retval = -ENOMEM;
+		goto out_free_netdev_2;
+	}
+
+	retval = smsc911x_init(dev);
+	if (retval < 0)
+		goto out_unmap_io_3;
+
+	/* configure irq polarity and type before connecting isr */
+	if (pdata->irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
+		intcfg |= INT_CFG_IRQ_POL_;
+
+	if (pdata->irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL)
+		intcfg |= INT_CFG_IRQ_TYPE_;
+
+	smsc911x_reg_write(pdata, INT_CFG, intcfg);
+
+	/* Ensure interrupts are globally disabled before connecting ISR */
+	smsc911x_reg_write(pdata, INT_EN, 0);
+	smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF);
+
+	retval = request_irq(dev->irq, smsc911x_irqhandler, IRQF_DISABLED,
+			     SMSC_CHIPNAME, dev);
+	if (retval) {
+		SMSC_WARNING(PROBE,
+			"Unable to claim requested irq: %d", dev->irq);
+		goto out_unmap_io_3;
+	}
+
+	platform_set_drvdata(pdev, dev);
+
+	retval = register_netdev(dev);
+	if (retval) {
+		SMSC_WARNING(PROBE,
+			"Error %i registering device", retval);
+		goto out_unset_drvdata_4;
+	} else {
+		SMSC_TRACE(PROBE, "Network interface: \"%s\"", dev->name);
+	}
+
+	spin_lock_init(&pdata->mac_lock);
+
+	retval = smsc911x_mii_init(pdev, dev);
+	if (retval) {
+		SMSC_WARNING(PROBE,
+			"Error %i initialising mii", retval);
+		goto out_unregister_netdev_5;
+	}
+
+	spin_lock_irq(&pdata->mac_lock);
+
+	/* Check if mac address has been specified when bringing interface up */
+	if (is_valid_ether_addr(dev->dev_addr)) {
+		smsc911x_set_mac_address(pdata, dev->dev_addr);
+		SMSC_TRACE(PROBE, "MAC Address is specified by configuration");
+	} else {
+		/* Try reading mac address from device. if EEPROM is present
+		 * it will already have been set */
+		u32 mac_high16 = smsc911x_mac_read(pdata, ADDRH);
+		u32 mac_low32 = smsc911x_mac_read(pdata, ADDRL);
+		dev->dev_addr[0] = (u8)(mac_low32);
+		dev->dev_addr[1] = (u8)(mac_low32 >> 8);
+		dev->dev_addr[2] = (u8)(mac_low32 >> 16);
+		dev->dev_addr[3] = (u8)(mac_low32 >> 24);
+		dev->dev_addr[4] = (u8)(mac_high16);
+		dev->dev_addr[5] = (u8)(mac_high16 >> 8);
+
+		if (is_valid_ether_addr(dev->dev_addr)) {
+			/* eeprom values are valid  so use them */
+			SMSC_TRACE(PROBE,
+				"Mac Address is read from LAN911x EEPROM");
+		} else {
+			/* eeprom values are invalid, generate random MAC */
+			random_ether_addr(dev->dev_addr);
+			smsc911x_set_mac_address(pdata, dev->dev_addr);
+			SMSC_TRACE(PROBE,
+				"MAC Address is set to random_ether_addr");
+		}
+	}
+
+	spin_unlock_irq(&pdata->mac_lock);
+
+	dev_info(&dev->dev, "MAC Address: %s\n",
+		 print_mac(mac, dev->dev_addr));
+
+	return 0;
+
+out_unregister_netdev_5:
+	unregister_netdev(dev);
+out_unset_drvdata_4:
+	platform_set_drvdata(pdev, NULL);
+	free_irq(dev->irq, dev);
+out_unmap_io_3:
+	iounmap(pdata->ioaddr);
+out_free_netdev_2:
+	free_netdev(dev);
+out_release_io_1:
+	release_mem_region(res->start, res->end - res->start);
+out_0:
+	return retval;
+}
+
+static struct platform_driver smsc911x_driver = {
+	.probe = smsc911x_drv_probe,
+	.remove = smsc911x_drv_remove,
+	.driver = {
+		.name = SMSC_CHIPNAME,
+	},
+};
+
+/* Entry point for loading the module */
+static int __init smsc911x_init_module(void)
+{
+	return platform_driver_register(&smsc911x_driver);
+}
+
+/* entry point for unloading the module */
+static void __exit smsc911x_cleanup_module(void)
+{
+	platform_driver_unregister(&smsc911x_driver);
+}
+
+module_init(smsc911x_init_module);
+module_exit(smsc911x_cleanup_module);
diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h
new file mode 100644
index 000000000000..feb36de274ca
--- /dev/null
+++ b/drivers/net/smsc911x.h
@@ -0,0 +1,394 @@
+/***************************************************************************
+ *
+ * Copyright (C) 2004-2008 SMSC
+ * Copyright (C) 2005-2008 ARM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ ***************************************************************************/
+#ifndef __SMSC911X_H__
+#define __SMSC911X_H__
+
+#define SMSC_CAN_USE_32BIT	1
+#define TX_FIFO_LOW_THRESHOLD	((u32)1600)
+#define SMSC911X_EEPROM_SIZE	((u32)7)
+#define USE_DEBUG		0
+
+/* This is the maximum number of packets to be received every
+ * NAPI poll */
+#define SMSC_NAPI_WEIGHT	16
+
+/* implements a PHY loopback test at initialisation time, to ensure a packet
+ * can be succesfully looped back */
+#define USE_PHY_WORK_AROUND
+
+#define DPRINTK(nlevel, klevel, fmt, args...) \
+	((void)((NETIF_MSG_##nlevel & pdata->msg_enable) && \
+	printk(KERN_##klevel "%s: %s: " fmt "\n", \
+	pdata->dev->name, __func__, ## args)))
+
+#if USE_DEBUG >= 1
+#define SMSC_WARNING(nlevel, fmt, args...) \
+	DPRINTK(nlevel, WARNING, fmt, ## args)
+#else
+#define SMSC_WARNING(nlevel, fmt, args...) \
+	({ do {} while (0); 0; })
+#endif
+
+#if USE_DEBUG >= 2
+#define SMSC_TRACE(nlevel, fmt, args...) \
+	DPRINTK(nlevel, INFO, fmt, ## args)
+#else
+#define SMSC_TRACE(nlevel, fmt, args...) \
+	({ do {} while (0); 0; })
+#endif
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define SMSC_ASSERT_MAC_LOCK(pdata) \
+		WARN_ON(!spin_is_locked(&pdata->mac_lock))
+#else
+#define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0)
+#endif				/* CONFIG_DEBUG_SPINLOCK */
+
+#define FLOW_CTRL_TX		(1)
+#define FLOW_CTRL_RX		(2)
+
+/* SMSC911x registers and bitfields */
+#define RX_DATA_FIFO			0x00
+
+#define TX_DATA_FIFO			0x20
+#define TX_CMD_A_ON_COMP_		0x80000000
+#define TX_CMD_A_BUF_END_ALGN_		0x03000000
+#define TX_CMD_A_4_BYTE_ALGN_		0x00000000
+#define TX_CMD_A_16_BYTE_ALGN_		0x01000000
+#define TX_CMD_A_32_BYTE_ALGN_		0x02000000
+#define TX_CMD_A_DATA_OFFSET_		0x001F0000
+#define TX_CMD_A_FIRST_SEG_		0x00002000
+#define TX_CMD_A_LAST_SEG_		0x00001000
+#define TX_CMD_A_BUF_SIZE_		0x000007FF
+#define TX_CMD_B_PKT_TAG_		0xFFFF0000
+#define TX_CMD_B_ADD_CRC_DISABLE_	0x00002000
+#define TX_CMD_B_DISABLE_PADDING_	0x00001000
+#define TX_CMD_B_PKT_BYTE_LENGTH_	0x000007FF
+
+#define RX_STATUS_FIFO			0x40
+#define RX_STS_ES_			0x00008000
+#define RX_STS_MCAST_			0x00000400
+
+#define RX_STATUS_FIFO_PEEK		0x44
+
+#define TX_STATUS_FIFO			0x48
+#define TX_STS_ES_			0x00008000
+
+#define TX_STATUS_FIFO_PEEK		0x4C
+
+#define ID_REV				0x50
+#define ID_REV_CHIP_ID_			0xFFFF0000
+#define ID_REV_REV_ID_			0x0000FFFF
+
+#define INT_CFG				0x54
+#define INT_CFG_INT_DEAS_		0xFF000000
+#define INT_CFG_INT_DEAS_CLR_		0x00004000
+#define INT_CFG_INT_DEAS_STS_		0x00002000
+#define INT_CFG_IRQ_INT_		0x00001000
+#define INT_CFG_IRQ_EN_			0x00000100
+#define INT_CFG_IRQ_POL_		0x00000010
+#define INT_CFG_IRQ_TYPE_		0x00000001
+
+#define INT_STS				0x58
+#define INT_STS_SW_INT_			0x80000000
+#define INT_STS_TXSTOP_INT_		0x02000000
+#define INT_STS_RXSTOP_INT_		0x01000000
+#define INT_STS_RXDFH_INT_		0x00800000
+#define INT_STS_RXDF_INT_		0x00400000
+#define INT_STS_TX_IOC_			0x00200000
+#define INT_STS_RXD_INT_		0x00100000
+#define INT_STS_GPT_INT_		0x00080000
+#define INT_STS_PHY_INT_		0x00040000
+#define INT_STS_PME_INT_		0x00020000
+#define INT_STS_TXSO_			0x00010000
+#define INT_STS_RWT_			0x00008000
+#define INT_STS_RXE_			0x00004000
+#define INT_STS_TXE_			0x00002000
+#define INT_STS_TDFU_			0x00000800
+#define INT_STS_TDFO_			0x00000400
+#define INT_STS_TDFA_			0x00000200
+#define INT_STS_TSFF_			0x00000100
+#define INT_STS_TSFL_			0x00000080
+#define INT_STS_RXDF_			0x00000040
+#define INT_STS_RDFL_			0x00000020
+#define INT_STS_RSFF_			0x00000010
+#define INT_STS_RSFL_			0x00000008
+#define INT_STS_GPIO2_INT_		0x00000004
+#define INT_STS_GPIO1_INT_		0x00000002
+#define INT_STS_GPIO0_INT_		0x00000001
+
+#define INT_EN				0x5C
+#define INT_EN_SW_INT_EN_		0x80000000
+#define INT_EN_TXSTOP_INT_EN_		0x02000000
+#define INT_EN_RXSTOP_INT_EN_		0x01000000
+#define INT_EN_RXDFH_INT_EN_		0x00800000
+#define INT_EN_TIOC_INT_EN_		0x00200000
+#define INT_EN_RXD_INT_EN_		0x00100000
+#define INT_EN_GPT_INT_EN_		0x00080000
+#define INT_EN_PHY_INT_EN_		0x00040000
+#define INT_EN_PME_INT_EN_		0x00020000
+#define INT_EN_TXSO_EN_			0x00010000
+#define INT_EN_RWT_EN_			0x00008000
+#define INT_EN_RXE_EN_			0x00004000
+#define INT_EN_TXE_EN_			0x00002000
+#define INT_EN_TDFU_EN_			0x00000800
+#define INT_EN_TDFO_EN_			0x00000400
+#define INT_EN_TDFA_EN_			0x00000200
+#define INT_EN_TSFF_EN_			0x00000100
+#define INT_EN_TSFL_EN_			0x00000080
+#define INT_EN_RXDF_EN_			0x00000040
+#define INT_EN_RDFL_EN_			0x00000020
+#define INT_EN_RSFF_EN_			0x00000010
+#define INT_EN_RSFL_EN_			0x00000008
+#define INT_EN_GPIO2_INT_		0x00000004
+#define INT_EN_GPIO1_INT_		0x00000002
+#define INT_EN_GPIO0_INT_		0x00000001
+
+#define BYTE_TEST			0x64
+
+#define FIFO_INT			0x68
+#define FIFO_INT_TX_AVAIL_LEVEL_	0xFF000000
+#define FIFO_INT_TX_STS_LEVEL_		0x00FF0000
+#define FIFO_INT_RX_AVAIL_LEVEL_	0x0000FF00
+#define FIFO_INT_RX_STS_LEVEL_		0x000000FF
+
+#define RX_CFG				0x6C
+#define RX_CFG_RX_END_ALGN_		0xC0000000
+#define RX_CFG_RX_END_ALGN4_		0x00000000
+#define RX_CFG_RX_END_ALGN16_		0x40000000
+#define RX_CFG_RX_END_ALGN32_		0x80000000
+#define RX_CFG_RX_DMA_CNT_		0x0FFF0000
+#define RX_CFG_RX_DUMP_			0x00008000
+#define RX_CFG_RXDOFF_			0x00001F00
+
+#define TX_CFG				0x70
+#define TX_CFG_TXS_DUMP_		0x00008000
+#define TX_CFG_TXD_DUMP_		0x00004000
+#define TX_CFG_TXSAO_			0x00000004
+#define TX_CFG_TX_ON_			0x00000002
+#define TX_CFG_STOP_TX_			0x00000001
+
+#define HW_CFG				0x74
+#define HW_CFG_TTM_			0x00200000
+#define HW_CFG_SF_			0x00100000
+#define HW_CFG_TX_FIF_SZ_		0x000F0000
+#define HW_CFG_TR_			0x00003000
+#define HW_CFG_SRST_			0x00000001
+
+/* only available on 115/117 */
+#define HW_CFG_PHY_CLK_SEL_		0x00000060
+#define HW_CFG_PHY_CLK_SEL_INT_PHY_	0x00000000
+#define HW_CFG_PHY_CLK_SEL_EXT_PHY_	0x00000020
+#define HW_CFG_PHY_CLK_SEL_CLK_DIS_	0x00000040
+#define HW_CFG_SMI_SEL_		 	0x00000010
+#define HW_CFG_EXT_PHY_DET_		0x00000008
+#define HW_CFG_EXT_PHY_EN_		0x00000004
+#define HW_CFG_SRST_TO_			0x00000002
+
+/* only available  on 116/118 */
+#define HW_CFG_32_16_BIT_MODE_		0x00000004
+
+#define RX_DP_CTRL			0x78
+#define RX_DP_CTRL_RX_FFWD_		0x80000000
+
+#define RX_FIFO_INF			0x7C
+#define RX_FIFO_INF_RXSUSED_		0x00FF0000
+#define RX_FIFO_INF_RXDUSED_		0x0000FFFF
+
+#define TX_FIFO_INF			0x80
+#define TX_FIFO_INF_TSUSED_		0x00FF0000
+#define TX_FIFO_INF_TDFREE_		0x0000FFFF
+
+#define PMT_CTRL			0x84
+#define PMT_CTRL_PM_MODE_		0x00003000
+#define PMT_CTRL_PM_MODE_D0_		0x00000000
+#define PMT_CTRL_PM_MODE_D1_		0x00001000
+#define PMT_CTRL_PM_MODE_D2_		0x00002000
+#define PMT_CTRL_PM_MODE_D3_		0x00003000
+#define PMT_CTRL_PHY_RST_		0x00000400
+#define PMT_CTRL_WOL_EN_		0x00000200
+#define PMT_CTRL_ED_EN_			0x00000100
+#define PMT_CTRL_PME_TYPE_		0x00000040
+#define PMT_CTRL_WUPS_			0x00000030
+#define PMT_CTRL_WUPS_NOWAKE_		0x00000000
+#define PMT_CTRL_WUPS_ED_		0x00000010
+#define PMT_CTRL_WUPS_WOL_		0x00000020
+#define PMT_CTRL_WUPS_MULTI_		0x00000030
+#define PMT_CTRL_PME_IND_		0x00000008
+#define PMT_CTRL_PME_POL_		0x00000004
+#define PMT_CTRL_PME_EN_		0x00000002
+#define PMT_CTRL_READY_			0x00000001
+
+#define GPIO_CFG			0x88
+#define GPIO_CFG_LED3_EN_		0x40000000
+#define GPIO_CFG_LED2_EN_		0x20000000
+#define GPIO_CFG_LED1_EN_		0x10000000
+#define GPIO_CFG_GPIO2_INT_POL_		0x04000000
+#define GPIO_CFG_GPIO1_INT_POL_		0x02000000
+#define GPIO_CFG_GPIO0_INT_POL_		0x01000000
+#define GPIO_CFG_EEPR_EN_		0x00700000
+#define GPIO_CFG_GPIOBUF2_		0x00040000
+#define GPIO_CFG_GPIOBUF1_		0x00020000
+#define GPIO_CFG_GPIOBUF0_		0x00010000
+#define GPIO_CFG_GPIODIR2_		0x00000400
+#define GPIO_CFG_GPIODIR1_		0x00000200
+#define GPIO_CFG_GPIODIR0_		0x00000100
+#define GPIO_CFG_GPIOD4_		0x00000020
+#define GPIO_CFG_GPIOD3_		0x00000010
+#define GPIO_CFG_GPIOD2_		0x00000004
+#define GPIO_CFG_GPIOD1_		0x00000002
+#define GPIO_CFG_GPIOD0_		0x00000001
+
+#define GPT_CFG				0x8C
+#define GPT_CFG_TIMER_EN_		0x20000000
+#define GPT_CFG_GPT_LOAD_		0x0000FFFF
+
+#define GPT_CNT				0x90
+#define GPT_CNT_GPT_CNT_		0x0000FFFF
+
+#define WORD_SWAP			0x98
+
+#define FREE_RUN			0x9C
+
+#define RX_DROP				0xA0
+
+#define MAC_CSR_CMD			0xA4
+#define MAC_CSR_CMD_CSR_BUSY_		0x80000000
+#define MAC_CSR_CMD_R_NOT_W_		0x40000000
+#define MAC_CSR_CMD_CSR_ADDR_		0x000000FF
+
+#define MAC_CSR_DATA			0xA8
+
+#define AFC_CFG				0xAC
+#define AFC_CFG_AFC_HI_			0x00FF0000
+#define AFC_CFG_AFC_LO_			0x0000FF00
+#define AFC_CFG_BACK_DUR_		0x000000F0
+#define AFC_CFG_FCMULT_			0x00000008
+#define AFC_CFG_FCBRD_			0x00000004
+#define AFC_CFG_FCADD_			0x00000002
+#define AFC_CFG_FCANY_			0x00000001
+
+#define E2P_CMD				0xB0
+#define E2P_CMD_EPC_BUSY_		0x80000000
+#define E2P_CMD_EPC_CMD_		0x70000000
+#define E2P_CMD_EPC_CMD_READ_		0x00000000
+#define E2P_CMD_EPC_CMD_EWDS_		0x10000000
+#define E2P_CMD_EPC_CMD_EWEN_		0x20000000
+#define E2P_CMD_EPC_CMD_WRITE_		0x30000000
+#define E2P_CMD_EPC_CMD_WRAL_		0x40000000
+#define E2P_CMD_EPC_CMD_ERASE_		0x50000000
+#define E2P_CMD_EPC_CMD_ERAL_		0x60000000
+#define E2P_CMD_EPC_CMD_RELOAD_		0x70000000
+#define E2P_CMD_EPC_TIMEOUT_		0x00000200
+#define E2P_CMD_MAC_ADDR_LOADED_	0x00000100
+#define E2P_CMD_EPC_ADDR_		0x000000FF
+
+#define E2P_DATA			0xB4
+#define E2P_DATA_EEPROM_DATA_		0x000000FF
+#define LAN_REGISTER_EXTENT		0x00000100
+
+/*
+ * MAC Control and Status Register (Indirect Address)
+ * Offset (through the MAC_CSR CMD and DATA port)
+ */
+#define MAC_CR				0x01
+#define MAC_CR_RXALL_			0x80000000
+#define MAC_CR_HBDIS_			0x10000000
+#define MAC_CR_RCVOWN_			0x00800000
+#define MAC_CR_LOOPBK_			0x00200000
+#define MAC_CR_FDPX_			0x00100000
+#define MAC_CR_MCPAS_			0x00080000
+#define MAC_CR_PRMS_			0x00040000
+#define MAC_CR_INVFILT_			0x00020000
+#define MAC_CR_PASSBAD_			0x00010000
+#define MAC_CR_HFILT_			0x00008000
+#define MAC_CR_HPFILT_			0x00002000
+#define MAC_CR_LCOLL_			0x00001000
+#define MAC_CR_BCAST_			0x00000800
+#define MAC_CR_DISRTY_			0x00000400
+#define MAC_CR_PADSTR_			0x00000100
+#define MAC_CR_BOLMT_MASK_		0x000000C0
+#define MAC_CR_DFCHK_			0x00000020
+#define MAC_CR_TXEN_			0x00000008
+#define MAC_CR_RXEN_			0x00000004
+
+#define ADDRH				0x02
+
+#define ADDRL				0x03
+
+#define HASHH				0x04
+
+#define HASHL				0x05
+
+#define MII_ACC				0x06
+#define MII_ACC_PHY_ADDR_		0x0000F800
+#define MII_ACC_MIIRINDA_		0x000007C0
+#define MII_ACC_MII_WRITE_		0x00000002
+#define MII_ACC_MII_BUSY_		0x00000001
+
+#define MII_DATA			0x07
+
+#define FLOW				0x08
+#define FLOW_FCPT_			0xFFFF0000
+#define FLOW_FCPASS_			0x00000004
+#define FLOW_FCEN_			0x00000002
+#define FLOW_FCBSY_			0x00000001
+
+#define VLAN1				0x09
+
+#define VLAN2				0x0A
+
+#define WUFF				0x0B
+
+#define WUCSR				0x0C
+#define WUCSR_GUE_			0x00000200
+#define WUCSR_WUFR_			0x00000040
+#define WUCSR_MPR_			0x00000020
+#define WUCSR_WAKE_EN_			0x00000004
+#define WUCSR_MPEN_			0x00000002
+
+/*
+ * Phy definitions (vendor-specific)
+ */
+#define LAN9118_PHY_ID			0x00C0001C
+
+#define MII_INTSTS			0x1D
+
+#define MII_INTMSK			0x1E
+#define PHY_INTMSK_AN_RCV_		(1 << 1)
+#define PHY_INTMSK_PDFAULT_		(1 << 2)
+#define PHY_INTMSK_AN_ACK_		(1 << 3)
+#define PHY_INTMSK_LNKDOWN_		(1 << 4)
+#define PHY_INTMSK_RFAULT_		(1 << 5)
+#define PHY_INTMSK_AN_COMP_		(1 << 6)
+#define PHY_INTMSK_ENERGYON_		(1 << 7)
+#define PHY_INTMSK_DEFAULT_		(PHY_INTMSK_ENERGYON_ | \
+					 PHY_INTMSK_AN_COMP_ | \
+					 PHY_INTMSK_RFAULT_ | \
+					 PHY_INTMSK_LNKDOWN_)
+
+#define ADVERTISE_PAUSE_ALL		(ADVERTISE_PAUSE_CAP | \
+					 ADVERTISE_PAUSE_ASYM)
+
+#define LPA_PAUSE_ALL			(LPA_PAUSE_CAP | \
+					 LPA_PAUSE_ASYM)
+
+#endif				/* __SMSC911X_H__ */
diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h
new file mode 100644
index 000000000000..47c4ffd10dbb
--- /dev/null
+++ b/include/linux/smsc911x.h
@@ -0,0 +1,42 @@
+/***************************************************************************
+ *
+ * Copyright (C) 2004-2008 SMSC
+ * Copyright (C) 2005-2008 ARM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ ***************************************************************************/
+#ifndef __LINUX_SMSC911X_H__
+#define __LINUX_SMSC911X_H__
+
+#include <linux/phy.h>
+
+/* platform_device configuration data, should be assigned to
+ * the platform_device's dev.platform_data */
+struct smsc911x_platform_config {
+	unsigned int irq_polarity;
+	unsigned int irq_type;
+	phy_interface_t phy_interface;
+};
+
+/* Constants for platform_device irq polarity configuration */
+#define SMSC911X_IRQ_POLARITY_ACTIVE_LOW	0
+#define SMSC911X_IRQ_POLARITY_ACTIVE_HIGH	1
+
+/* Constants for platform_device irq type configuration */
+#define SMSC911X_IRQ_TYPE_OPEN_DRAIN		0
+#define SMSC911X_IRQ_TYPE_PUSH_PULL		1
+
+#endif /* __LINUX_SMSC911X_H__ */
-- 
cgit v1.2.3


From 60a7ecf42661f2b22168751298592da6ee210c9e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 5 Nov 2008 16:05:44 -0500
Subject: ftrace: add quick function trace stop

Impact: quick start and stop of function tracer

This patch adds a way to disable the function tracer quickly without
the need to run kstop_machine. It adds a new variable called
function_trace_stop which will stop the calls to functions from mcount
when set.  This is just an on/off switch and does not handle recursion
like preempt_disable().

It's main purpose is to help other tracers/debuggers start and stop tracing
fuctions without the need to call kstop_machine.

The config option HAVE_FUNCTION_TRACE_MCOUNT_TEST is added for archs
that implement the testing of the function_trace_stop in the mcount
arch dependent code. Otherwise, the test is done in the C code.

x86 is the only arch at the moment that supports this.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig           |  1 +
 arch/x86/kernel/entry_32.S |  6 ++++++
 arch/x86/kernel/entry_64.S |  5 +++++
 include/linux/ftrace.h     | 30 +++++++++++++++++++++++++++++
 kernel/trace/Kconfig       |  7 +++++++
 kernel/trace/ftrace.c      | 47 ++++++++++++++++++++++++++++++++++++----------
 6 files changed, 86 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f20718d3156..d09e812c6223 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..9134de814c97 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
 END(mcount)
 
 ENTRY(ftrace_caller)
+	cmpl $0, function_trace_stop
+	jne  ftrace_stub
+
 	pushl %eax
 	pushl %ecx
 	pushl %edx
@@ -1180,6 +1183,9 @@ END(ftrace_caller)
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(mcount)
+	cmpl $0, function_trace_stop
+	jne  ftrace_stub
+
 	cmpl $ftrace_stub, ftrace_trace_function
 	jnz trace
 .globl ftrace_stub
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..08aa6b10933c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
 END(mcount)
 
 ENTRY(ftrace_caller)
+	cmpl $0, function_trace_stop
+	jne  ftrace_stub
 
 	/* taken from glibc */
 	subq $0x38, %rsp
@@ -103,6 +105,9 @@ END(ftrace_caller)
 
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 ENTRY(mcount)
+	cmpl $0, function_trace_stop
+	jne  ftrace_stub
+
 	cmpq $ftrace_stub, ftrace_trace_function
 	jnz trace
 .globl ftrace_stub
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4642959e5bda..794ab907dbfe 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -23,6 +23,34 @@ struct ftrace_ops {
 	struct ftrace_ops *next;
 };
 
+extern int function_trace_stop;
+
+/**
+ * ftrace_stop - stop function tracer.
+ *
+ * A quick way to stop the function tracer. Note this an on off switch,
+ * it is not something that is recursive like preempt_disable.
+ * This does not disable the calling of mcount, it only stops the
+ * calling of functions from mcount.
+ */
+static inline void ftrace_stop(void)
+{
+	function_trace_stop = 1;
+}
+
+/**
+ * ftrace_start - start the function tracer.
+ *
+ * This function is the inverse of ftrace_stop. This does not enable
+ * the function tracing if the function tracer is disabled. This only
+ * sets the function tracer flag to continue calling the functions
+ * from mcount.
+ */
+static inline void ftrace_start(void)
+{
+	function_trace_stop = 0;
+}
+
 /*
  * The ftrace_ops must be a static and should also
  * be read_mostly.  These functions do modify read_mostly variables
@@ -41,6 +69,8 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
 static inline void ftrace_kill(void) { }
+static inline void ftrace_stop(void) { }
+static inline void ftrace_start(void) { }
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..fc4febc3334a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,13 @@ config NOP_TRACER
 config HAVE_FUNCTION_TRACER
 	bool
 
+config HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	bool
+	help
+	 This gets selected when the arch tests the function_trace_stop
+	 variable at the mcount call site. Otherwise, this variable
+	 is tested by the called function.
+
 config HAVE_DYNAMIC_FTRACE
 	bool
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24568c8..896c71f0f4c4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,9 @@
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
+/* Quick disabling of function tracer. */
+int function_trace_stop;
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -63,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
 
 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
@@ -88,8 +92,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 void clear_ftrace_function(void)
 {
 	ftrace_trace_function = ftrace_stub;
+	__ftrace_trace_function = ftrace_stub;
 }
 
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+/*
+ * For those archs that do not test ftrace_trace_stop in their
+ * mcount call site, we need to do it from C.
+ */
+static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
+{
+	if (function_trace_stop)
+		return;
+
+	__ftrace_trace_function(ip, parent_ip);
+}
+#endif
+
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
 	/* should not be called from interrupt context */
@@ -110,10 +129,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 		 * For one func, simply call it directly.
 		 * For more than one func, call the chain.
 		 */
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 		if (ops->next == &ftrace_list_end)
 			ftrace_trace_function = ops->func;
 		else
 			ftrace_trace_function = ftrace_list_func;
+#else
+		if (ops->next == &ftrace_list_end)
+			__ftrace_trace_function = ops->func;
+		else
+			__ftrace_trace_function = ftrace_list_func;
+		ftrace_trace_function = ftrace_test_stop_func;
+#endif
 	}
 
 	spin_unlock(&ftrace_lock);
@@ -526,7 +553,7 @@ static void ftrace_run_update_code(int command)
 }
 
 static ftrace_func_t saved_ftrace_func;
-static int ftrace_start;
+static int ftrace_start_up;
 static DEFINE_MUTEX(ftrace_start_lock);
 
 static void ftrace_startup(void)
@@ -537,8 +564,8 @@ static void ftrace_startup(void)
 		return;
 
 	mutex_lock(&ftrace_start_lock);
-	ftrace_start++;
-	if (ftrace_start == 1)
+	ftrace_start_up++;
+	if (ftrace_start_up == 1)
 		command |= FTRACE_ENABLE_CALLS;
 
 	if (saved_ftrace_func != ftrace_trace_function) {
@@ -562,8 +589,8 @@ static void ftrace_shutdown(void)
 		return;
 
 	mutex_lock(&ftrace_start_lock);
-	ftrace_start--;
-	if (!ftrace_start)
+	ftrace_start_up--;
+	if (!ftrace_start_up)
 		command |= FTRACE_DISABLE_CALLS;
 
 	if (saved_ftrace_func != ftrace_trace_function) {
@@ -589,8 +616,8 @@ static void ftrace_startup_sysctl(void)
 	mutex_lock(&ftrace_start_lock);
 	/* Force update next time */
 	saved_ftrace_func = NULL;
-	/* ftrace_start is true if we want ftrace running */
-	if (ftrace_start)
+	/* ftrace_start_up is true if we want ftrace running */
+	if (ftrace_start_up)
 		command |= FTRACE_ENABLE_CALLS;
 
 	ftrace_run_update_code(command);
@@ -605,8 +632,8 @@ static void ftrace_shutdown_sysctl(void)
 		return;
 
 	mutex_lock(&ftrace_start_lock);
-	/* ftrace_start is true if ftrace is running */
-	if (ftrace_start)
+	/* ftrace_start_up is true if ftrace is running */
+	if (ftrace_start_up)
 		command |= FTRACE_DISABLE_CALLS;
 
 	ftrace_run_update_code(command);
@@ -1186,7 +1213,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 
 	mutex_lock(&ftrace_sysctl_lock);
 	mutex_lock(&ftrace_start_lock);
-	if (iter->filtered && ftrace_start && ftrace_enabled)
+	if (iter->filtered && ftrace_start_up && ftrace_enabled)
 		ftrace_run_update_code(FTRACE_ENABLE_CALLS);
 	mutex_unlock(&ftrace_start_lock);
 	mutex_unlock(&ftrace_sysctl_lock);
-- 
cgit v1.2.3


From 0f04870148ecb825133bc2733f473b1c5773ac0b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 5 Nov 2008 16:05:44 -0500
Subject: ftrace: soft tracing stop and start

Impact: add way to quickly start stop tracing from the kernel

This patch adds a soft stop and start to the trace. This simply
disables function tracing via the ftrace_disabled flag, and
disables the trace buffers to prevent recording. The tracing
code may still be executed, but the trace will not be recorded.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  5 ++++
 kernel/trace/trace.c   | 81 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 84 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 794ab907dbfe..7a75fc6d41f4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -216,6 +216,9 @@ static inline void __ftrace_enabled_restore(int enabled)
 #ifdef CONFIG_TRACING
 extern int ftrace_dump_on_oops;
 
+extern void tracing_start(void);
+extern void tracing_stop(void);
+
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
 
@@ -246,6 +249,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
 ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
 
+static inline void tracing_start(void) { }
+static inline void tracing_stop(void) { }
 static inline int
 ftrace_printk(const char *fmt, ...)
 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 29ab40a764c8..113aea9447ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,6 +43,15 @@
 unsigned long __read_mostly	tracing_max_latency = (cycle_t)ULONG_MAX;
 unsigned long __read_mostly	tracing_thresh;
 
+
+/*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+int tracing_disabled = 1;
+
 static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
 
 static inline void ftrace_disable_cpu(void)
@@ -62,8 +71,6 @@ static cpumask_t __read_mostly		tracing_buffer_mask;
 #define for_each_tracing_cpu(cpu)	\
 	for_each_cpu_mask(cpu, tracing_buffer_mask)
 
-static int tracing_disabled = 1;
-
 /*
  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
  *
@@ -613,6 +620,76 @@ static void trace_init_cmdlines(void)
 	cmdline_idx = 0;
 }
 
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
+/**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ */
+void tracing_start(void)
+{
+	struct ring_buffer *buffer;
+	unsigned long flags;
+
+	if (tracing_disabled)
+		return;
+
+	spin_lock_irqsave(&tracing_start_lock, flags);
+	if (--trace_stop_count)
+		goto out;
+
+	if (trace_stop_count < 0) {
+		/* Someone screwed up their debugging */
+		WARN_ON_ONCE(1);
+		trace_stop_count = 0;
+		goto out;
+	}
+
+
+	buffer = global_trace.buffer;
+	if (buffer)
+		ring_buffer_record_enable(buffer);
+
+	buffer = max_tr.buffer;
+	if (buffer)
+		ring_buffer_record_enable(buffer);
+
+	ftrace_start();
+ out:
+	spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+void tracing_stop(void)
+{
+	struct ring_buffer *buffer;
+	unsigned long flags;
+
+	ftrace_stop();
+	spin_lock_irqsave(&tracing_start_lock, flags);
+	if (trace_stop_count++)
+		goto out;
+
+	buffer = global_trace.buffer;
+	if (buffer)
+		ring_buffer_record_disable(buffer);
+
+	buffer = max_tr.buffer;
+	if (buffer)
+		ring_buffer_record_disable(buffer);
+
+ out:
+	spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
 void trace_stop_cmdline_recording(void);
 
 static void trace_save_cmdline(struct task_struct *tsk)
-- 
cgit v1.2.3


From 6a60dd121c5b6c2d827e99b38c1326f2600c3891 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 6 Nov 2008 15:55:21 -0500
Subject: ftrace: split out hardirq ftrace code into own header

Impact: moving of function prototypes into own header file

ftrace.h is too big of a file for hardirq.h, and some archs will fail
to build because of the include dependencies not being met.

This patch pulls out the required prototypes for hardirq.h into a smaller
and safer ftrace_irq.h file.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h     |  5 -----
 include/linux/ftrace_irq.h | 13 +++++++++++++
 include/linux/hardirq.h    |  2 +-
 3 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/ftrace_irq.h

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0ad1b48aea69..1b340e3fa249 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -104,9 +104,6 @@ extern void ftrace_release(void *start, unsigned long size);
 
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
-extern void ftrace_nmi_enter(void);
-extern void ftrace_nmi_exit(void);
-
 #else
 # define skip_trace(ip)				({ 0; })
 # define ftrace_force_update()			({ 0; })
@@ -114,8 +111,6 @@ extern void ftrace_nmi_exit(void);
 # define ftrace_disable_daemon()		do { } while (0)
 # define ftrace_enable_daemon()			do { } while (0)
 static inline void ftrace_release(void *start, unsigned long size) { }
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
new file mode 100644
index 000000000000..b1299d6729f2
--- /dev/null
+++ b/include/linux/ftrace_irq.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_FTRACE_IRQ_H
+#define _LINUX_FTRACE_IRQ_H
+
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_nmi_enter(void);
+extern void ftrace_nmi_exit(void);
+#else
+static inline void ftrace_nmi_enter(void) { }
+static inline void ftrace_nmi_exit(void) { }
+#endif
+
+#endif /* _LINUX_FTRACE_IRQ_H */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index ffc16ab5a878..89a56d79e4c6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,7 +4,7 @@
 #include <linux/preempt.h>
 #include <linux/smp_lock.h>
 #include <linux/lockdep.h>
-#include <linux/ftrace.h>
+#include <linux/ftrace_irq.h>
 #include <asm/hardirq.h>
 #include <asm/system.h>
 
-- 
cgit v1.2.3


From 945eed02cd619f525e097319cd3d18c58d01da87 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 7 Nov 2008 09:08:15 +0100
Subject: ALSA: Evaluate condition in snd_BUG_ON() in non-debugging case

Change snd_BUG_ON() to evaluate the given condition, at least, in syntax
for avoiding compile warnings such as unused variables.  The compiler
should optimize out the condition evaluation in the real code, though.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index 7e5589472681..6fa4c7b1970a 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -390,11 +390,11 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...)
 
 #define snd_printd(fmt, args...)	do { } while (0)
 #define snd_BUG()			do { } while (0)
-static inline int __snd_bug_on(void)
+static inline int __snd_bug_on(int cond)
 {
 	return 0;
 }
-#define snd_BUG_ON(cond)		__snd_bug_on()  /* always false */
+#define snd_BUG_ON(cond)	__snd_bug_on(0 && (cond))  /* always false */
 
 #endif /* CONFIG_SND_DEBUG */
 
-- 
cgit v1.2.3


From f66fcedc84dd06d42a0dba3894d238498509e8b7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 7 Nov 2008 09:37:22 +0100
Subject: ALSA: Document debug macros

Add descriptions of snd_BUG() and snd_BUG_ON().
Also fixed a typo in the comment of snd_printk(), too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index 6fa4c7b1970a..b2f3bbe9e56d 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -356,7 +356,7 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...)
  * snd_printk - printk wrapper
  * @fmt: format string
  *
- * Works like print() but prints the file and the line of the caller
+ * Works like printk() but prints the file and the line of the caller
  * when configured with CONFIG_SND_VERBOSE_PRINTK.
  */
 #define snd_printk(fmt, args...) \
@@ -383,7 +383,29 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...)
 	printk(fmt ,##args)
 #endif
 
+/**
+ * snd_BUG - give a BUG warning message and stack trace
+ *
+ * Calls WARN() if CONFIG_SND_DEBUG is set.
+ * Ignored when CONFIG_SND_DEBUG is not set.
+ */
 #define snd_BUG()		WARN(1, "BUG?\n")
+
+/**
+ * snd_BUG_ON - debugging check macro
+ * @cond: condition to evaluate
+ *
+ * When CONFIG_SND_DEBUG is set, this macro evaluates the given condition,
+ * and call WARN() and returns the value if it's non-zero.
+ * 
+ * When CONFIG_SND_DEBUG is not set, this just returns zero, and the given
+ * condition is ignored.
+ *
+ * NOTE: the argument won't be evaluated at all when CONFIG_SND_DEBUG=n.
+ * Thus, don't put any statement that influences on the code behavior,
+ * such as pre/post increment, to the argument of this macro.
+ * If you want to evaluate and give a warning, use standard WARN_ON().
+ */
 #define snd_BUG_ON(cond)	WARN((cond), "BUG? (%s)\n", __stringify(cond))
 
 #else /* !CONFIG_SND_DEBUG */
-- 
cgit v1.2.3


From 3d8160b1493bcadca74fbb635d79b3928b8999cf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 7 Nov 2008 22:52:14 -0800
Subject: Revert "net: Guaranetee the proper ordering of the loopback device."

This reverts commit ae33bc40c0d96d02f51a996482ea7e41c5152695.
---
 drivers/net/loopback.c    | 13 +++++++++++--
 include/linux/netdevice.h |  1 -
 net/core/dev.c            | 12 ------------
 3 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index c4516b580ba5..91d08585a6d8 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -204,8 +204,17 @@ static __net_exit void loopback_net_exit(struct net *net)
 	unregister_netdev(dev);
 }
 
-/* Registered in net/core/dev.c */
-struct pernet_operations __net_initdata loopback_net_ops = {
+static struct pernet_operations __net_initdata loopback_net_ops = {
        .init = loopback_net_init,
        .exit = loopback_net_exit,
 };
+
+static int __init loopback_init(void)
+{
+	return register_pernet_device(&loopback_net_ops);
+}
+
+/* Loopback is special. It should be initialized before any other network
+ * device and network subsystem.
+ */
+fs_initcall(loopback_init);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 12d7f4469dc9..f1b0dbe58464 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1766,7 +1766,6 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 	return 0;
 }
 
-extern struct pernet_operations __net_initdata loopback_net_ops;
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index e0dc67a789b7..2306d56fbb5e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4909,18 +4909,6 @@ static int __init net_dev_init(void)
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
-	/* The loopback device is special if any other network devices
-	 * is present in a network namespace the loopback device must
-	 * be present. Since we now dynamically allocate and free the
-	 * loopback device ensure this invariant is maintained by
-	 * keeping the loopback device as the first device on the
-	 * list of network devices.  Ensuring the loopback devices
-	 * is the first device that appears and the last network device
-	 * that disappears.
-	 */
-	if (register_pernet_device(&loopback_net_ops))
-		goto out;
-
 	if (register_pernet_device(&default_device_ops))
 		goto out;
 
-- 
cgit v1.2.3


From 505d4f73dda9e20d59da05008f1f5eb432613e71 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@maxwell.aristanetworks.com>
Date: Fri, 7 Nov 2008 22:54:20 -0800
Subject: net: Guaranetee the proper ordering of the loopback device. v2

I was recently hunting a bug that occurred in network namespace
cleanup.  In looking at the code it became apparrent that we have
and will continue to have cases where if we have anything going
on in a network namespace there will be assumptions that the
loopback device is present.   Things like sending igmp unsubscribe
messages when we bring down network devices invokes the routing
code which assumes that at least the loopback driver is present.

Therefore to avoid magic initcall ordering hackery that is hard
to follow and hard to get right insert a call to register the
loopback device directly from net_dev_init().    This guarantes
that the loopback device is the first device registered and
the last network device to go away.

But do it carefully so we register the loopback device after
we clear dev_boot_phase.

Signed-off-by: Eric W. Biederman <ebiederm@maxwell.aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c    | 13 ++-----------
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 22 +++++++++++++++++-----
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 91d08585a6d8..c4516b580ba5 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net)
 	unregister_netdev(dev);
 }
 
-static struct pernet_operations __net_initdata loopback_net_ops = {
+/* Registered in net/core/dev.c */
+struct pernet_operations __net_initdata loopback_net_ops = {
        .init = loopback_net_init,
        .exit = loopback_net_exit,
 };
-
-static int __init loopback_init(void)
-{
-	return register_pernet_device(&loopback_net_ops);
-}
-
-/* Loopback is special. It should be initialized before any other network
- * device and network subsystem.
- */
-fs_initcall(loopback_init);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f1b0dbe58464..12d7f4469dc9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 	return 0;
 }
 
+extern struct pernet_operations __net_initdata loopback_net_ops;
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 2306d56fbb5e..31568b2068ac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4909,9 +4909,6 @@ static int __init net_dev_init(void)
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
 
-	if (register_pernet_device(&default_device_ops))
-		goto out;
-
 	/*
 	 *	Initialise the packet receive queues.
 	 */
@@ -4928,10 +4925,25 @@ static int __init net_dev_init(void)
 		queue->backlog.weight = weight_p;
 	}
 
-	netdev_dma_register();
-
 	dev_boot_phase = 0;
 
+	/* The loopback device is special if any other network devices
+	 * is present in a network namespace the loopback device must
+	 * be present. Since we now dynamically allocate and free the
+	 * loopback device ensure this invariant is maintained by
+	 * keeping the loopback device as the first device on the
+	 * list of network devices.  Ensuring the loopback devices
+	 * is the first device that appears and the last network device
+	 * that disappears.
+	 */
+	if (register_pernet_device(&loopback_net_ops))
+		goto out;
+
+	if (register_pernet_device(&default_device_ops))
+		goto out;
+
+	netdev_dma_register();
+
 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
-- 
cgit v1.2.3


From f400923735ecbb67cbe4a3606c9479f694754f51 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 7 Nov 2008 22:56:00 -0800
Subject: pkt_sched: Control group classifier

The classifier should cover the most common use case and will work
without any special configuration.

The principle of the classifier is to directly access the
task_struct via get_current(). In order for this to work,
classification requests from softirqs must be ignored. This is
not a problem because the vast majority of packets in softirq
context are not assigned to a task anyway. For this to work, a
mechanism is needed to trace softirq context.

This repost goes back to the method of relying on the number of
nested bh disable calls for the sake of not adding too much
complexity and the option to come up with something more reliable
if actually needed.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cgroup_subsys.h |   6 +
 include/linux/pkt_cls.h       |  14 ++
 net/sched/Kconfig             |  11 ++
 net/sched/Makefile            |   1 +
 net/sched/cls_cgroup.c        | 290 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 322 insertions(+)
 create mode 100644 net/sched/cls_cgroup.c

(limited to 'include')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c22396e8b50..9c8d31bacf46 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -54,3 +54,9 @@ SUBSYS(freezer)
 #endif
 
 /* */
+
+#ifdef CONFIG_NET_CLS_CGROUP
+SUBSYS(net_cls)
+#endif
+
+/* */
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 7cf7824df778..e6aa8482ad7a 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -394,6 +394,20 @@ enum
 
 #define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
 
+
+/* Cgroup classifier */
+
+enum
+{
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_ACT,
+	TCA_CGROUP_POLICE,
+	TCA_CGROUP_EMATCHES,
+	__TCA_CGROUP_MAX,
+};
+
+#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
+
 /* Extended Matches */
 
 struct tcf_ematch_tree_hdr
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54155db..36543b6fcef3 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -316,6 +316,17 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.
 
+config NET_CLS_CGROUP
+	bool "Control Group Classifier"
+	select NET_CLS
+	depends on CGROUPS
+	---help---
+	  Say Y here if you want to classify packets based on the control
+	  cgroup of their process.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c9925b269..70b35f8708c3 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 000000000000..53ada2c0e41c
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,290 @@
+/*
+ * net/sched/cls_cgroup.c	Control Group Classifier
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+
+struct cgroup_cls_state
+{
+	struct cgroup_subsys_state css;
+	u32 classid;
+};
+
+static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
+{
+	return (struct cgroup_cls_state *)
+		cgroup_subsys_state(cgrp, net_cls_subsys_id);
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+						 struct cgroup *cgrp)
+{
+	struct cgroup_cls_state *cs;
+
+	if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+
+	if (cgrp->parent)
+		cs->classid = net_cls_state(cgrp->parent)->classid;
+
+	return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	kfree(ss);
+}
+
+static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+{
+	return net_cls_state(cgrp)->classid;
+}
+
+static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+{
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
+	net_cls_state(cgrp)->classid = (u32) value;
+
+	cgroup_unlock();
+
+	return 0;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name = "classid",
+		.read_u64 = read_classid,
+		.write_u64 = write_classid,
+	},
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+struct cgroup_subsys net_cls_subsys = {
+	.name		= "net_cls",
+	.create		= cgrp_create,
+	.destroy	= cgrp_destroy,
+	.populate	= cgrp_populate,
+	.subsys_id	= net_cls_subsys_id,
+};
+
+struct cls_cgroup_head
+{
+	u32			handle;
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+};
+
+static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			       struct tcf_result *res)
+{
+	struct cls_cgroup_head *head = tp->root;
+	struct cgroup_cls_state *cs;
+	int ret = 0;
+
+	/*
+	 * Due to the nature of the classifier it is required to ignore all
+	 * packets originating from softirq context as accessing `current'
+	 * would lead to false results.
+	 *
+	 * This test assumes that all callers of dev_queue_xmit() explicitely
+	 * disable bh. Knowing this, it is possible to detect softirq based
+	 * calls by looking at the number of nested bh disable calls because
+	 * softirqs always disables bh.
+	 */
+	if (softirq_count() != SOFTIRQ_OFFSET)
+		return -1;
+
+	rcu_read_lock();
+	cs = (struct cgroup_cls_state *) task_subsys_state(current,
+							   net_cls_subsys_id);
+	if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
+		res->classid = cs->classid;
+		res->class = 0;
+		ret = tcf_exts_exec(skb, &head->exts, res);
+	} else
+		ret = -1;
+
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	return 0UL;
+}
+
+static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int cls_cgroup_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static const struct tcf_ext_map cgroup_ext_map = {
+	.action = TCA_CGROUP_ACT,
+	.police = TCA_CGROUP_POLICE,
+};
+
+static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
+	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
+};
+
+static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
+			     u32 handle, struct nlattr **tca,
+			     unsigned long *arg)
+{
+	struct nlattr *tb[TCA_CGROUP_MAX+1];
+	struct cls_cgroup_head *head = tp->root;
+	struct tcf_ematch_tree t;
+	struct tcf_exts e;
+	int err;
+
+	if (head == NULL) {
+		if (!handle)
+			return -EINVAL;
+
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			return -ENOBUFS;
+
+		head->handle = handle;
+
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	}
+
+	if (handle != head->handle)
+		return -ENOENT;
+
+	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
+			       cgroup_policy);
+	if (err < 0)
+		return err;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0)
+		return err;
+
+	tcf_exts_change(tp, &head->exts, &e);
+	tcf_em_tree_change(tp, &head->ematches, &t);
+
+	return 0;
+}
+
+static void cls_cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cls_cgroup_head *head;
+
+	head = (struct cls_cgroup_head *)xchg(&tp->root, NULL);
+
+	if (head) {
+		tcf_exts_destroy(tp, &head->exts);
+		tcf_em_tree_destroy(tp, &head->ematches);
+		kfree(head);
+	}
+}
+
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_cgroup_head *head = tp->root;
+
+	if (arg->count < arg->skip)
+		goto skip;
+
+	if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+		arg->stop = 1;
+		return;
+	}
+skip:
+	arg->count++;
+}
+
+static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+			   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_cgroup_head *head = tp->root;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nest;
+
+	t->tcm_handle = head->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+	    tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
+	.kind		=	"cgroup",
+	.init		=	cls_cgroup_init,
+	.change		=	cls_cgroup_change,
+	.classify	=	cls_cgroup_classify,
+	.destroy	=	cls_cgroup_destroy,
+	.get		=	cls_cgroup_get,
+	.put		=	cls_cgroup_put,
+	.delete		=	cls_cgroup_delete,
+	.walk		=	cls_cgroup_walk,
+	.dump		=	cls_cgroup_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_cgroup_cls(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup_cls(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup_cls);
+module_exit(exit_cgroup_cls);
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 1239cd58d237fa6ad501acaec8776262a5784ec8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 28 Oct 2008 11:12:57 +0100
Subject: wireless: move mesh config length constant

This is a constant from the 802.11 specification.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 3 +++
 net/mac80211/mesh.c       | 2 +-
 net/mac80211/mesh.h       | 5 +----
 net/mac80211/scan.c       | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index aad99195a4cc..9dc288b920c8 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -97,7 +97,10 @@
 #define IEEE80211_MAX_FRAME_LEN		2352
 
 #define IEEE80211_MAX_SSID_LEN		32
+
 #define IEEE80211_MAX_MESH_ID_LEN	32
+#define IEEE80211_MESH_CONFIG_LEN	19
+
 #define IEEE80211_QOS_CTL_LEN		2
 #define IEEE80211_QOS_CTL_TID_MASK	0x000F
 #define IEEE80211_QOS_CTL_TAG1D_MASK	0x0007
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d3b6e1a648bd..82f568e94365 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -238,7 +238,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 
 	pos = skb_put(skb, 21);
 	*pos++ = WLAN_EID_MESH_CONFIG;
-	*pos++ = MESH_CFG_LEN;
+	*pos++ = IEEE80211_MESH_CONFIG_LEN;
 	/* Version */
 	*pos++ = 1;
 
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index e10471c6ba42..c197ab545e54 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -145,9 +145,6 @@ struct mesh_rmc {
 };
 
 
-/* Mesh IEs constants */
-#define MESH_CFG_LEN		19
-
 /*
  * MESH_CFG_COMP_LEN Includes:
  * 	- Active path selection protocol ID.
@@ -157,7 +154,7 @@ struct mesh_rmc {
  * Does not include mesh capabilities, which may vary across nodes in the same
  * mesh
  */
-#define MESH_CFG_CMP_LEN 	17
+#define MESH_CFG_CMP_LEN 	(IEEE80211_MESH_CONFIG_LEN - 2)
 
 /* Default values, timeouts in ms */
 #define MESH_TTL 		5
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 7372d7abb8c0..f5c7c3371929 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -159,7 +159,7 @@ ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_i
 {
 	struct ieee80211_bss *bss;
 
-	if (mesh_config_len != MESH_CFG_LEN)
+	if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN)
 		return NULL;
 
 	bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
-- 
cgit v1.2.3


From 41bb73eeac5ff5fb217257ba33b654747b3abf11 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 29 Oct 2008 01:09:37 +0100
Subject: mac80211: remove SSID driver code

Remove the SSID from the driver API since now there is no
driver that requires knowing the SSID and I think it's
unlikely that any hardware design that does require the
SSID will play well with mac80211.

This also removes support for setting the SSID in master
mode which will require a patch to hostapd to not try.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-agn.c      |  7 -------
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  7 -------
 include/net/mac80211.h                      | 11 +----------
 net/mac80211/ieee80211_i.h                  |  3 ---
 net/mac80211/main.c                         | 15 +++------------
 net/mac80211/mlme.c                         | 14 --------------
 net/mac80211/wext.c                         | 16 ----------------
 7 files changed, 4 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index ce3141f07f35..ad186e134dec 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2957,13 +2957,6 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw,
 			return rc;
 	}
 
-	if ((priv->iw_mode == NL80211_IFTYPE_AP) &&
-	    (!conf->ssid_len)) {
-		IWL_DEBUG_MAC80211
-		    ("Leaving in AP mode because HostAPD is not ready.\n");
-		return 0;
-	}
-
 	if (!iwl_is_alive(priv))
 		return -EAGAIN;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 0c2d778a9ff7..47881be9ba44 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -6743,13 +6743,6 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw,
 
 	/* XXX: this MUST use conf->mac_addr */
 
-	if ((priv->iw_mode == NL80211_IFTYPE_AP) &&
-	    (!conf->ssid_len)) {
-		IWL_DEBUG_MAC80211
-		    ("Leaving in AP mode because HostAPD is not ready.\n");
-		return 0;
-	}
-
 	if (!iwl3945_is_alive(priv))
 		return -EAGAIN;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0b983bed3829..af2ec6f9beb9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -616,14 +616,12 @@ struct ieee80211_if_init_conf {
  * enum ieee80211_if_conf_change - interface config change flags
  *
  * @IEEE80211_IFCC_BSSID: The BSSID changed.
- * @IEEE80211_IFCC_SSID: The SSID changed.
  * @IEEE80211_IFCC_BEACON: The beacon for this interface changed
  *	(currently AP and MESH only), use ieee80211_beacon_get().
  */
 enum ieee80211_if_conf_change {
 	IEEE80211_IFCC_BSSID	= BIT(0),
-	IEEE80211_IFCC_SSID	= BIT(1),
-	IEEE80211_IFCC_BEACON	= BIT(2),
+	IEEE80211_IFCC_BEACON	= BIT(1),
 };
 
 /**
@@ -631,11 +629,6 @@ enum ieee80211_if_conf_change {
  *
  * @changed: parameters that have changed, see &enum ieee80211_if_conf_change.
  * @bssid: BSSID of the network we are associated to/creating.
- * @ssid: used (together with @ssid_len) by drivers for hardware that
- *	generate beacons independently. The pointer is valid only during the
- *	config_interface() call, so copy the value somewhere if you need
- *	it.
- * @ssid_len: length of the @ssid field.
  *
  * This structure is passed to the config_interface() callback of
  * &struct ieee80211_hw.
@@ -643,8 +636,6 @@ enum ieee80211_if_conf_change {
 struct ieee80211_if_conf {
 	u32 changed;
 	u8 *bssid;
-	u8 *ssid;
-	size_t ssid_len;
 };
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2c91108e3901..155a20410017 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -212,9 +212,6 @@ struct ieee80211_if_ap {
 
 	struct list_head vlans;
 
-	u8 ssid[IEEE80211_MAX_SSID_LEN];
-	size_t ssid_len;
-
 	/* yes, this looks ugly, but guarantees that we can later use
 	 * bitmap_empty :)
 	 * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index fa0cc7a1e6b4..d631dc96c323 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -171,19 +171,13 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
 	conf.changed = changed;
 
 	if (sdata->vif.type == NL80211_IFTYPE_STATION ||
-	    sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+	    sdata->vif.type == NL80211_IFTYPE_ADHOC)
 		conf.bssid = sdata->u.sta.bssid;
-		conf.ssid = sdata->u.sta.ssid;
-		conf.ssid_len = sdata->u.sta.ssid_len;
-	} else if (sdata->vif.type == NL80211_IFTYPE_AP) {
+	else if (sdata->vif.type == NL80211_IFTYPE_AP)
 		conf.bssid = sdata->dev->dev_addr;
-		conf.ssid = sdata->u.ap.ssid;
-		conf.ssid_len = sdata->u.ap.ssid_len;
-	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
+	else if (ieee80211_vif_is_mesh(&sdata->vif)) {
 		u8 zero[ETH_ALEN] = { 0 };
 		conf.bssid = zero;
-		conf.ssid = zero;
-		conf.ssid_len = 0;
 	} else {
 		WARN_ON(1);
 		return -EINVAL;
@@ -192,9 +186,6 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
 	if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID)))
 		return -EINVAL;
 
-	if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID)))
-		return -EINVAL;
-
 	return local->ops->config_interface(local_to_hw(local),
 					    &sdata->vif, &conf);
 }
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9e6c75abc922..708eb4502ed7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2416,7 +2416,6 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata,
 int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
 {
 	struct ieee80211_if_sta *ifsta;
-	int res;
 
 	if (len > IEEE80211_MAX_SSID_LEN)
 		return -EINVAL;
@@ -2428,19 +2427,6 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size
 		memcpy(ifsta->ssid, ssid, len);
 		ifsta->ssid_len = len;
 		ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
-
-		res = 0;
-		/*
-		 * Hack! MLME code needs to be cleaned up to have different
-		 * entry points for configuration and internal selection change
-		 */
-		if (netif_running(sdata->dev))
-			res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
-		if (res) {
-			printk(KERN_DEBUG "%s: Failed to config new SSID to "
-			       "the low-level driver\n", sdata->dev->name);
-			return res;
-		}
 	}
 
 	if (len)
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 231cab57351f..63f36e9d1af8 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -407,13 +407,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 		return 0;
 	}
 
-	if (sdata->vif.type == NL80211_IFTYPE_AP) {
-		memcpy(sdata->u.ap.ssid, ssid, len);
-		memset(sdata->u.ap.ssid + len, 0,
-		       IEEE80211_MAX_SSID_LEN - len);
-		sdata->u.ap.ssid_len = len;
-		return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID);
-	}
 	return -EOPNOTSUPP;
 }
 
@@ -437,15 +430,6 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
 		return res;
 	}
 
-	if (sdata->vif.type == NL80211_IFTYPE_AP) {
-		len = sdata->u.ap.ssid_len;
-		if (len > IW_ESSID_MAX_SIZE)
-			len = IW_ESSID_MAX_SIZE;
-		memcpy(ssid, sdata->u.ap.ssid, len);
-		data->length = len;
-		data->flags = 1;
-		return 0;
-	}
 	return -EOPNOTSUPP;
 }
 
-- 
cgit v1.2.3


From 8469cdef1f123e2e3e56645f1ac26c7cfb333d9c Mon Sep 17 00:00:00 2001
From: Sujith <Sujith.Manoharan@atheros.com>
Date: Wed, 29 Oct 2008 10:19:28 +0530
Subject: mac80211: Add a new event in ieee80211_ampdu_mlme_action

Send a notification to the driver on succesful
reception of an ADDBA response, add IEEE80211_AMPDU_TX_RESUME
for this purpose.

Signed-off-by: Sujith <Sujith.Manoharan@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/core.h |  1 +
 drivers/net/wireless/ath9k/main.c |  3 +++
 drivers/net/wireless/ath9k/xmit.c | 19 +++++++++++++++++++
 include/net/mac80211.h            |  2 ++
 net/mac80211/ht.c                 | 10 +++++++++-
 5 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/core.h b/drivers/net/wireless/ath9k/core.h
index 5b17e88ab9a9..69e8d3e41131 100644
--- a/drivers/net/wireless/ath9k/core.h
+++ b/drivers/net/wireless/ath9k/core.h
@@ -581,6 +581,7 @@ void ath_tx_aggr_teardown(struct ath_softc *sc,
 int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta,
 		      u16 tid, u16 *ssn);
 int ath_tx_aggr_stop(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid);
+void ath_tx_aggr_resume(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid);
 void ath_newassoc(struct ath_softc *sc,
 	struct ath_node *node, int isnew, int isuapsd);
 void ath_node_attach(struct ath_softc *sc, struct ieee80211_sta *sta);
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 65a532e08ecd..fb50aa0fc996 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1482,6 +1482,9 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
 
 		ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid);
 		break;
+	case IEEE80211_AMPDU_TX_RESUME:
+		ath_tx_aggr_resume(sc, sta, tid);
+		break;
 	default:
 		DPRINTF(sc, ATH_DBG_FATAL,
 			"%s: Unknown AMPDU action\n", __func__);
diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c
index 7e6f4e59a5d1..fe386b6dadac 100644
--- a/drivers/net/wireless/ath9k/xmit.c
+++ b/drivers/net/wireless/ath9k/xmit.c
@@ -2371,6 +2371,25 @@ int ath_tx_aggr_stop(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid)
 	return 0;
 }
 
+/* Resume tx aggregation */
+
+void ath_tx_aggr_resume(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid)
+{
+	struct ath_atx_tid *txtid;
+	struct ath_node *an;
+
+	an = (struct ath_node *)sta->drv_priv;
+
+	if (sc->sc_flags & SC_OP_TXAGGR) {
+		txtid = ATH_AN_2_TID(an, tid);
+		txtid->baw_size =
+			IEEE80211_MIN_AMPDU_BUF << sta->ht_cap.ampdu_factor;
+		txtid->state |= AGGR_ADDBA_COMPLETE;
+		txtid->state &= ~AGGR_ADDBA_PROGRESS;
+		ath_tx_resume_tid(sc, txtid);
+	}
+}
+
 /*
  * Performs transmit side cleanup when TID changes from aggregated to
  * unaggregated.
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index af2ec6f9beb9..53856003aa18 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1127,12 +1127,14 @@ enum ieee80211_filter_flags {
  * @IEEE80211_AMPDU_RX_STOP: stop Rx aggregation
  * @IEEE80211_AMPDU_TX_START: start Tx aggregation
  * @IEEE80211_AMPDU_TX_STOP: stop Tx aggregation
+ * @IEEE80211_AMPDU_TX_RESUME: resume TX aggregation
  */
 enum ieee80211_ampdu_mlme_action {
 	IEEE80211_AMPDU_RX_START,
 	IEEE80211_AMPDU_RX_STOP,
 	IEEE80211_AMPDU_TX_START,
 	IEEE80211_AMPDU_TX_STOP,
+	IEEE80211_AMPDU_TX_RESUME,
 };
 
 /**
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 08009d4b7d6e..3e231d756776 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -987,7 +987,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 {
 	struct ieee80211_hw *hw = &local->hw;
 	u16 capab;
-	u16 tid;
+	u16 tid, start_seq_num;
 	u8 *state;
 
 	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
@@ -1024,6 +1024,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 		    local->hw.ampdu_queues)
 			ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
 
+		if (local->ops->ampdu_action) {
+			(void)local->ops->ampdu_action(hw,
+					       IEEE80211_AMPDU_TX_RESUME,
+					       &sta->sta, tid, &start_seq_num);
+		}
+#ifdef CONFIG_MAC80211_HT_DEBUG
+		printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid);
+#endif /* CONFIG_MAC80211_HT_DEBUG */
 		spin_unlock_bh(&sta->lock);
 	} else {
 		sta->ampdu_mlme.addba_req_num[tid]++;
-- 
cgit v1.2.3


From bd815252720e4b667d9946d050d003ec89bda099 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 29 Oct 2008 20:00:45 +0100
Subject: wireless: implement basic rate helper function

This adds a helper function that, given a bitmap of basic
rates and a bitrate returns the response rate for this rate.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/wireless.h | 16 ++++++++++++++++
 net/wireless/util.c    | 19 +++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/net/wireless.h b/include/net/wireless.h
index 41294c5f6f8f..17d4b582cf34 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -340,6 +340,22 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq)
 	return __ieee80211_get_channel(wiphy, freq);
 }
 
+/**
+ * ieee80211_get_response_rate - get basic rate for a given rate
+ *
+ * @sband: the band to look for rates in
+ * @basic_rates: bitmap of basic rates
+ * @bitrate: the bitrate for which to find the basic rate
+ *
+ * This function returns the basic rate corresponding to a given
+ * bitrate, that is the next lower bitrate contained in the basic
+ * rate map, which is, for this function, given as a bitmap of
+ * indices of rates in the band's bitrate table.
+ */
+struct ieee80211_rate *
+ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
+			    u64 basic_rates, int bitrate);
+
 /**
  * regulatory_hint - driver hint to the wireless core a regulatory domain
  * @wiphy: the wireless device giving the hint (used only for reporting
diff --git a/net/wireless/util.c b/net/wireless/util.c
index f54424693a38..e76cc28b0345 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -7,6 +7,25 @@
 #include <asm/bitops.h>
 #include "core.h"
 
+struct ieee80211_rate *
+ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
+			    u64 basic_rates, int bitrate)
+{
+	struct ieee80211_rate *result = &sband->bitrates[0];
+	int i;
+
+	for (i = 0; i < sband->n_bitrates; i++) {
+		if (!(basic_rates & BIT(i)))
+			continue;
+		if (sband->bitrates[i].bitrate > bitrate)
+			continue;
+		result = &sband->bitrates[i];
+	}
+
+	return result;
+}
+EXPORT_SYMBOL(ieee80211_get_response_rate);
+
 int ieee80211_channel_to_frequency(int chan)
 {
 	if (chan < 14)
-- 
cgit v1.2.3


From 90c97a040d6b08cc4890328aa262fdc37336ab01 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 30 Oct 2008 16:59:22 +0200
Subject: nl80211: Add basic rate configuration for AP mode

Add a new attribute, NL80211_ATTR_BSS_BASIC_RATES, that can be used with
NL80211_CMD_SET_BSS for userspace (e.g., hostapd) to set which rates are
in the basic rate set.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  6 ++++++
 include/net/cfg80211.h  |  5 +++++
 net/mac80211/cfg.c      | 18 ++++++++++++++++++
 net/wireless/nl80211.c  |  8 ++++++++
 4 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index e4cc7869b22f..5009809588c0 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -243,6 +243,9 @@ enum nl80211_commands {
  *	(u8, 0 or 1)
  * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled
  *	(u8, 0 or 1)
+ * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic
+ *	rates in format defined by IEEE 802.11 7.3.2.2 but without the length
+ *	restriction (at most %NL80211_MAX_SUPP_RATES).
  *
  * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from
  *	association request when used with NL80211_CMD_NEW_STATION)
@@ -307,6 +310,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MESH_PARAMS,
 
+	NL80211_ATTR_BSS_BASIC_RATES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -318,6 +323,7 @@ enum nl80211_attrs {
  * here
  */
 #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
+#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 03e1e88c6a09..7caf3c76a12f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -280,11 +280,16 @@ struct mpath_info {
  *	(0 = no, 1 = yes, -1 = do not change)
  * @use_short_slot_time: Whether the use of short slot time is allowed
  *	(0 = no, 1 = yes, -1 = do not change)
+ * @basic_rates: basic rates in IEEE 802.11 format
+ *	(or NULL for no change)
+ * @basic_rates_len: number of basic rates
  */
 struct bss_parameters {
 	int use_cts_prot;
 	int use_short_preamble;
 	int use_short_slot_time;
+	u8 *basic_rates;
+	u8 basic_rates_len;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 91f56a48e2b4..442a4d7b1808 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1046,6 +1046,24 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 		changed |= BSS_CHANGED_ERP_SLOT;
 	}
 
+	if (params->basic_rates) {
+		int i, j;
+		u32 rates = 0;
+		struct ieee80211_local *local = wiphy_priv(wiphy);
+		struct ieee80211_supported_band *sband =
+			wiphy->bands[local->oper_channel->band];
+
+		for (i = 0; i < params->basic_rates_len; i++) {
+			int rate = (params->basic_rates[i] & 0x7f) * 5;
+			for (j = 0; j < sband->n_bitrates; j++) {
+				if (sband->bitrates[j].bitrate == rate)
+					rates |= BIT(j);
+			}
+		}
+		sdata->vif.bss_conf.basic_rates = rates;
+		changed |= BSS_CHANGED_BASIC_RATES;
+	}
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	return 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5e1d658a8b5a..1ea5e3fd3931 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -95,6 +95,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
+	[NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY,
+					   .len = NL80211_MAX_SUPP_RATES },
 
 	[NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED },
 
@@ -1613,6 +1615,12 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME])
 		params.use_short_slot_time =
 		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]);
+	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
+		params.basic_rates =
+			nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
+		params.basic_rates_len =
+			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
+	}
 
 	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
 	if (err)
-- 
cgit v1.2.3


From 318884875bdddca663ecc373c813cf8e117d9e43 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 30 Oct 2008 16:59:24 +0200
Subject: nl80211: Add TX queue parameter configuration

Add a new attribute, NL80211_ATTR_WIPHY_TXQ_PARAMS, that can be used with
NL80211_CMD_SET_WIPHY for userspace (e.g., hostapd) to set TX queue
parameters (txop, cwmin, cwmax, aifs).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 43 +++++++++++++++++++++++++++++--
 include/net/cfg80211.h  | 23 +++++++++++++++++
 net/mac80211/cfg.c      | 25 ++++++++++++++++++
 net/wireless/nl80211.c  | 67 +++++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 151 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 5009809588c0..79827345351d 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -25,8 +25,9 @@
  *
  * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request
  *	to get a list of all present wiphys.
- * @NL80211_CMD_SET_WIPHY: set wiphy name, needs %NL80211_ATTR_WIPHY and
- *	%NL80211_ATTR_WIPHY_NAME.
+ * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
+ *	%NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME
+ *	and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -178,6 +179,7 @@ enum nl80211_commands {
  * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf.
  *	/sys/class/ieee80211/<phyname>/index
  * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming)
+ * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters
  *
  * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
  * @NL80211_ATTR_IFNAME: network interface name
@@ -312,6 +314,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_BSS_BASIC_RATES,
 
+	NL80211_ATTR_WIPHY_TXQ_PARAMS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -324,6 +328,7 @@ enum nl80211_attrs {
  */
 #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
 #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES
+#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
@@ -698,4 +703,38 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_txq_attr - TX queue parameter attributes
+ * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved
+ * @NL80211_TXQ_ATTR_QUEUE: TX queue identifier (NL80211_TXQ_Q_*)
+ * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning
+ *	disabled
+ * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form
+ *	2^n-1 in the range 1..32767]
+ * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form
+ *	2^n-1 in the range 1..32767]
+ * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255]
+ * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal
+ * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number
+ */
+enum nl80211_txq_attr {
+	__NL80211_TXQ_ATTR_INVALID,
+	NL80211_TXQ_ATTR_QUEUE,
+	NL80211_TXQ_ATTR_TXOP,
+	NL80211_TXQ_ATTR_CWMIN,
+	NL80211_TXQ_ATTR_CWMAX,
+	NL80211_TXQ_ATTR_AIFS,
+
+	/* keep last */
+	__NL80211_TXQ_ATTR_AFTER_LAST,
+	NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1
+};
+
+enum nl80211_txq_q {
+	NL80211_TXQ_Q_VO,
+	NL80211_TXQ_Q_VI,
+	NL80211_TXQ_Q_BE,
+	NL80211_TXQ_Q_BK
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7caf3c76a12f..448023193e55 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -371,6 +371,24 @@ struct mesh_config {
 	u16 dot11MeshHWMPnetDiameterTraversalTime;
 };
 
+/**
+ * struct ieee80211_txq_params - TX queue parameters
+ * @queue: TX queue identifier (NL80211_TXQ_Q_*)
+ * @txop: Maximum burst time in units of 32 usecs, 0 meaning disabled
+ * @cwmin: Minimum contention window [a value of the form 2^n-1 in the range
+ *	1..32767]
+ * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range
+ *	1..32767]
+ * @aifs: Arbitration interframe space [0..255]
+ */
+struct ieee80211_txq_params {
+	enum nl80211_txq_q queue;
+	u16 txop;
+	u16 cwmin;
+	u16 cwmax;
+	u8 aifs;
+};
+
 /* from net/wireless.h */
 struct wiphy;
 
@@ -430,6 +448,8 @@ struct wiphy;
  * @set_mesh_cfg: set mesh parameters (by now, just mesh id)
  *
  * @change_bss: Modify parameters for a given BSS.
+ *
+ * @set_txq_params: Set TX queue parameters
  */
 struct cfg80211_ops {
 	int	(*add_virtual_intf)(struct wiphy *wiphy, char *name,
@@ -490,6 +510,9 @@ struct cfg80211_ops {
 				const struct mesh_config *nconf, u32 mask);
 	int	(*change_bss)(struct wiphy *wiphy, struct net_device *dev,
 			      struct bss_parameters *params);
+
+	int	(*set_txq_params)(struct wiphy *wiphy,
+				  struct ieee80211_txq_params *params);
 };
 
 #endif /* __NET_CFG80211_H */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 442a4d7b1808..fe672faa819d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1069,6 +1069,30 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_set_txq_params(struct wiphy *wiphy,
+				    struct ieee80211_txq_params *params)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_tx_queue_params p;
+
+	if (!local->ops->conf_tx)
+		return -EOPNOTSUPP;
+
+	memset(&p, 0, sizeof(p));
+	p.aifs = params->aifs;
+	p.cw_max = params->cwmax;
+	p.cw_min = params->cwmin;
+	p.txop = params->txop;
+	if (local->ops->conf_tx(local_to_hw(local), params->queue, &p)) {
+		printk(KERN_DEBUG "%s: failed to set TX queue "
+		       "parameters for queue %d\n", local->mdev->name,
+		       params->queue);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1095,4 +1119,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.get_mesh_params = ieee80211_get_mesh_params,
 #endif
 	.change_bss = ieee80211_change_bss,
+	.set_txq_params = ieee80211_set_txq_params,
 };
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1ea5e3fd3931..e3e1494e769a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -58,6 +58,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
 				      .len = BUS_ID_SIZE-1 },
+	[NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED },
 
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -286,20 +287,76 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
 	return -ENOBUFS;
 }
 
+static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
+	[NL80211_TXQ_ATTR_QUEUE]		= { .type = NLA_U8 },
+	[NL80211_TXQ_ATTR_TXOP]			= { .type = NLA_U16 },
+	[NL80211_TXQ_ATTR_CWMIN]		= { .type = NLA_U16 },
+	[NL80211_TXQ_ATTR_CWMAX]		= { .type = NLA_U16 },
+	[NL80211_TXQ_ATTR_AIFS]			= { .type = NLA_U8 },
+};
+
+static int parse_txq_params(struct nlattr *tb[],
+			    struct ieee80211_txq_params *txq_params)
+{
+	if (!tb[NL80211_TXQ_ATTR_QUEUE] || !tb[NL80211_TXQ_ATTR_TXOP] ||
+	    !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
+	    !tb[NL80211_TXQ_ATTR_AIFS])
+		return -EINVAL;
+
+	txq_params->queue = nla_get_u8(tb[NL80211_TXQ_ATTR_QUEUE]);
+	txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
+	txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
+	txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
+	txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
+
+	return 0;
+}
+
 static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev;
-	int result;
-
-	if (!info->attrs[NL80211_ATTR_WIPHY_NAME])
-		return -EINVAL;
+	int result = 0, rem_txq_params = 0;
+	struct nlattr *nl_txq_params;
 
 	rdev = cfg80211_get_dev_from_info(info);
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 
-	result = cfg80211_dev_rename(rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME]));
+	if (info->attrs[NL80211_ATTR_WIPHY_NAME]) {
+		result = cfg80211_dev_rename(
+			rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME]));
+		if (result)
+			goto bad_res;
+	}
+
+	if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) {
+		struct ieee80211_txq_params txq_params;
+		struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1];
+
+		if (!rdev->ops->set_txq_params) {
+			result = -EOPNOTSUPP;
+			goto bad_res;
+		}
+
+		nla_for_each_nested(nl_txq_params,
+				    info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
+				    rem_txq_params) {
+			nla_parse(tb, NL80211_TXQ_ATTR_MAX,
+				  nla_data(nl_txq_params),
+				  nla_len(nl_txq_params),
+				  txq_params_policy);
+			result = parse_txq_params(tb, &txq_params);
+			if (result)
+				goto bad_res;
+
+			result = rdev->ops->set_txq_params(&rdev->wiphy,
+							   &txq_params);
+			if (result)
+				goto bad_res;
+		}
+	}
 
+bad_res:
 	cfg80211_put_dev(rdev);
 	return result;
 }
-- 
cgit v1.2.3


From fc6971d491517ba15e800540ff88caa55dc65b01 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 30 Oct 2008 19:59:05 +0200
Subject: mac80211_hwsim: Add support for client PS mode

This introduces a debugfs file (ieee80211/phy#/hwsim/ps) that can be
used to force a simulated radio into power save mode. Following values
can be written into this file to change PS mode:
0 = power save disabled (constantly awake)
1 = power save enabled (drop all frames; do not send PS-Poll)
2 = power save enabled (send PS-Poll frames automatically to receive
    buffered unicast frames); not yet fully implemented
3 = manual PS-Poll trigger (send a single PS-Poll frame)

Two different behavior for power save mode processing can be tested:
- move between modes 1 and 0 (i.e., receive all buffered frames at a
  time)
- move to mode 1 and use manual PS-Poll frames (write 3 to the 'ps'
  debugfs file) to fetch power save buffered frames one at a time

Mode 2 (automatic PS-Poll) does not yet parse Beacon frames, but
eventually, it should take a look at TIM IE and send PS-Poll if a
traffic bit is set for our AID.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 184 ++++++++++++++++++++++++++++++++++
 include/linux/ieee80211.h             |   7 ++
 2 files changed, 191 insertions(+)

(limited to 'include')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index bc9da9393760..b9230da925ee 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -21,6 +21,7 @@
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/etherdevice.h>
+#include <linux/debugfs.h>
 
 MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211");
@@ -32,6 +33,9 @@ MODULE_PARM_DESC(radios, "Number of simulated radios");
 
 struct hwsim_vif_priv {
 	u32 magic;
+	u8 bssid[ETH_ALEN];
+	bool assoc;
+	u16 aid;
 };
 
 #define HWSIM_VIF_MAGIC	0x69537748
@@ -132,6 +136,12 @@ struct mac80211_hwsim_data {
 	unsigned int rx_filter;
 	int started;
 	struct timer_list beacon_timer;
+	enum ps_mode {
+		PS_DISABLED, PS_ENABLED, PS_AUTO_POLL, PS_MANUAL_POLL
+	} ps;
+	bool ps_poll_pending;
+	struct dentry *debugfs;
+	struct dentry *debugfs_ps;
 };
 
 
@@ -196,6 +206,34 @@ static void mac80211_hwsim_monitor_rx(struct ieee80211_hw *hw,
 }
 
 
+static bool hwsim_ps_rx_ok(struct mac80211_hwsim_data *data,
+			   struct sk_buff *skb)
+{
+	switch (data->ps) {
+	case PS_DISABLED:
+		return true;
+	case PS_ENABLED:
+		return false;
+	case PS_AUTO_POLL:
+		/* TODO: accept (some) Beacons by default and other frames only
+		 * if pending PS-Poll has been sent */
+		return true;
+	case PS_MANUAL_POLL:
+		/* Allow unicast frames to own address if there is a pending
+		 * PS-Poll */
+		if (data->ps_poll_pending &&
+		    memcmp(data->hw->wiphy->perm_addr, skb->data + 4,
+			   ETH_ALEN) == 0) {
+			data->ps_poll_pending = false;
+			return true;
+		}
+		return false;
+	}
+
+	return true;
+}
+
+
 static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 				    struct sk_buff *skb)
 {
@@ -212,6 +250,9 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 	rx_status.rate_idx = info->control.rates[0].idx;
 	/* TODO: simulate signal strength (and optional packet drop) */
 
+	if (data->ps != PS_DISABLED)
+		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM);
+
 	/* Copy skb to all enabled radios that are on the current frequency */
 	spin_lock(&hwsim_radio_lock);
 	list_for_each_entry(data2, &hwsim_radios, list) {
@@ -221,6 +262,7 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 			continue;
 
 		if (!data2->started || !data2->radio_enabled ||
+		    !hwsim_ps_rx_ok(data2, skb) ||
 		    data->channel->center_freq != data2->channel->center_freq)
 			continue;
 
@@ -404,7 +446,16 @@ static int mac80211_hwsim_config_interface(struct ieee80211_hw *hw,
 					   struct ieee80211_vif *vif,
 					   struct ieee80211_if_conf *conf)
 {
+	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+
 	hwsim_check_magic(vif);
+	if (conf->changed & IEEE80211_IFCC_BSSID) {
+		DECLARE_MAC_BUF(mac);
+		printk(KERN_DEBUG "%s:%s: BSSID changed: %s\n",
+		       wiphy_name(hw->wiphy), __func__,
+		       print_mac(mac, conf->bssid));
+		memcpy(vp->bssid, conf->bssid, ETH_ALEN);
+	}
 	return 0;
 }
 
@@ -413,6 +464,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 					    struct ieee80211_bss_conf *info,
 					    u32 changed)
 {
+	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+
 	hwsim_check_magic(vif);
 
 	printk(KERN_DEBUG "%s:%s(changed=0x%x)\n",
@@ -421,6 +474,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_ASSOC) {
 		printk(KERN_DEBUG "  %s: ASSOC: assoc=%d aid=%d\n",
 		       wiphy_name(hw->wiphy), info->assoc, info->aid);
+		vp->assoc = info->assoc;
+		vp->aid = info->aid;
 	}
 
 	if (changed & BSS_CHANGED_ERP_CTS_PROT) {
@@ -518,6 +573,8 @@ static void mac80211_hwsim_free(void)
 	spin_unlock_bh(&hwsim_radio_lock);
 
 	list_for_each_entry(data, &tmplist, list) {
+		debugfs_remove(data->debugfs_ps);
+		debugfs_remove(data->debugfs);
 		ieee80211_unregister_hw(data->hw);
 		device_unregister(data->dev);
 		ieee80211_free_hw(data->hw);
@@ -543,6 +600,127 @@ static void hwsim_mon_setup(struct net_device *dev)
 }
 
 
+static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif)
+{
+	struct mac80211_hwsim_data *data = dat;
+	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+	DECLARE_MAC_BUF(buf);
+	struct sk_buff *skb;
+	struct ieee80211_pspoll *pspoll;
+
+	if (!vp->assoc)
+		return;
+
+	printk(KERN_DEBUG "%s:%s: send PS-Poll to %s for aid %d\n",
+	       wiphy_name(data->hw->wiphy), __func__,
+	       print_mac(buf, vp->bssid), vp->aid);
+
+	skb = dev_alloc_skb(sizeof(*pspoll));
+	if (!skb)
+		return;
+	pspoll = (void *) skb_put(skb, sizeof(*pspoll));
+	pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
+					    IEEE80211_STYPE_PSPOLL |
+					    IEEE80211_FCTL_PM);
+	pspoll->aid = cpu_to_le16(0xc000 | vp->aid);
+	memcpy(pspoll->bssid, vp->bssid, ETH_ALEN);
+	memcpy(pspoll->ta, mac, ETH_ALEN);
+	if (data->radio_enabled &&
+	    !mac80211_hwsim_tx_frame(data->hw, skb))
+		printk(KERN_DEBUG "%s: PS-Poll frame not ack'ed\n", __func__);
+	dev_kfree_skb(skb);
+}
+
+
+static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
+				struct ieee80211_vif *vif, int ps)
+{
+	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+	DECLARE_MAC_BUF(buf);
+	struct sk_buff *skb;
+	struct ieee80211_hdr *hdr;
+
+	if (!vp->assoc)
+		return;
+
+	printk(KERN_DEBUG "%s:%s: send data::nullfunc to %s ps=%d\n",
+	       wiphy_name(data->hw->wiphy), __func__,
+	       print_mac(buf, vp->bssid), ps);
+
+	skb = dev_alloc_skb(sizeof(*hdr));
+	if (!skb)
+		return;
+	hdr = (void *) skb_put(skb, sizeof(*hdr) - ETH_ALEN);
+	hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
+					 IEEE80211_STYPE_NULLFUNC |
+					 (ps ? IEEE80211_FCTL_PM : 0));
+	hdr->duration_id = cpu_to_le16(0);
+	memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
+	memcpy(hdr->addr2, mac, ETH_ALEN);
+	memcpy(hdr->addr3, vp->bssid, ETH_ALEN);
+	if (data->radio_enabled &&
+	    !mac80211_hwsim_tx_frame(data->hw, skb))
+		printk(KERN_DEBUG "%s: nullfunc frame not ack'ed\n", __func__);
+	dev_kfree_skb(skb);
+}
+
+
+static void hwsim_send_nullfunc_ps(void *dat, u8 *mac,
+				   struct ieee80211_vif *vif)
+{
+	struct mac80211_hwsim_data *data = dat;
+	hwsim_send_nullfunc(data, mac, vif, 1);
+}
+
+
+static void hwsim_send_nullfunc_no_ps(void *dat, u8 *mac,
+				      struct ieee80211_vif *vif)
+{
+	struct mac80211_hwsim_data *data = dat;
+	hwsim_send_nullfunc(data, mac, vif, 0);
+}
+
+
+static int hwsim_fops_ps_read(void *dat, u64 *val)
+{
+	struct mac80211_hwsim_data *data = dat;
+	*val = data->ps;
+	return 0;
+}
+
+static int hwsim_fops_ps_write(void *dat, u64 val)
+{
+	struct mac80211_hwsim_data *data = dat;
+	enum ps_mode old_ps;
+
+	if (val != PS_DISABLED && val != PS_ENABLED && val != PS_AUTO_POLL &&
+	    val != PS_MANUAL_POLL)
+		return -EINVAL;
+
+	old_ps = data->ps;
+	data->ps = val;
+
+	if (val == PS_MANUAL_POLL) {
+		ieee80211_iterate_active_interfaces(data->hw,
+						    hwsim_send_ps_poll, data);
+		data->ps_poll_pending = true;
+	} else if (old_ps == PS_DISABLED && val != PS_DISABLED) {
+		ieee80211_iterate_active_interfaces(data->hw,
+						    hwsim_send_nullfunc_ps,
+						    data);
+	} else if (old_ps != PS_DISABLED && val == PS_DISABLED) {
+		ieee80211_iterate_active_interfaces(data->hw,
+						    hwsim_send_nullfunc_no_ps,
+						    data);
+	}
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hwsim_fops_ps, hwsim_fops_ps_read, hwsim_fops_ps_write,
+			"%llu\n");
+
+
 static int __init init_mac80211_hwsim(void)
 {
 	int i, err = 0;
@@ -634,6 +812,12 @@ static int __init init_mac80211_hwsim(void)
 		       wiphy_name(hw->wiphy),
 		       hw->wiphy->perm_addr);
 
+		data->debugfs = debugfs_create_dir("hwsim",
+						   hw->wiphy->debugfsdir);
+		data->debugfs_ps = debugfs_create_file("ps", 0666,
+						       data->debugfs, data,
+						       &hwsim_fops_ps);
+
 		setup_timer(&data->beacon_timer, mac80211_hwsim_beacon,
 			    (unsigned long) hw);
 
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9dc288b920c8..56b0eb25d927 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -669,6 +669,13 @@ struct ieee80211_cts {
 	u8 ra[6];
 } __attribute__ ((packed));
 
+struct ieee80211_pspoll {
+	__le16 frame_control;
+	__le16 aid;
+	u8 bssid[6];
+	u8 ta[6];
+} __attribute__ ((packed));
+
 /**
  * struct ieee80211_bar - HT Block Ack Request
  *
-- 
cgit v1.2.3


From b219cee191e7cfe88a695a57249a295d0d5b22e9 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 30 Oct 2008 13:33:55 -0700
Subject: cfg80211: make use of reg macros on REG_RULE

Ensure regulatory converstion macros safely accept
multiple arguments and make REG_RULE() use them.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 448023193e55..1d57835d73f2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -336,19 +336,19 @@ struct ieee80211_regdomain {
 	struct ieee80211_reg_rule reg_rules[];
 };
 
-#define MHZ_TO_KHZ(freq) (freq * 1000)
-#define KHZ_TO_MHZ(freq) (freq / 1000)
-#define DBI_TO_MBI(gain) (gain * 100)
-#define MBI_TO_DBI(gain) (gain / 100)
-#define DBM_TO_MBM(gain) (gain * 100)
-#define MBM_TO_DBM(gain) (gain / 100)
+#define MHZ_TO_KHZ(freq) ((freq) * 1000)
+#define KHZ_TO_MHZ(freq) ((freq) / 1000)
+#define DBI_TO_MBI(gain) ((gain) * 100)
+#define MBI_TO_DBI(gain) ((gain) / 100)
+#define DBM_TO_MBM(gain) ((gain) * 100)
+#define MBM_TO_DBM(gain) ((gain) / 100)
 
 #define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \
-	.freq_range.start_freq_khz = (start) * 1000, \
-	.freq_range.end_freq_khz = (end) * 1000, \
-	.freq_range.max_bandwidth_khz = (bw) * 1000, \
-	.power_rule.max_antenna_gain = (gain) * 100, \
-	.power_rule.max_eirp = (eirp) * 100, \
+	.freq_range.start_freq_khz = MHZ_TO_KHZ(start), \
+	.freq_range.end_freq_khz = MHZ_TO_KHZ(end), \
+	.freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw), \
+	.power_rule.max_antenna_gain = DBI_TO_MBI(gain), \
+	.power_rule.max_eirp = DBM_TO_MBM(eirp), \
 	.flags = reg_flags, \
 	}
 
-- 
cgit v1.2.3


From fb28ad35906af2f042c94e2f9c0f898ef9acfa37 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Mon, 10 Nov 2008 13:55:14 -0800
Subject: net: struct device - replace bus_id with dev_name(), dev_set_name()

Acked-by: Marcel Holtmann <marcel@holtmann.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/3c59x.c                        | 4 ++--
 drivers/net/defxx.c                        | 2 +-
 drivers/net/enc28j60.c                     | 2 +-
 drivers/net/fec_mpc52xx.c                  | 2 +-
 drivers/net/gianfar.c                      | 2 +-
 drivers/net/mlx4/mlx4_en.h                 | 8 ++++----
 drivers/net/pasemi_mac.c                   | 3 ++-
 drivers/net/phy/mdio_bus.c                 | 4 ++--
 drivers/net/phy/phy.c                      | 2 +-
 drivers/net/phy/phy_device.c               | 4 ++--
 drivers/net/sh_eth.c                       | 2 +-
 drivers/net/tg3.c                          | 4 ++--
 drivers/net/tulip/de4x5.c                  | 4 ++--
 drivers/net/ucc_geth.c                     | 4 ++--
 drivers/net/wireless/libertas/defs.h       | 2 +-
 drivers/net/wireless/orinoco/orinoco.c     | 2 +-
 drivers/net/wireless/orinoco/orinoco_cs.c  | 2 +-
 drivers/net/wireless/orinoco/spectrum_cs.c | 2 +-
 include/net/wireless.h                     | 4 ++--
 net/atm/atm_sysfs.c                        | 2 +-
 net/bluetooth/hci_sysfs.c                  | 7 +++----
 net/core/net-sysfs.c                       | 2 +-
 net/dsa/slave.c                            | 2 +-
 net/rfkill/rfkill.c                        | 5 ++---
 net/wireless/core.c                        | 3 +--
 25 files changed, 39 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index b4168dfd6893..3893f505fb5f 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -1025,7 +1025,7 @@ static int __devinit vortex_probe1(struct device *gendev,
 		}
 
 		if ((edev = DEVICE_EISA(gendev))) {
-			print_name = edev->dev.bus_id;
+			print_name = dev_name(&edev->dev);
 		}
 	}
 
@@ -2883,7 +2883,7 @@ static void vortex_get_drvinfo(struct net_device *dev,
 		strcpy(info->bus_info, pci_name(VORTEX_PCI(vp)));
 	} else {
 		if (VORTEX_EISA(vp))
-			sprintf(info->bus_info, vp->gendev->bus_id);
+			sprintf(info->bus_info, dev_name(vp->gendev));
 		else
 			sprintf(info->bus_info, "EISA 0x%lx %d",
 					dev->base_addr, dev->irq);
diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index f07887435247..6e6583b609f7 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -511,7 +511,7 @@ static int __devinit dfx_register(struct device *bdev)
 	int dfx_bus_pci = DFX_BUS_PCI(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
 	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
-	char *print_name = bdev->bus_id;
+	const char *print_name = dev_name(bdev);
 	struct net_device *dev;
 	DFX_board_t	  *bp;			/* board pointer */
 	resource_size_t bar_start = 0;		/* pointer to port */
diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c
index d186a52cdb62..32c19790d013 100644
--- a/drivers/net/enc28j60.c
+++ b/drivers/net/enc28j60.c
@@ -1448,7 +1448,7 @@ enc28j60_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 	strlcpy(info->bus_info,
-		dev->dev.parent->bus_id, sizeof(info->bus_info));
+		dev_name(dev->dev.parent), sizeof(info->bus_info));
 }
 
 static int
diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
index 94054b4981f3..cd8e98b45ec5 100644
--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -216,7 +216,7 @@ static int mpc52xx_fec_init_phy(struct net_device *dev)
 	struct phy_device *phydev;
 	char phy_id[BUS_ID_SIZE];
 
-	snprintf(phy_id, BUS_ID_SIZE, "%x:%02x",
+	snprintf(phy_id, sizeof(phy_id), "%x:%02x",
 			(unsigned int)dev->base_addr, priv->phy_addr);
 
 	priv->link = PHY_DOWN;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index a091db393615..451f6b8b6163 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -546,7 +546,7 @@ static int init_phy(struct net_device *dev)
 	priv->oldspeed = 0;
 	priv->oldduplex = -1;
 
-	snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id);
+	snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id);
 
 	interface = gfar_get_interface(dev);
 
diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
index 11fb17c6e97b..cc022197e2a5 100644
--- a/drivers/net/mlx4/mlx4_en.h
+++ b/drivers/net/mlx4/mlx4_en.h
@@ -58,17 +58,17 @@
 #define mlx4_dbg(mlevel, priv, format, arg...)	\
 	if (NETIF_MSG_##mlevel & priv->msg_enable) \
 	printk(KERN_DEBUG "%s %s: " format , DRV_NAME ,\
-		(&priv->mdev->pdev->dev)->bus_id , ## arg)
+		(dev_name(&priv->mdev->pdev->dev)) , ## arg)
 
 #define mlx4_err(mdev, format, arg...) \
 	printk(KERN_ERR "%s %s: " format , DRV_NAME ,\
-		(&mdev->pdev->dev)->bus_id , ## arg)
+		(dev_name(&mdev->pdev->dev)) , ## arg)
 #define mlx4_info(mdev, format, arg...) \
 	printk(KERN_INFO "%s %s: " format , DRV_NAME ,\
-		(&mdev->pdev->dev)->bus_id , ## arg)
+		(dev_name(&mdev->pdev->dev)) , ## arg)
 #define mlx4_warn(mdev, format, arg...) \
 	printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\
-		(&mdev->pdev->dev)->bus_id , ## arg)
+		(dev_name(&mdev->pdev->dev)) , ## arg)
 
 /*
  * Device constants
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index b0270052c3a9..fcbf6ccd0a85 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -1105,7 +1105,8 @@ static int pasemi_mac_phy_init(struct net_device *dev)
 		goto err;
 
 	phy_id = *prop;
-	snprintf(mac->phy_id, BUS_ID_SIZE, "%x:%02x", (int)r.start, phy_id);
+	snprintf(mac->phy_id, sizeof(mac->phy_id), "%x:%02x",
+		 (int)r.start, phy_id);
 
 	of_node_put(phy_dn);
 
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index d0ed1ef284a8..6afd35f62d77 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -97,7 +97,7 @@ int mdiobus_register(struct mii_bus *bus)
 	bus->dev.parent = bus->parent;
 	bus->dev.class = &mdio_bus_class;
 	bus->dev.groups = NULL;
-	memcpy(bus->dev.bus_id, bus->id, MII_BUS_ID_SIZE);
+	dev_set_name(&bus->dev, bus->id);
 
 	err = device_register(&bus->dev);
 	if (err) {
@@ -191,7 +191,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
 
 	phydev->dev.parent = bus->parent;
 	phydev->dev.bus = &mdio_bus_type;
-	snprintf(phydev->dev.bus_id, BUS_ID_SIZE, PHY_ID_FMT, bus->id, addr);
+	dev_set_name(&phydev->dev, PHY_ID_FMT, bus->id, addr);
 
 	phydev->bus = bus;
 
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index df4e6257d4a7..e4ede6080c9d 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -45,7 +45,7 @@
  */
 void phy_print_status(struct phy_device *phydev)
 {
-	pr_info("PHY: %s - Link is %s", phydev->dev.bus_id,
+	pr_info("PHY: %s - Link is %s", dev_name(&phydev->dev),
 			phydev->link ? "Up" : "Down");
 	if (phydev->link)
 		printk(" - %d/%s", phydev->speed,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index e11b03b2b25a..e976c1c60095 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -74,7 +74,7 @@ int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 	if (!fixup)
 		return -ENOMEM;
 
-	strncpy(fixup->bus_id, bus_id, BUS_ID_SIZE);
+	strlcpy(fixup->bus_id, bus_id, sizeof(fixup->bus_id));
 	fixup->phy_uid = phy_uid;
 	fixup->phy_uid_mask = phy_uid_mask;
 	fixup->run = run;
@@ -109,7 +109,7 @@ EXPORT_SYMBOL(phy_register_fixup_for_id);
  */
 static int phy_needs_fixup(struct phy_device *phydev, struct phy_fixup *fixup)
 {
-	if (strcmp(fixup->bus_id, phydev->dev.bus_id) != 0)
+	if (strcmp(fixup->bus_id, dev_name(&phydev->dev)) != 0)
 		if (strcmp(fixup->bus_id, PHY_ANY_ID) != 0)
 			return 0;
 
diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index c51bfc57d405..077d796ccb70 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -799,7 +799,7 @@ static int sh_eth_phy_init(struct net_device *ndev)
 	char phy_id[BUS_ID_SIZE];
 	struct phy_device *phydev = NULL;
 
-	snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT,
+	snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT,
 		mdp->mii_bus->id , mdp->phy_id);
 
 	mdp->link = PHY_DOWN;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 546b9eeaa171..e05849ee9000 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1396,7 +1396,7 @@ static int tg3_phy_init(struct tg3 *tp)
 	phydev = tp->mdio_bus->phy_map[PHY_ADDR];
 
 	/* Attach the MAC to the PHY. */
-	phydev = phy_connect(tp->dev, phydev->dev.bus_id, tg3_adjust_link,
+	phydev = phy_connect(tp->dev, dev_name(&phydev->dev), tg3_adjust_link,
 			     phydev->dev_flags, phydev->interface);
 	if (IS_ERR(phydev)) {
 		printk(KERN_ERR "%s: Could not attach to PHY\n", tp->dev->name);
@@ -13645,7 +13645,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 		       "%s: attached PHY driver [%s] (mii_bus:phy_addr=%s)\n",
 		       tp->dev->name,
 		       tp->mdio_bus->phy_map[PHY_ADDR]->drv->name,
-		       tp->mdio_bus->phy_map[PHY_ADDR]->dev.bus_id);
+		       dev_name(&tp->mdio_bus->phy_map[PHY_ADDR]->dev));
 	else
 		printk(KERN_INFO
 		       "%s: attached PHY is %s (%s Ethernet) (WireSpeed[%d])\n",
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index f8a45253eaf8..67bfd6f43366 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -1118,7 +1118,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
     }
 
     dev->base_addr = iobase;
-    printk ("%s: %s at 0x%04lx", gendev->bus_id, name, iobase);
+    printk ("%s: %s at 0x%04lx", dev_name(gendev), name, iobase);
 
     status = get_hw_addr(dev);
     printk(", h/w address %pM\n", dev->dev_addr);
@@ -1153,7 +1153,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
             }
         }
 	lp->fdx = lp->params.fdx;
-	sprintf(lp->adapter_name,"%s (%s)", name, gendev->bus_id);
+	sprintf(lp->adapter_name,"%s (%s)", name, dev_name(gendev));
 
 	lp->dma_size = (NUM_RX_DESC + NUM_TX_DESC) * sizeof(struct de4x5_desc);
 #if defined(__alpha__) || defined(__powerpc__) || defined(CONFIG_SPARC) || defined(DE4X5_DO_MEMCPY)
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 4931af736630..0a5b817fd7ac 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -1615,8 +1615,8 @@ static int init_phy(struct net_device *dev)
 	priv->oldspeed = 0;
 	priv->oldduplex = -1;
 
-	snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv->ug_info->mdio_bus,
-			priv->ug_info->phy_address);
+	snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->ug_info->mdio_bus,
+		 priv->ug_info->phy_address);
 
 	phydev = phy_connect(dev, phy_id, &adjust_link, 0, priv->phy_interface);
 
diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h
index 076a636e8f62..2d4666f26808 100644
--- a/drivers/net/wireless/libertas/defs.h
+++ b/drivers/net/wireless/libertas/defs.h
@@ -79,7 +79,7 @@ do { if ((lbs_debug & (grp)) == (grp)) \
 #define lbs_deb_tx(fmt, args...)        LBS_DEB_LL(LBS_DEB_TX, " tx", fmt, ##args)
 #define lbs_deb_fw(fmt, args...)        LBS_DEB_LL(LBS_DEB_FW, " fw", fmt, ##args)
 #define lbs_deb_usb(fmt, args...)       LBS_DEB_LL(LBS_DEB_USB, " usb", fmt, ##args)
-#define lbs_deb_usbd(dev, fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usbd", "%s:" fmt, (dev)->bus_id, ##args)
+#define lbs_deb_usbd(dev, fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usbd", "%s:" fmt, dev_name(dev), ##args)
 #define lbs_deb_cs(fmt, args...)        LBS_DEB_LL(LBS_DEB_CS, " cs", fmt, ##args)
 #define lbs_deb_thread(fmt, args...)    LBS_DEB_LL(LBS_DEB_THREAD, " thread", fmt, ##args)
 #define lbs_deb_sdio(fmt, args...)      LBS_DEB_LL(LBS_DEB_SDIO, " sdio", fmt, ##args)
diff --git a/drivers/net/wireless/orinoco/orinoco.c b/drivers/net/wireless/orinoco/orinoco.c
index f4ea08f96970..072be44b37de 100644
--- a/drivers/net/wireless/orinoco/orinoco.c
+++ b/drivers/net/wireless/orinoco/orinoco.c
@@ -5987,7 +5987,7 @@ static void orinoco_get_drvinfo(struct net_device *dev,
 	strncpy(info->version, DRIVER_VERSION, sizeof(info->version) - 1);
 	strncpy(info->fw_version, priv->fw_name, sizeof(info->fw_version) - 1);
 	if (dev->dev.parent)
-		strncpy(info->bus_info, dev->dev.parent->bus_id,
+		strncpy(info->bus_info, dev_name(dev->dev.parent),
 			sizeof(info->bus_info) - 1);
 	else
 		snprintf(info->bus_info, sizeof(info->bus_info) - 1,
diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c
index 6fcf2bda7cdf..bf6a51da3b29 100644
--- a/drivers/net/wireless/orinoco/orinoco_cs.c
+++ b/drivers/net/wireless/orinoco/orinoco_cs.c
@@ -308,7 +308,7 @@ orinoco_cs_config(struct pcmcia_device *link)
 
 	/* Finally, report what we've done */
 	printk(KERN_DEBUG "%s: " DRIVER_NAME " at %s, irq %d, io "
-	       "0x%04x-0x%04x\n", dev->name, dev->dev.parent->bus_id,
+	       "0x%04x-0x%04x\n", dev->name, dev_name(dev->dev.parent),
 	       link->irq.AssignedIRQ, link->io.BasePort1,
 	       link->io.BasePort1 + link->io.NumPorts1 - 1);
 	return 0;
diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c
index 852789ad34b3..0bae3dcf9d50 100644
--- a/drivers/net/wireless/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/orinoco/spectrum_cs.c
@@ -383,7 +383,7 @@ spectrum_cs_config(struct pcmcia_device *link)
 
 	/* Finally, report what we've done */
 	printk(KERN_DEBUG "%s: " DRIVER_NAME " at %s, irq %d, io "
-	       "0x%04x-0x%04x\n", dev->name, dev->dev.parent->bus_id,
+	       "0x%04x-0x%04x\n", dev->name, dev_name(dev->dev.parent),
 	       link->irq.AssignedIRQ, link->io.BasePort1,
 	       link->io.BasePort1 + link->io.NumPorts1 - 1);
 
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 17d4b582cf34..412351560b76 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -263,9 +263,9 @@ static inline struct device *wiphy_dev(struct wiphy *wiphy)
 /**
  * wiphy_name - get wiphy name
  */
-static inline char *wiphy_name(struct wiphy *wiphy)
+static inline const char *wiphy_name(struct wiphy *wiphy)
 {
-	return wiphy->dev.bus_id;
+	return dev_name(&wiphy->dev);
 }
 
 /**
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 1b88311f2130..b5674dc2083d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -149,7 +149,7 @@ int atm_register_sysfs(struct atm_dev *adev)
 	cdev->class = &atm_class;
 	dev_set_drvdata(cdev, adev);
 
-	snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number);
+	dev_set_name(cdev, "%s%d", adev->type, adev->number);
 	err = device_register(cdev);
 	if (err < 0)
 		return err;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index f4f6615cad9f..f2bbb2f65434 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -113,8 +113,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 	conn->dev.class = bt_class;
 	conn->dev.parent = &hdev->dev;
 
-	snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d",
-					hdev->name, conn->handle);
+	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
 
 	dev_set_drvdata(&conn->dev, conn);
 
@@ -132,7 +131,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
  */
 static int __match_tty(struct device *dev, void *data)
 {
-	return !strncmp(dev->bus_id, "rfcomm", 6);
+	return !strncmp(dev_name(dev), "rfcomm", 6);
 }
 
 static void del_conn(struct work_struct *work)
@@ -421,7 +420,7 @@ int hci_register_sysfs(struct hci_dev *hdev)
 	dev->class = bt_class;
 	dev->parent = hdev->parent;
 
-	strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE);
+	dev_set_name(dev, hdev->name);
 
 	dev_set_drvdata(dev, hdev);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 85cb8bdcfb8f..146dcfeb060e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -494,7 +494,7 @@ int netdev_register_kobject(struct net_device *net)
 	dev->groups = groups;
 
 	BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
-	strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
+	dev_set_name(dev, net->name);
 
 #ifdef CONFIG_SYSFS
 	*groups++ = &netstat_group;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 37616884b8a9..7384bad81652 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -284,7 +284,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	netif_carrier_off(slave_dev);
 
 	if (p->phy != NULL) {
-		phy_attach(slave_dev, p->phy->dev.bus_id,
+		phy_attach(slave_dev, dev_name(&p->phy->dev),
 			   0, PHY_INTERFACE_MODE_GMII);
 
 		p->phy->autoneg = AUTONEG_ENABLE;
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 69f3a3b4dd61..ec26eae8004d 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -774,7 +774,7 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill)
 	int error;
 
 	if (!rfkill->led_trigger.name)
-		rfkill->led_trigger.name = rfkill->dev.bus_id;
+		rfkill->led_trigger.name = dev_name(&rfkill->dev);
 	if (!rfkill->led_trigger.activate)
 		rfkill->led_trigger.activate = rfkill_led_trigger_activate;
 	error = led_trigger_register(&rfkill->led_trigger);
@@ -815,8 +815,7 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 			"badly initialized rfkill struct\n"))
 		return -EINVAL;
 
-	snprintf(dev->bus_id, sizeof(dev->bus_id),
-		 "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1);
+	dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1);
 
 	rfkill_led_trigger_register(rfkill);
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 72825afe2bf6..39e3d10fccde 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -236,8 +236,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 	mutex_unlock(&cfg80211_drv_mutex);
 
 	/* give it a proper name */
-	snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
-		 PHY_NAME "%d", drv->idx);
+	dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx);
 
 	mutex_init(&drv->mtx);
 	mutex_init(&drv->devlist_mtx);
-- 
cgit v1.2.3


From ae1e9130bfb9ad55eb97ec3fb17a122b7a118f98 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 11 Nov 2008 09:05:16 +0100
Subject: sched: rename SCHED_NO_NO_OMIT_FRAME_POINTER =>
 SCHED_OMIT_FRAME_POINTER

Impact: cleanup, change .config option name

We had this ugly config name for a long time for hysteric raisons.
Rename it to a saner name.

We still cannot get rid of it completely, until /proc/<pid>/stack
usage replaces WCHAN usage for good.

We'll be able to do that in the v2.6.29/v2.6.30 timeframe.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/Kconfig         | 2 +-
 arch/m32r/Kconfig         | 2 +-
 arch/mips/Kconfig         | 2 +-
 arch/powerpc/Kconfig      | 2 +-
 arch/x86/Kconfig          | 2 +-
 include/asm-m32r/system.h | 2 +-
 kernel/Makefile           | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 27eec71429b0..59d12788b60c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -99,7 +99,7 @@ config GENERIC_IOMAP
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index dbaed4a63815..29047d5c259a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
         bool
         default y
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f4af967a6b30..a5255e7c79e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 525c13a4de93..adb23ea1c1ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,7 +141,7 @@ config GENERIC_NVRAM
 	bool
 	default y if PPC32
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d5550d19b66..74db682ec1ce 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -364,7 +364,7 @@ config X86_RDC321X
 	  as R-8610-(G).
 	  If you don't have one of these chips, you should say N here.
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
 	depends on X86
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 70a57c8c002b..c980f5ba8de7 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -23,7 +23,7 @@
  */
 
 #if defined(CONFIG_FRAME_POINTER) || \
-	!defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER)
+	!defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
 #define M32R_PUSH_FP "	push fp\n"
 #define M32R_POP_FP  "	pop  fp\n"
 #else
diff --git a/kernel/Makefile b/kernel/Makefile
index e1af03972148..46e67a398495 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -91,7 +91,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 
-ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
 # me.  I suspect most platforms don't need this, but until we know that for sure
-- 
cgit v1.2.3


From caf4b323b02a16c92fba449952ac6515ddc76d7a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 11 Nov 2008 07:03:45 +0100
Subject: tracing, x86: add low level support for ftrace return tracing

Impact: add infrastructure for function-return tracing

Add low level support for ftrace return tracing.

This plug-in stores return addresses on the thread_info structure of
the current task.

The index of the current return address is initialized when the task
is the first one (init) and when a process forks (the child). It is
not needed when a task does a sys_execve because after this syscall,
it still needs to return on the kernel functions it called.

Note that the code of return_to_handler has been suggested by Steven
Rostedt as almost all of the ideas of improvements in this V3.

For purpose of security, arch/x86/kernel/process_32.c is not traced
because __switch_to() changes the current task during its execution.
That could cause inconsistency in the stored return address of this
function even if I didn't have any crash after testing with tracing on
this function enabled.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                   |   1 +
 arch/x86/include/asm/ftrace.h      |  26 ++++++
 arch/x86/include/asm/thread_info.h |  24 +++++
 arch/x86/kernel/Makefile           |   6 ++
 arch/x86/kernel/entry_32.S         |  33 +++++++
 arch/x86/kernel/ftrace.c           | 181 +++++++++++++++++++++++++++++++++++--
 include/linux/ftrace.h             |  20 ++++
 include/linux/ftrace_irq.h         |   2 +-
 include/linux/sched.h              |  11 +++
 kernel/Makefile                    |   4 +
 10 files changed, 300 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 27b8a3a39911..ca91e50bdb10 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -11,6 +11,7 @@ config 64BIT
 
 config X86_32
 	def_bool !64BIT
+	select HAVE_FUNCTION_RET_TRACER
 
 config X86_64
 	def_bool 64BIT
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index f8173ed1c970..9b6a1fa19e70 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -20,4 +20,30 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 
+#ifdef CONFIG_FUNCTION_RET_TRACER
+#define FTRACE_RET_STACK_SIZE 20
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Stack of return addresses for functions
+ * of a thread.
+ * Used in struct thread_info
+ */
+struct ftrace_ret_stack {
+	unsigned long ret;
+	unsigned long func;
+	unsigned long long calltime;
+};
+
+/*
+ * Primary handler of a function return.
+ * It relays on ftrace_return_to_handler.
+ * Defined in entry32.S
+ */
+extern void return_to_handler(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_RET_TRACER */
+
 #endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..a71158369fd4 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,7 @@
 struct task_struct;
 struct exec_domain;
 #include <asm/processor.h>
+#include <asm/ftrace.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
@@ -38,8 +39,30 @@ struct thread_info {
 						*/
 	__u8			supervisor_stack[0];
 #endif
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	/* Index of current stored adress in ret_stack */
+	int		curr_ret_stack;
+	/* Stack of return addresses for return function tracing */
+	struct ftrace_ret_stack	ret_stack[FTRACE_RET_STACK_SIZE];
+#endif
 };
 
+#ifdef CONFIG_FUNCTION_RET_TRACER
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.exec_domain	= &default_exec_domain,	\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= 1,			\
+	.addr_limit	= KERNEL_DS,		\
+	.restart_block = {			\
+		.fn = do_no_restart_syscall,	\
+	},					\
+	.curr_ret_stack = -1,\
+}
+#else
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
@@ -52,6 +75,7 @@ struct thread_info {
 		.fn = do_no_restart_syscall,	\
 	},					\
 }
+#endif
 
 #define init_thread_info	(init_thread_union.thread_info)
 #define init_stack		(init_thread_union.stack)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..1d8ed95da846 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
+ifdef CONFIG_FUNCTION_RET_TRACER
+# Don't trace __switch_to() but let it for function tracer
+CFLAGS_REMOVE_process_32.o = -pg
+endif
+
 #
 # vsyscalls (which work on the user stack) should have
 # no stack-protector checks:
@@ -65,6 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_RET_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9134de814c97..9a0ac85946db 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1188,6 +1188,10 @@ ENTRY(mcount)
 
 	cmpl $ftrace_stub, ftrace_trace_function
 	jnz trace
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	cmpl $ftrace_stub, ftrace_function_return
+	jnz trace_return
+#endif
 .globl ftrace_stub
 ftrace_stub:
 	ret
@@ -1206,8 +1210,37 @@ trace:
 	popl %edx
 	popl %ecx
 	popl %eax
+	jmp ftrace_stub
 
+#ifdef CONFIG_FUNCTION_RET_TRACER
+trace_return:
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+	pushl %eax
+	lea 0x4(%ebp), %eax
+	pushl %eax
+	call prepare_ftrace_return
+	addl $8, %esp
+	popl %edx
+	popl %ecx
+	popl %eax
 	jmp ftrace_stub
+
+.globl return_to_handler
+return_to_handler:
+	pushl $0
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	call ftrace_return_to_handler
+	movl %eax, 0xc(%esp)
+	popl %edx
+	popl %ecx
+	popl %eax
+	ret
+#endif /* CONFIG_FUNCTION_RET_TRACER */
 END(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 69149337f2fe..d68033bba223 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,10 +18,173 @@
 #include <linux/list.h>
 
 #include <asm/ftrace.h>
+#include <linux/ftrace.h>
 #include <asm/nops.h>
+#include <asm/nmi.h>
 
 
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+	atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+	atomic_dec(&in_nmi);
+}
+
+/*
+ * Synchronize accesses to return adresses stack with
+ * interrupts.
+ */
+static raw_spinlock_t ret_stack_lock;
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+				unsigned long func)
+{
+	int index;
+	struct thread_info *ti;
+	unsigned long flags;
+	int err = 0;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&ret_stack_lock);
+
+	ti = current_thread_info();
+	/* The return trace stack is full */
+	if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	index = ++ti->curr_ret_stack;
+	ti->ret_stack[index].ret = ret;
+	ti->ret_stack[index].func = func;
+	ti->ret_stack[index].calltime = time;
+
+out:
+	__raw_spin_unlock(&ret_stack_lock);
+	raw_local_irq_restore(flags);
+	return err;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void pop_return_trace(unsigned long *ret, unsigned long long *time,
+				unsigned long *func)
+{
+	struct thread_info *ti;
+	int index;
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&ret_stack_lock);
+
+	ti = current_thread_info();
+	index = ti->curr_ret_stack;
+	*ret = ti->ret_stack[index].ret;
+	*func = ti->ret_stack[index].func;
+	*time = ti->ret_stack[index].calltime;
+	ti->curr_ret_stack--;
+
+	__raw_spin_unlock(&ret_stack_lock);
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+	struct ftrace_retfunc trace;
+	pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+	trace.rettime = cpu_clock(raw_smp_processor_id());
+	ftrace_function_return(&trace);
+
+	return trace.ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+asmlinkage
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+	unsigned long old;
+	unsigned long long calltime;
+	int faulted;
+	unsigned long return_hooker = (unsigned long)
+				&return_to_handler;
+
+	/* Nmi's are currently unsupported */
+	if (atomic_read(&in_nmi))
+		return;
+
+	/*
+	 * Protect against fault, even if it shouldn't
+	 * happen. This tool is too much intrusive to
+	 * ignore such a protection.
+	 */
+	asm volatile(
+		"1: movl (%[parent_old]), %[old]\n"
+		"2: movl %[return_hooker], (%[parent_replaced])\n"
+		"   movl $0, %[faulted]\n"
+
+		".section .fixup, \"ax\"\n"
+		"3: movl $1, %[faulted]\n"
+		".previous\n"
+
+		".section __ex_table, \"a\"\n"
+		"   .long 1b, 3b\n"
+		"   .long 2b, 3b\n"
+		".previous\n"
+
+		: [parent_replaced] "=rm" (parent), [old] "=r" (old),
+		  [faulted] "=r" (faulted)
+		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+		: "memory"
+	);
+
+	if (WARN_ON(faulted)) {
+		unregister_ftrace_return();
+		return;
+	}
+
+	if (WARN_ON(!__kernel_text_address(old))) {
+		unregister_ftrace_return();
+		*parent = old;
+		return;
+	}
+
+	calltime = cpu_clock(raw_smp_processor_id());
+
+	if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+		*parent = old;
+}
+
+static int __init init_ftrace_function_return(void)
+{
+	ret_stack_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	return 0;
+}
+device_initcall(init_ftrace_function_return);
+
+
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
 
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
@@ -31,17 +194,11 @@ union ftrace_code_union {
 	} __attribute__((packed));
 };
 
-
 static int ftrace_calc_offset(long ip, long addr)
 {
 	return (int)(addr - ip);
 }
 
-unsigned char *ftrace_nop_replace(void)
-{
-	return ftrace_nop;
-}
-
 unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
 	static union ftrace_code_union calc;
@@ -183,6 +340,15 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 }
 
 
+
+
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+
+unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop;
+}
+
 int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
@@ -292,3 +458,4 @@ int __init ftrace_dyn_arch_init(void *data)
 
 	return 0;
 }
+#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1f5608c11023..dcbbf72a88b1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -267,6 +267,26 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 #endif
 
 
+/*
+ * Structure that defines a return function trace.
+ */
+struct ftrace_retfunc {
+	unsigned long ret; /* Return address */
+	unsigned long func; /* Current function */
+	unsigned long long calltime;
+	unsigned long long rettime;
+};
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+/* Type of a callback handler of tracing return function */
+typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
+
+extern void register_ftrace_return(trace_function_return_t func);
+/* The current handler in use */
+extern trace_function_return_t ftrace_function_return;
+extern void unregister_ftrace_return(void);
+#endif
+
 /*
  * Structure which defines the trace of an initcall.
  * You don't have to fill the func field since it is
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index b1299d6729f2..0b4df55d7a74 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,7 +2,7 @@
 #define _LINUX_FTRACE_IRQ_H
 
 
-#ifdef CONFIG_DYNAMIC_FTRACE
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER)
 extern void ftrace_nmi_enter(void);
 extern void ftrace_nmi_exit(void);
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 295b7c756ca6..df77abe860c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2005,6 +2005,17 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
 {
 	*task_thread_info(p) = *task_thread_info(org);
 	task_thread_info(p)->task = p;
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	/*
+	 * When fork() creates a child process, this function is called.
+	 * But the child task may not inherit the return adresses traced
+	 * by the return function tracer because it will directly execute
+	 * in userspace and will not return to kernel functions its parent
+	 * used.
+	 */
+	task_thread_info(p)->curr_ret_stack = -1;
+#endif
 }
 
 static inline unsigned long *end_of_stack(struct task_struct *p)
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d84..af3be57acbbb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,10 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -mno-spe -pg
 endif
+ifdef CONFIG_FUNCTION_RET_TRACER
+CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
+CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
+endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
-- 
cgit v1.2.3


From 9d36be76c55ad2c2bb29683b752b0d9ad2e4eeef Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Nov 2008 21:48:07 +1100
Subject: Document the order of arguments for cap_issubset.  It's not instantly
 clear which order the argument should be in.  So give an example.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 5bc145bd759a..b5750d0b96e0 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -457,6 +457,13 @@ static inline int cap_isclear(const kernel_cap_t a)
 	return 1;
 }
 
+/*
+ * Check if "a" is a subset of "set".
+ * return 1 if ALL of the capabilities in "a" are also in "set"
+ *	cap_issubset(0101, 1111) will return 1
+ * return 0 if ANY of the capabilities in "a" are not in "set"
+ *	cap_issubset(1111, 0101) will return 0
+ */
 static inline int cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
 {
 	kernel_cap_t dest;
-- 
cgit v1.2.3


From c0b004413a46a0a5744e6d2b85220fe9d2c33d48 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Nov 2008 21:48:10 +1100
Subject: This patch add a generic cpu endian caps structure and externally
 available functions which retrieve fcaps information from disk.  This
 information is necessary so fcaps information can be collected and recorded
 by the audit system.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h |   7 +++
 security/commoncap.c       | 129 +++++++++++++++++++++++++--------------------
 2 files changed, 78 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index b5750d0b96e0..d567af247ed8 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -99,6 +99,13 @@ typedef struct kernel_cap_struct {
 	__u32 cap[_KERNEL_CAPABILITY_U32S];
 } kernel_cap_t;
 
+/* exact same as vfs_cap_data but in cpu endian and always filled completely */
+struct cpu_vfs_cap_data {
+	__u32 magic_etc;
+	kernel_cap_t permitted;
+	kernel_cap_t inheritable;
+};
+
 #define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
 #define _KERNEL_CAP_T_SIZE     (sizeof(kernel_cap_t))
 
diff --git a/security/commoncap.c b/security/commoncap.c
index f88119cb2bc2..d7eff5797b91 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -202,17 +202,70 @@ int cap_inode_killpriv(struct dentry *dentry)
 	return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS);
 }
 
-static inline int cap_from_disk(struct vfs_cap_data *caps,
-				struct linux_binprm *bprm, unsigned size)
+static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
+					  struct linux_binprm *bprm)
 {
+	unsigned i;
+	int ret = 0;
+
+	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
+		bprm->cap_effective = true;
+	else
+		bprm->cap_effective = false;
+
+	CAP_FOR_EACH_U32(i) {
+		__u32 permitted = caps->permitted.cap[i];
+		__u32 inheritable = caps->inheritable.cap[i];
+
+		/*
+		 * pP' = (X & fP) | (pI & fI)
+		 */
+		bprm->cap_post_exec_permitted.cap[i] =
+			(current->cap_bset.cap[i] & permitted) |
+			(current->cap_inheritable.cap[i] & inheritable);
+
+		if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) {
+			/*
+			 * insufficient to execute correctly
+			 */
+			ret = -EPERM;
+		}
+	}
+
+	/*
+	 * For legacy apps, with no internal support for recognizing they
+	 * do not have enough capabilities, we return an error if they are
+	 * missing some "forced" (aka file-permitted) capabilities.
+	 */
+	return bprm->cap_effective ? ret : 0;
+}
+
+int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
+{
+	struct inode *inode = dentry->d_inode;
 	__u32 magic_etc;
 	unsigned tocopy, i;
-	int ret;
+	int size;
+	struct vfs_cap_data caps;
+
+	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
+
+	if (!inode || !inode->i_op || !inode->i_op->getxattr)
+		return -ENODATA;
+
+	size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps,
+				   XATTR_CAPS_SZ);
+	if (size == -ENODATA || size == -EOPNOTSUPP) {
+		/* no data, that's ok */
+		return -ENODATA;
+	}
+	if (size < 0)
+		return size;
 
 	if (size < sizeof(magic_etc))
 		return -EINVAL;
 
-	magic_etc = le32_to_cpu(caps->magic_etc);
+	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
 
 	switch ((magic_etc & VFS_CAP_REVISION_MASK)) {
 	case VFS_CAP_REVISION_1:
@@ -229,46 +282,13 @@ static inline int cap_from_disk(struct vfs_cap_data *caps,
 		return -EINVAL;
 	}
 
-	if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) {
-		bprm->cap_effective = true;
-	} else {
-		bprm->cap_effective = false;
-	}
-
-	ret = 0;
-
 	CAP_FOR_EACH_U32(i) {
-		__u32 value_cpu;
-
-		if (i >= tocopy) {
-			/*
-			 * Legacy capability sets have no upper bits
-			 */
-			bprm->cap_post_exec_permitted.cap[i] = 0;
-			continue;
-		}
-		/*
-		 * pP' = (X & fP) | (pI & fI)
-		 */
-		value_cpu = le32_to_cpu(caps->data[i].permitted);
-		bprm->cap_post_exec_permitted.cap[i] =
-			(current->cap_bset.cap[i] & value_cpu) |
-			(current->cap_inheritable.cap[i] &
-				le32_to_cpu(caps->data[i].inheritable));
-		if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) {
-			/*
-			 * insufficient to execute correctly
-			 */
-			ret = -EPERM;
-		}
+		if (i >= tocopy)
+			break;
+		cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
+		cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
 	}
-
-	/*
-	 * For legacy apps, with no internal support for recognizing they
-	 * do not have enough capabilities, we return an error if they are
-	 * missing some "forced" (aka file-permitted) capabilities.
-	 */
-	return bprm->cap_effective ? ret : 0;
+	return 0;
 }
 
 /* Locate any VFS capabilities: */
@@ -276,8 +296,7 @@ static int get_file_caps(struct linux_binprm *bprm)
 {
 	struct dentry *dentry;
 	int rc = 0;
-	struct vfs_cap_data vcaps;
-	struct inode *inode;
+	struct cpu_vfs_cap_data vcaps;
 
 	bprm_clear_caps(bprm);
 
@@ -288,24 +307,18 @@ static int get_file_caps(struct linux_binprm *bprm)
 		return 0;
 
 	dentry = dget(bprm->file->f_dentry);
-	inode = dentry->d_inode;
-	if (!inode->i_op || !inode->i_op->getxattr)
-		goto out;
 
-	rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps,
-				   XATTR_CAPS_SZ);
-	if (rc == -ENODATA || rc == -EOPNOTSUPP) {
-		/* no data, that's ok */
-		rc = 0;
+	rc = get_vfs_caps_from_disk(dentry, &vcaps);
+	if (rc < 0) {
+		if (rc == -EINVAL)
+			printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n",
+				__func__, rc, bprm->filename);
+		else if (rc == -ENODATA)
+			rc = 0;
 		goto out;
 	}
-	if (rc < 0)
-		goto out;
 
-	rc = cap_from_disk(&vcaps, bprm, rc);
-	if (rc == -EINVAL)
-		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
-		       __func__, rc, bprm->filename);
+	rc = bprm_caps_from_vfs_caps(&vcaps, bprm);
 
 out:
 	dput(dentry);
-- 
cgit v1.2.3


From 851f7ff56d9c21272f289dd85fb3f1b6cf7a6e10 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Nov 2008 21:48:14 +1100
Subject: This patch will print cap_permitted and cap_inheritable data in the
 PATH records of any file that has file capabilities set.  Files which do not
 have fcaps set will not have different PATH records.

An example audit record if you run:
setcap "cap_net_admin+pie" /bin/bash
/bin/bash

type=SYSCALL msg=audit(1225741937.363:230): arch=c000003e syscall=59 success=yes exit=0 a0=2119230 a1=210da30 a2=20ee290 a3=8 items=2 ppid=2149 pid=2923 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null)
type=EXECVE msg=audit(1225741937.363:230): argc=2 a0="ping" a1="www.google.com"
type=CWD msg=audit(1225741937.363:230):  cwd="/root"
type=PATH msg=audit(1225741937.363:230): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0104755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fi=0000000000002000 cap_fe=1 cap_fver=2
type=PATH msg=audit(1225741937.363:230): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h |  5 +++
 kernel/auditsc.c           | 82 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 82 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index d567af247ed8..0f1950181102 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -53,6 +53,7 @@ typedef struct __user_cap_data_struct {
 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
 
 #define VFS_CAP_REVISION_MASK	0xFF000000
+#define VFS_CAP_REVISION_SHIFT	24
 #define VFS_CAP_FLAGS_MASK	~VFS_CAP_REVISION_MASK
 #define VFS_CAP_FLAGS_EFFECTIVE	0x000001
 
@@ -534,6 +535,10 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new);
 
 extern int capable(int cap);
 
+/* audit system wants to get cap info from files as well */
+struct dentry;
+extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
+
 #endif /* __KERNEL__ */
 
 #endif /* !_LINUX_CAPABILITY_H */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index cf5bc2f5f9c3..de7e9bcba9ae 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -65,6 +65,7 @@
 #include <linux/highmem.h>
 #include <linux/syscalls.h>
 #include <linux/inotify.h>
+#include <linux/capability.h>
 
 #include "audit.h"
 
@@ -84,6 +85,15 @@ int audit_n_rules;
 /* determines whether we collect data for signals sent */
 int audit_signals;
 
+struct audit_cap_data {
+	kernel_cap_t		permitted;
+	kernel_cap_t		inheritable;
+	union {
+		unsigned int	fE;		/* effective bit of a file capability */
+		kernel_cap_t	effective;	/* effective set of a process */
+	};
+};
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -100,6 +110,8 @@ struct audit_names {
 	gid_t		gid;
 	dev_t		rdev;
 	u32		osid;
+	struct audit_cap_data fcap;
+	unsigned int	fcap_ver;
 };
 
 struct audit_aux_data {
@@ -1171,6 +1183,35 @@ static void audit_log_execve_info(struct audit_context *context,
 	kfree(buf);
 }
 
+static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
+{
+	int i;
+
+	audit_log_format(ab, " %s=", prefix);
+	CAP_FOR_EACH_U32(i) {
+		audit_log_format(ab, "%08x", cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]);
+	}
+}
+
+static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+{
+	kernel_cap_t *perm = &name->fcap.permitted;
+	kernel_cap_t *inh = &name->fcap.inheritable;
+	int log = 0;
+
+	if (!cap_isclear(*perm)) {
+		audit_log_cap(ab, "cap_fp", perm);
+		log = 1;
+	}
+	if (!cap_isclear(*inh)) {
+		audit_log_cap(ab, "cap_fi", inh);
+		log = 1;
+	}
+
+	if (log)
+		audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i, call_panic = 0;
@@ -1421,6 +1462,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			}
 		}
 
+		audit_log_fcaps(ab, n);
+
 		audit_log_end(ab);
 	}
 
@@ -1787,8 +1830,36 @@ static int audit_inc_name_count(struct audit_context *context,
 	return 0;
 }
 
+
+static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry)
+{
+	struct cpu_vfs_cap_data caps;
+	int rc;
+
+	memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t));
+	memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t));
+	name->fcap.fE = 0;
+	name->fcap_ver = 0;
+
+	if (!dentry)
+		return 0;
+
+	rc = get_vfs_caps_from_disk(dentry, &caps);
+	if (rc)
+		return rc;
+
+	name->fcap.permitted = caps.permitted;
+	name->fcap.inheritable = caps.inheritable;
+	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
+
+	return 0;
+}
+
+
 /* Copy inode data into an audit_names. */
-static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
+static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
+			     const struct inode *inode)
 {
 	name->ino   = inode->i_ino;
 	name->dev   = inode->i_sb->s_dev;
@@ -1797,6 +1868,7 @@ static void audit_copy_inode(struct audit_names *name, const struct inode *inode
 	name->gid   = inode->i_gid;
 	name->rdev  = inode->i_rdev;
 	security_inode_getsecid(inode, &name->osid);
+	audit_copy_fcaps(name, dentry);
 }
 
 /**
@@ -1831,7 +1903,7 @@ void __audit_inode(const char *name, const struct dentry *dentry)
 		context->names[idx].name = NULL;
 	}
 	handle_path(dentry);
-	audit_copy_inode(&context->names[idx], inode);
+	audit_copy_inode(&context->names[idx], dentry, inode);
 }
 
 /**
@@ -1892,7 +1964,7 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry,
 		if (!strcmp(dname, n->name) ||
 		     !audit_compare_dname_path(dname, n->name, &dirlen)) {
 			if (inode)
-				audit_copy_inode(n, inode);
+				audit_copy_inode(n, NULL, inode);
 			else
 				n->ino = (unsigned long)-1;
 			found_child = n->name;
@@ -1906,7 +1978,7 @@ add_names:
 			return;
 		idx = context->name_count - 1;
 		context->names[idx].name = NULL;
-		audit_copy_inode(&context->names[idx], parent);
+		audit_copy_inode(&context->names[idx], NULL, parent);
 	}
 
 	if (!found_child) {
@@ -1927,7 +1999,7 @@ add_names:
 		}
 
 		if (inode)
-			audit_copy_inode(&context->names[idx], inode);
+			audit_copy_inode(&context->names[idx], NULL, inode);
 		else
 			context->names[idx].ino = (unsigned long)-1;
 	}
-- 
cgit v1.2.3


From 3fc689e96c0c90b6fede5946d6c31075e9464f69 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Nov 2008 21:48:18 +1100
Subject: Any time fcaps or a setuid app under SECURE_NOROOT is used to result
 in a non-zero pE we will crate a new audit record which contains the entire
 set of known information about the executable in question, fP, fI, fE,
 fversion and includes the process's pE, pI, pP.  Before and after the bprm
 capability are applied.  This record type will only be emitted from execve
 syscalls.

an example of making ping use fcaps instead of setuid:

setcap "cat_net_raw+pe" /bin/ping

type=SYSCALL msg=audit(1225742021.015:236): arch=c000003e syscall=59 success=yes exit=0 a0=1457f30 a1=14606b0 a2=1463940 a3=321b770a70 items=2 ppid=2929 pid=2963 auid=0 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null)
type=UNKNOWN[1321] msg=audit(1225742021.015:236): fver=2 fp=0000000000002000 fi=0000000000000000 fe=1 old_pp=0000000000000000 old_pi=0000000000000000 old_pe=0000000000000000 new_pp=0000000000002000 new_pi=0000000000000000 new_pe=0000000000002000
type=EXECVE msg=audit(1225742021.015:236): argc=2 a0="ping" a1="127.0.0.1"
type=CWD msg=audit(1225742021.015:236):  cwd="/home/test"
type=PATH msg=audit(1225742021.015:236): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fe=1 cap_fver=2
type=PATH msg=audit(1225742021.015:236): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/audit.h | 26 ++++++++++++++++++++
 kernel/auditsc.c      | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++
 security/commoncap.c  | 23 ++++++++++++++++-
 3 files changed, 116 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6272a395d43c..8cfb9feb2a05 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -99,6 +99,7 @@
 #define AUDIT_OBJ_PID		1318	/* ptrace target */
 #define AUDIT_TTY		1319	/* Input on an administrative TTY */
 #define AUDIT_EOE		1320	/* End of multi-record event */
+#define AUDIT_BPRM_FCAPS	1321	/* Information about fcaps increasing perms */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
@@ -452,6 +453,7 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr
 extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
 extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
+extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE);
 
 static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -501,6 +503,29 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 		return __audit_mq_getsetattr(mqdes, mqstat);
 	return 0;
 }
+
+/*
+ * ieieeeeee, an audit function without a return code!
+ *
+ * This function might fail!  I decided that it didn't matter.  We are too late
+ * to fail the syscall and the information isn't REQUIRED for any purpose.  It's
+ * just nice to have.  We should be able to look at past audit logs to figure
+ * out this process's current cap set along with the fcaps from the PATH record
+ * and use that to come up with the final set.  Yeah, its ugly, but all the info
+ * is still in the audit log.  So I'm not going to bother mentioning we failed
+ * if we couldn't allocate memory.
+ *
+ * If someone changes their mind they could create the aux record earlier and
+ * then search here and use that earlier allocation.  But I don't wanna.
+ *
+ * -Eric
+ */
+static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_log_bprm_fcaps(bprm, pP, pE);
+}
+
 extern int audit_n_rules;
 extern int audit_signals;
 #else
@@ -532,6 +557,7 @@ extern int audit_signals;
 #define audit_mq_timedreceive(d,l,p,t) ({ 0; })
 #define audit_mq_notify(d,n) ({ 0; })
 #define audit_mq_getsetattr(d,s) ({ 0; })
+#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0)
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index de7e9bcba9ae..3229cd4206f5 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -196,6 +196,14 @@ struct audit_aux_data_pids {
 	int			pid_count;
 };
 
+struct audit_aux_data_bprm_fcaps {
+	struct audit_aux_data	d;
+	struct audit_cap_data	fcap;
+	unsigned int		fcap_ver;
+	struct audit_cap_data	old_pcap;
+	struct audit_cap_data	new_pcap;
+};
+
 struct audit_tree_refs {
 	struct audit_tree_refs *next;
 	struct audit_chunk *c[31];
@@ -1375,6 +1383,20 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
 			break; }
 
+		case AUDIT_BPRM_FCAPS: {
+			struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
+			audit_log_format(ab, "fver=%x", axs->fcap_ver);
+			audit_log_cap(ab, "fp", &axs->fcap.permitted);
+			audit_log_cap(ab, "fi", &axs->fcap.inheritable);
+			audit_log_format(ab, " fe=%d", axs->fcap.fE);
+			audit_log_cap(ab, "old_pp", &axs->old_pcap.permitted);
+			audit_log_cap(ab, "old_pi", &axs->old_pcap.inheritable);
+			audit_log_cap(ab, "old_pe", &axs->old_pcap.effective);
+			audit_log_cap(ab, "new_pp", &axs->new_pcap.permitted);
+			audit_log_cap(ab, "new_pi", &axs->new_pcap.inheritable);
+			audit_log_cap(ab, "new_pe", &axs->new_pcap.effective);
+			break; }
+
 		}
 		audit_log_end(ab);
 	}
@@ -2501,6 +2523,52 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	return 0;
 }
 
+/**
+ * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps
+ * @bprm pointer to the bprm being processed
+ * @caps the caps read from the disk
+ *
+ * Simply check if the proc already has the caps given by the file and if not
+ * store the priv escalation info for later auditing at the end of the syscall
+ *
+ * this can fail and we don't care.  See the note in audit.h for
+ * audit_log_bprm_fcaps() for my explaination....
+ *
+ * -Eric
+ */
+void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE)
+{
+	struct audit_aux_data_bprm_fcaps *ax;
+	struct audit_context *context = current->audit_context;
+	struct cpu_vfs_cap_data vcaps;
+	struct dentry *dentry;
+
+	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+	if (!ax)
+		return;
+
+	ax->d.type = AUDIT_BPRM_FCAPS;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+
+	dentry = dget(bprm->file->f_dentry);
+	get_vfs_caps_from_disk(dentry, &vcaps);
+	dput(dentry);
+
+	ax->fcap.permitted = vcaps.permitted;
+	ax->fcap.inheritable = vcaps.inheritable;
+	ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
+
+	ax->old_pcap.permitted = *pP;
+	ax->old_pcap.inheritable = current->cap_inheritable;
+	ax->old_pcap.effective = *pE;
+
+	ax->new_pcap.permitted = current->cap_permitted;
+	ax->new_pcap.inheritable = current->cap_inheritable;
+	ax->new_pcap.effective = current->cap_effective;
+}
+
 /**
  * audit_core_dumps - record information about processes that end abnormally
  * @signr: signal value
diff --git a/security/commoncap.c b/security/commoncap.c
index d7eff5797b91..d45393380997 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/capability.h>
+#include <linux/audit.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -376,6 +377,9 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 
 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
+	kernel_cap_t pP = current->cap_permitted;
+	kernel_cap_t pE = current->cap_effective;
+
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
 	    !cap_issubset(bprm->cap_post_exec_permitted,
 			  current->cap_permitted)) {
@@ -409,7 +413,24 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 			cap_clear(current->cap_effective);
 	}
 
-	/* AUD: Audit candidate if current->cap_effective is set */
+	/*
+	 * Audit candidate if current->cap_effective is set
+	 *
+	 * We do not bother to audit if 3 things are true:
+	 *   1) cap_effective has all caps
+	 *   2) we are root
+	 *   3) root is supposed to have all caps (SECURE_NOROOT)
+	 * Since this is just a normal root execing a process.
+	 *
+	 * Number 1 above might fail if you don't have a full bset, but I think
+	 * that is interesting information to audit.
+	 */
+	if (!cap_isclear(current->cap_effective)) {
+		if (!cap_issubset(CAP_FULL_SET, current->cap_effective) ||
+		    (bprm->e_uid != 0) || (current->uid != 0) ||
+		    issecure(SECURE_NOROOT))
+			audit_log_bprm_fcaps(bprm, &pP, &pE);
+	}
 
 	current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
 }
-- 
cgit v1.2.3


From e68b75a027bb94066576139ee33676264f867b87 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Nov 2008 21:48:22 +1100
Subject: When the capset syscall is used it is not possible for audit to
 record the actual capbilities being added/removed.  This patch adds a new
 record type which emits the target pid and the eff, inh, and perm cap sets.

example output if you audit capset syscalls would be:

type=SYSCALL msg=audit(1225743140.465:76): arch=c000003e syscall=126 success=yes exit=0 a0=17f2014 a1=17f201c a2=80000000 a3=7fff2ab7f060 items=0 ppid=2160 pid=2223 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="setcap" exe="/usr/sbin/setcap" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null)
type=UNKNOWN[1322] msg=audit(1225743140.465:76): pid=0 cap_pi=ffffffffffffffff cap_pp=ffffffffffffffff cap_pe=ffffffffffffffff

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/audit.h | 10 ++++++++++
 kernel/auditsc.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/capability.c   |  5 +++++
 3 files changed, 63 insertions(+)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8cfb9feb2a05..6fbebac7b1bf 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -100,6 +100,7 @@
 #define AUDIT_TTY		1319	/* Input on an administrative TTY */
 #define AUDIT_EOE		1320	/* End of multi-record event */
 #define AUDIT_BPRM_FCAPS	1321	/* Information about fcaps increasing perms */
+#define AUDIT_CAPSET		1322	/* Record showing argument to sys_capset */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
@@ -454,6 +455,7 @@ extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __u
 extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
 extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE);
+extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm);
 
 static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -526,6 +528,13 @@ static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t
 		__audit_log_bprm_fcaps(bprm, pP, pE);
 }
 
+static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_log_capset(pid, eff, inh, perm);
+	return 0;
+}
+
 extern int audit_n_rules;
 extern int audit_signals;
 #else
@@ -558,6 +567,7 @@ extern int audit_signals;
 #define audit_mq_notify(d,n) ({ 0; })
 #define audit_mq_getsetattr(d,s) ({ 0; })
 #define audit_log_bprm_fcaps(b, p, e) do { ; } while (0)
+#define audit_log_capset(pid, e, i, p) ({ 0; })
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3229cd4206f5..cef34235b362 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -204,6 +204,12 @@ struct audit_aux_data_bprm_fcaps {
 	struct audit_cap_data	new_pcap;
 };
 
+struct audit_aux_data_capset {
+	struct audit_aux_data	d;
+	pid_t			pid;
+	struct audit_cap_data	cap;
+};
+
 struct audit_tree_refs {
 	struct audit_tree_refs *next;
 	struct audit_chunk *c[31];
@@ -1397,6 +1403,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_cap(ab, "new_pe", &axs->new_pcap.effective);
 			break; }
 
+		case AUDIT_CAPSET: {
+			struct audit_aux_data_capset *axs = (void *)aux;
+			audit_log_format(ab, "pid=%d", axs->pid);
+			audit_log_cap(ab, "cap_pi", &axs->cap.inheritable);
+			audit_log_cap(ab, "cap_pp", &axs->cap.permitted);
+			audit_log_cap(ab, "cap_pe", &axs->cap.effective);
+			break; }
+
 		}
 		audit_log_end(ab);
 	}
@@ -2569,6 +2583,40 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_
 	ax->new_pcap.effective = current->cap_effective;
 }
 
+/**
+ * __audit_log_capset - store information about the arguments to the capset syscall
+ * @pid target pid of the capset call
+ * @eff effective cap set
+ * @inh inheritible cap set
+ * @perm permited cap set
+ *
+ * Record the aguments userspace sent to sys_capset for later printing by the
+ * audit system if applicable
+ */
+int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm)
+{
+	struct audit_aux_data_capset *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (likely(!audit_enabled || !context || context->dummy))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+	if (!ax)
+		return -ENOMEM;
+
+	ax->d.type = AUDIT_CAPSET;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+
+	ax->pid = pid;
+	ax->cap.effective = *eff;
+	ax->cap.inheritable = *eff;
+	ax->cap.permitted = *perm;
+
+	return 0;
+}
+
 /**
  * audit_core_dumps - record information about processes that end abnormally
  * @signr: signal value
diff --git a/kernel/capability.c b/kernel/capability.c
index e13a68535ad5..19f9eda89975 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -7,6 +7,7 @@
  * 30 May 2002:	Cleanup, Robert M. Love <rml@tech9.net>
  */
 
+#include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -468,6 +469,10 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 		i++;
 	}
 
+	ret = audit_log_capset(pid, &effective, &inheritable, &permitted);
+	if (ret)
+		return ret;
+
 	if (pid && (pid != task_pid_vnr(current)))
 		ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
 						&permitted);
-- 
cgit v1.2.3


From 06112163f5fd9e491a7f810443d81efa9d88e247 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Nov 2008 22:02:50 +1100
Subject: Add a new capable interface that will be used by systems that use
 audit to make an A or B type decision instead of a security decision. 
 Currently this is the case at least for filesystems when deciding if a
 process can use the reserved 'root' blocks and for the case of things like
 the oom algorithm determining if processes are root processes and should be
 less likely to be killed.  These types of security system requests should not
 be audited or logged since they are not really security decisions.  It would
 be possible to solve this problem like the vm_enough_memory security check
 did by creating a new LSM interface and moving all of the policy into that
 interface but proves the needlessly bloat the LSM and provide complex
 indirection.

This merely allows those decisions to be made where they belong and to not
flood logs or printk with denials for thing that are not security decisions.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h |  3 +++
 include/linux/security.h   | 16 +++++++++++++---
 security/commoncap.c       |  8 ++++----
 security/security.c        |  7 ++++++-
 security/selinux/hooks.c   | 20 +++++++++++++-------
 5 files changed, 39 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 0f1950181102..b313ba1dd5d1 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -521,6 +521,8 @@ extern const kernel_cap_t __cap_init_eff_set;
 
 kernel_cap_t cap_set_effective(const kernel_cap_t pE_new);
 
+extern int security_capable(struct task_struct *t, int cap);
+extern int security_capable_noaudit(struct task_struct *t, int cap);
 /**
  * has_capability - Determine if a task has a superior capability available
  * @t: The task in question
@@ -532,6 +534,7 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new);
  * Note that this does not set PF_SUPERPRIV on the task.
  */
 #define has_capability(t, cap) (security_capable((t), (cap)) == 0)
+#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0)
 
 extern int capable(int cap);
 
diff --git a/include/linux/security.h b/include/linux/security.h
index c13f1cec9abb..5fe28a671cd3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -37,6 +37,10 @@
 /* Maximum number of letters for an LSM name string */
 #define SECURITY_NAME_MAX	10
 
+/* If capable should audit the security request */
+#define SECURITY_CAP_NOAUDIT 0
+#define SECURITY_CAP_AUDIT 1
+
 struct ctl_table;
 struct audit_krule;
 
@@ -44,7 +48,7 @@ struct audit_krule;
  * These functions are in security/capability.c and are used
  * as the default capabilities functions
  */
-extern int cap_capable(struct task_struct *tsk, int cap);
+extern int cap_capable(struct task_struct *tsk, int cap, int audit);
 extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -1307,7 +1311,7 @@ struct security_operations {
 			    kernel_cap_t *effective,
 			    kernel_cap_t *inheritable,
 			    kernel_cap_t *permitted);
-	int (*capable) (struct task_struct *tsk, int cap);
+	int (*capable) (struct task_struct *tsk, int cap, int audit);
 	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
@@ -1577,6 +1581,7 @@ void security_capset_set(struct task_struct *target,
 			 kernel_cap_t *inheritable,
 			 kernel_cap_t *permitted);
 int security_capable(struct task_struct *tsk, int cap);
+int security_capable_noaudit(struct task_struct *tsk, int cap);
 int security_acct(struct file *file);
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
@@ -1782,7 +1787,12 @@ static inline void security_capset_set(struct task_struct *target,
 
 static inline int security_capable(struct task_struct *tsk, int cap)
 {
-	return cap_capable(tsk, cap);
+	return cap_capable(tsk, cap, SECURITY_CAP_AUDIT);
+}
+
+static inline int security_capable_noaudit(struct task_struct *tsk, int cap)
+{
+	return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT);
 }
 
 static inline int security_acct(struct file *file)
diff --git a/security/commoncap.c b/security/commoncap.c
index d45393380997..dc06c0086b55 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -49,7 +49,7 @@ EXPORT_SYMBOL(cap_netlink_recv);
  * returns 0 when a task has a capability, but the kernel's capable()
  * returns 1 for this case.
  */
-int cap_capable (struct task_struct *tsk, int cap)
+int cap_capable(struct task_struct *tsk, int cap, int audit)
 {
 	/* Derived from include/linux/sched.h:capable. */
 	if (cap_raised(tsk->cap_effective, cap))
@@ -112,7 +112,7 @@ static inline int cap_inh_is_capped(void)
 	 * to the old permitted set. That is, if the current task
 	 * does *not* possess the CAP_SETPCAP capability.
 	 */
-	return (cap_capable(current, CAP_SETPCAP) != 0);
+	return (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0);
 }
 
 static inline int cap_limit_ptraced_target(void) { return 1; }
@@ -677,7 +677,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		    || ((current->securebits & SECURE_ALL_LOCKS
 			 & ~arg2))                                    /*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
-		    || (cap_capable(current, CAP_SETPCAP) != 0)) {    /*[4]*/
+		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -742,7 +742,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current, CAP_SYS_ADMIN) == 0)
+	if (cap_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
diff --git a/security/security.c b/security/security.c
index c0acfa7177e5..346f21e0ec2c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -163,7 +163,12 @@ void security_capset_set(struct task_struct *target,
 
 int security_capable(struct task_struct *tsk, int cap)
 {
-	return security_ops->capable(tsk, cap);
+	return security_ops->capable(tsk, cap, SECURITY_CAP_AUDIT);
+}
+
+int security_capable_noaudit(struct task_struct *tsk, int cap)
+{
+	return security_ops->capable(tsk, cap, SECURITY_CAP_NOAUDIT);
 }
 
 int security_acct(struct file *file)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 7fd4de46b2a9..88a3ee33068a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1365,12 +1365,14 @@ static int task_has_perm(struct task_struct *tsk1,
 
 /* Check whether a task is allowed to use a capability. */
 static int task_has_capability(struct task_struct *tsk,
-			       int cap)
+			       int cap, int audit)
 {
 	struct task_security_struct *tsec;
 	struct avc_audit_data ad;
+	struct av_decision avd;
 	u16 sclass;
 	u32 av = CAP_TO_MASK(cap);
+	int rc;
 
 	tsec = tsk->security;
 
@@ -1390,7 +1392,11 @@ static int task_has_capability(struct task_struct *tsk,
 		       "SELinux:  out of range capability %d\n", cap);
 		BUG();
 	}
-	return avc_has_perm(tsec->sid, tsec->sid, sclass, av, &ad);
+
+	rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, sclass, av, 0, &avd);
+	if (audit == SECURITY_CAP_AUDIT)
+		avc_audit(tsec->sid, tsec->sid, sclass, av, &avd, rc, &ad);
+	return rc;
 }
 
 /* Check whether a task is allowed to use a system operation. */
@@ -1802,15 +1808,15 @@ static void selinux_capset_set(struct task_struct *target, kernel_cap_t *effecti
 	secondary_ops->capset_set(target, effective, inheritable, permitted);
 }
 
-static int selinux_capable(struct task_struct *tsk, int cap)
+static int selinux_capable(struct task_struct *tsk, int cap, int audit)
 {
 	int rc;
 
-	rc = secondary_ops->capable(tsk, cap);
+	rc = secondary_ops->capable(tsk, cap, audit);
 	if (rc)
 		return rc;
 
-	return task_has_capability(tsk, cap);
+	return task_has_capability(tsk, cap, audit);
 }
 
 static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid)
@@ -1975,7 +1981,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 	int rc, cap_sys_admin = 0;
 	struct task_security_struct *tsec = current->security;
 
-	rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
+	rc = secondary_ops->capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT);
 	if (rc == 0)
 		rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
 					  SECCLASS_CAPABILITY,
@@ -2829,7 +2835,7 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name
 	 * and lack of permission just means that we fall back to the
 	 * in-core context value, not a denial.
 	 */
-	error = secondary_ops->capable(current, CAP_MAC_ADMIN);
+	error = secondary_ops->capable(current, CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = avc_has_perm_noaudit(tsec->sid, tsec->sid,
 					     SECCLASS_CAPABILITY2,
-- 
cgit v1.2.3


From 50ee91765e25e7967a7b69cd5cc2bcab85e2eeb8 Mon Sep 17 00:00:00 2001
From: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Date: Tue, 11 Nov 2008 18:13:23 +0530
Subject: sched/rt: removed unneeded defintion

Impact: cleanup

This function no longer exists, so remove the defintion.

Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b483f39a7112..c6bfb34d978e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -258,8 +258,6 @@ static inline int select_nohz_load_balancer(int cpu)
 }
 #endif
 
-extern unsigned long rt_needs_cpu(int cpu);
-
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
-- 
cgit v1.2.3


From 9b739ba5e66c96938fbc07a4dbd9da5b81eac56f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 11 Nov 2008 16:47:44 -0800
Subject: net: remove struct neigh_table::pde

->pde isn't actually needed, since name is stashed in ->id.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 3 ---
 net/core/neighbour.c    | 5 ++---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index aa4b708654a4..365b5e260239 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -180,9 +180,6 @@ struct neigh_table
 	__u32			hash_rnd;
 	unsigned int		hash_chain_gc;
 	struct pneigh_entry	**phash_buckets;
-#ifdef CONFIG_PROC_FS
-	struct proc_dir_entry	*pde;
-#endif
 };
 
 /* flags for neigh_update() */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d9bbe010e0ee..500c2430007c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1424,9 +1424,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 		panic("cannot create neighbour cache statistics");
 
 #ifdef CONFIG_PROC_FS
-	tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat,
-				    &neigh_stat_seq_fops, tbl);
-	if (!tbl->pde)
+	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
+			      &neigh_stat_seq_fops, tbl))
 		panic("cannot create neighbour proc dir entry");
 #endif
 
-- 
cgit v1.2.3


From 6bb3ce25d05f2990c8a19adaf427531430267c1f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 11 Nov 2008 17:25:22 -0800
Subject: net: remove struct dst_entry::entry_size

Unused after kmem_cache_zalloc() conversion.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h       | 1 -
 net/decnet/dn_route.c   | 1 -
 net/ipv4/route.c        | 2 --
 net/ipv4/xfrm4_policy.c | 1 -
 net/ipv6/route.c        | 2 --
 net/ipv6/xfrm6_policy.c | 1 -
 6 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index f96c4ba4dd32..65a60fab2f82 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -104,7 +104,6 @@ struct dst_ops
 	void			(*link_failure)(struct sk_buff *);
 	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
 	int			(*local_out)(struct sk_buff *skb);
-	int			entry_size;
 
 	atomic_t		entries;
 	struct kmem_cache 		*kmem_cachep;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 821bd1cdec04..768df000523b 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -131,7 +131,6 @@ static struct dst_ops dn_dst_ops = {
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
 	.update_pmtu =		dn_dst_update_pmtu,
-	.entry_size =		sizeof(struct dn_route),
 	.entries =		ATOMIC_INIT(0),
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0dc0c3826763..4e6959c29819 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = {
 	.link_failure =		ipv4_link_failure,
 	.update_pmtu =		ip_rt_update_pmtu,
 	.local_out =		__ip_local_out,
-	.entry_size =		sizeof(struct rtable),
 	.entries =		ATOMIC_INIT(0),
 };
 
@@ -2701,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.destroy		=	ipv4_dst_destroy,
 	.check			=	ipv4_dst_check,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
-	.entry_size		=	sizeof(struct rtable),
 	.entries		=	ATOMIC_INIT(0),
 };
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f9a775b7e796..84dbb5a03cc2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -246,7 +246,6 @@ static struct dst_ops xfrm4_dst_ops = {
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
 	.gc_thresh =		1024,
-	.entry_size =		sizeof(struct xfrm_dst),
 	.entries =		ATOMIC_INIT(0),
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d40dc214b2d..9da1ece466a2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -108,7 +108,6 @@ static struct dst_ops ip6_dst_ops_template = {
 	.link_failure		=	ip6_link_failure,
 	.update_pmtu		=	ip6_rt_update_pmtu,
 	.local_out		=	__ip6_local_out,
-	.entry_size		=	sizeof(struct rt6_info),
 	.entries		=	ATOMIC_INIT(0),
 };
 
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.destroy		=	ip6_dst_destroy,
 	.check			=	ip6_dst_check,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
-	.entry_size		=	sizeof(struct rt6_info),
 	.entries		=	ATOMIC_INIT(0),
 };
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 604bc0a96c05..3b67ce7786ec 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -277,7 +277,6 @@ static struct dst_ops xfrm6_dst_ops = {
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
 	.gc_thresh =		1024,
-	.entry_size =		sizeof(struct xfrm_dst),
 	.entries =		ATOMIC_INIT(0),
 };
 
-- 
cgit v1.2.3


From d90ebcbfa7f5a8b4e20518c9f94c5c4e4cd3c2e5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 12 Nov 2008 00:47:26 -0800
Subject: dccp: Query supported CCIDs

This provides a data structure to record which CCIDs are locally supported
and three accessor functions:
 - a test function for internal use which is used to validate CCID requests
   made by the user;
 - a copy function so that the list can be used for feature-negotiation;
 - documented getsockopt() support so that the user can query capabilities.

The data structure is a table which is filled in at compile-time with the
list of available CCIDs (which in turn depends on the Kconfig choices).

Using the copy function for cloning the list of supported CCIDs is useful for
feature negotiation, since the negotiation is now with the full list of available
CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation
will not fail if the peer requests to use CCID3 instead of CCID2.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt |  4 ++++
 include/linux/dccp.h              |  1 +
 net/dccp/ccid.c                   | 48 +++++++++++++++++++++++++++++++++++++++
 net/dccp/ccid.h                   |  5 ++++
 net/dccp/feat.c                   |  4 ++++
 net/dccp/proto.c                  |  2 ++
 6 files changed, 64 insertions(+)

(limited to 'include')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 39131a3c78f8..f0aeb20fa63b 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -57,6 +57,10 @@ can be set before calling bind().
 DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
 size (application payload size) in bytes, see RFC 4340, section 14.
 
+DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
+supported by the endpoint (see include/linux/dccp.h for symbolic constants).
+The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
+
 DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
 timewait state when closing the connection (RFC 4340, 8.3). The usual case is
 that the closing server sends a CloseReq, whereupon the client holds timewait
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 484b8a1fb023..d3ac1bde60b4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -209,6 +209,7 @@ struct dccp_so_feat {
 #define DCCP_SOCKOPT_SERVER_TIMEWAIT	6
 #define DCCP_SOCKOPT_SEND_CSCOV		10
 #define DCCP_SOCKOPT_RECV_CSCOV		11
+#define DCCP_SOCKOPT_AVAILABLE_CCIDS	12
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 8fe931a3d7a1..647cb0614f84 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -13,6 +13,13 @@
 
 #include "ccid.h"
 
+static u8 builtin_ccids[] = {
+	DCCPC_CCID2,		/* CCID2 is supported by default */
+#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE)
+	DCCPC_CCID3,
+#endif
+};
+
 static struct ccid_operations *ccids[CCID_MAX];
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 static atomic_t ccids_lockct = ATOMIC_INIT(0);
@@ -86,6 +93,47 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
 	}
 }
 
+/* check that up to @array_len members in @ccid_array are supported */
+bool ccid_support_check(u8 const *ccid_array, u8 array_len)
+{
+	u8 i, j, found;
+
+	for (i = 0, found = 0; i < array_len; i++, found = 0) {
+		for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++)
+			found = (ccid_array[i] == builtin_ccids[j]);
+		if (!found)
+			return false;
+	}
+	return true;
+}
+
+/**
+ * ccid_get_builtin_ccids  -  Provide copy of `builtin' CCID array
+ * @ccid_array: pointer to copy into
+ * @array_len: value to return length into
+ * This function allocates memory - caller must see that it is freed after use.
+ */
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
+{
+	*ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any());
+	if (*ccid_array == NULL)
+		return -ENOBUFS;
+	*array_len = ARRAY_SIZE(builtin_ccids);
+	return 0;
+}
+
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+				    char __user *optval, int __user *optlen)
+{
+	if (len < sizeof(builtin_ccids))
+		return -EINVAL;
+
+	if (put_user(sizeof(builtin_ccids), optlen) ||
+	    copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids)))
+		return -EFAULT;
+	return 0;
+}
+
 int ccid_register(struct ccid_operations *ccid_ops)
 {
 	int err = -ENOBUFS;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index fdeae7b57319..259f5469d7d0 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -103,6 +103,11 @@ static inline void *ccid_priv(const struct ccid *ccid)
 	return (void *)ccid->ccid_priv;
 }
 
+extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
+extern int  ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
+extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+					  char __user *, int __user *);
+
 extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
 			     gfp_t gfp);
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 192d494a3816..f79fb5e33f5e 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -342,6 +342,10 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
 	    !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
 		return -EINVAL;
 
+	/* Avoid negotiating alien CCIDs by only advertising supported ones */
+	if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
+		return -EOPNOTSUPP;
+
 	if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
 		return -ENOMEM;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 01332fe7a99a..b4b10cbd8880 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -649,6 +649,8 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_GET_CUR_MPS:
 		val = dp->dccps_mss_cache;
 		break;
+	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		val = dp->dccps_server_timewait;
 		break;
-- 
cgit v1.2.3


From 8f424b5f32d78b4f353b3cddca9804808ef063eb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 12 Nov 2008 00:53:30 -0800
Subject: net: Introduce read_pnet() and write_pnet() helpers

This patch introduces two helpers that deal with reading and writing
struct net pointers in various network structures.

Their implementation depends on CONFIG_NET_NS

For symmetry, both functions work with "struct net **pnet".

Their usage should reduce the number of #ifdef CONFIG_NET_NS,
without adding many helpers for each network structure
that hold a "struct net *pointer"

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 700c53a3c6fa..319557789a40 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -192,6 +192,24 @@ static inline void release_net(struct net *net)
 }
 #endif
 
+#ifdef CONFIG_NET_NS
+
+static inline void write_pnet(struct net **pnet, struct net *net)
+{
+	*pnet = net;
+}
+
+static inline struct net *read_pnet(struct net * const *pnet)
+{
+	return *pnet;
+}
+
+#else
+
+#define write_pnet(pnet, net)	do { (void)(net);} while (0)
+#define read_pnet(pnet)		(&init_net)
+
+#endif
 
 #define for_each_net(VAR)				\
 	list_for_each_entry(VAR, &net_namespace_list, list)
-- 
cgit v1.2.3


From 7a9546ee354ec6f23af403992b8c07baa50a23d2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 12 Nov 2008 00:54:20 -0800
Subject: net: ib_net pointer should depends on CONFIG_NET_NS

We can shrink size of "struct inet_bind_bucket" by 50%, using
read_pnet() and write_pnet()

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h   | 7 +++++++
 net/ipv4/inet_connection_sock.c | 4 ++--
 net/ipv4/inet_hashtables.c      | 6 +++---
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5cc182f9ecae..cb31fbf8ae2a 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -77,13 +77,20 @@ struct inet_ehash_bucket {
  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
  */
 struct inet_bind_bucket {
+#ifdef CONFIG_NET_NS
 	struct net		*ib_net;
+#endif
 	unsigned short		port;
 	signed short		fastreuse;
 	struct hlist_node	node;
 	struct hlist_head	owners;
 };
 
+static inline struct net *ib_net(struct inet_bind_bucket *ib)
+{
+	return read_pnet(&ib->ib_net);
+}
+
 #define inet_bind_bucket_for_each(tb, node, head) \
 	hlist_for_each_entry(tb, node, head, node)
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 36f4cbc7da3a..05af807ca9b9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 					hashinfo->bhash_size)];
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
-				if (tb->ib_net == net && tb->port == rover)
+				if (ib_net(tb) == net && tb->port == rover)
 					goto next;
 			break;
 		next:
@@ -137,7 +137,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 				hashinfo->bhash_size)];
 		spin_lock(&head->lock);
 		inet_bind_bucket_for_each(tb, node, &head->chain)
-			if (tb->ib_net == net && tb->port == snum)
+			if (ib_net(tb) == net && tb->port == snum)
 				goto tb_found;
 	}
 	tb = NULL;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 44981906fb91..be41ebbec4eb 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
 
 	if (tb != NULL) {
-		tb->ib_net       = hold_net(net);
+		write_pnet(&tb->ib_net, hold_net(net));
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		INIT_HLIST_HEAD(&tb->owners);
@@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
 {
 	if (hlist_empty(&tb->owners)) {
 		__hlist_del(&tb->node);
-		release_net(tb->ib_net);
+		release_net(ib_net(tb));
 		kmem_cache_free(cachep, tb);
 	}
 }
@@ -449,7 +449,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			 * unique enough.
 			 */
 			inet_bind_bucket_for_each(tb, node, &head->chain) {
-				if (tb->ib_net == net && tb->port == port) {
+				if (ib_net(tb) == net && tb->port == port) {
 					WARN_ON(hlist_empty(&tb->owners));
 					if (tb->fastreuse >= 0)
 						goto next_port;
-- 
cgit v1.2.3


From e42ea986e4a4cab4209d982feffcaf50f21e80e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 12 Nov 2008 00:54:54 -0800
Subject: net: Cleanup of neighbour code

Using read_pnet() and write_pnet() in neighbour code ease the reading
of code.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 12 ++----------
 net/core/neighbour.c    | 12 +++---------
 2 files changed, 5 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 365b5e260239..d8d790e56d3d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -220,11 +220,7 @@ extern void			neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *p
 static inline
 struct net			*neigh_parms_net(const struct neigh_parms *parms)
 {
-#ifdef CONFIG_NET_NS
-	return parms->net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&parms->net);
 }
 
 extern unsigned long		neigh_rand_reach_time(unsigned long base);
@@ -241,11 +237,7 @@ extern int			pneigh_delete(struct neigh_table *tbl, struct net *net, const void
 static inline
 struct net			*pneigh_net(const struct pneigh_entry *pneigh)
 {
-#ifdef CONFIG_NET_NS
-	return pneigh->net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&pneigh->net);
 }
 
 extern void neigh_app_ns(struct neighbour *n);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 500c2430007c..cca6a55909eb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
 	if (!n)
 		goto out;
 
-#ifdef CONFIG_NET_NS
-	n->net = hold_net(net);
-#endif
+	write_pnet(&n->net, hold_net(net));
 	memcpy(n->key, pkey, key_len);
 	n->dev = dev;
 	if (dev)
@@ -1350,9 +1348,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 
 		dev_hold(dev);
 		p->dev = dev;
-#ifdef CONFIG_NET_NS
-		p->net = hold_net(net);
-#endif
+		write_pnet(&p->net, hold_net(net));
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
 		p->next		= tbl->parms.next;
@@ -1407,9 +1403,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
-#ifdef CONFIG_NET_NS
-	tbl->parms.net = &init_net;
-#endif
+	write_pnet(&tbl->parms.net, &init_net);
 	atomic_set(&tbl->parms.refcnt, 1);
 	tbl->parms.reachable_time =
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
-- 
cgit v1.2.3


From 3f5ec13696fd4a33bde42f385406cbb1d3cc96fd Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 11 Nov 2008 23:21:31 +0100
Subject: tracing/fastboot: move boot tracer structs and funcs into their own
 header.

Impact: Cleanups on the boot tracer and ftrace

This patch bring some cleanups about the boot tracer headers. The
functions and structures of this tracer have nothing related to ftrace
and should have so their own header file.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 41 -----------------------------------------
 include/trace/boot.h   | 43 +++++++++++++++++++++++++++++++++++++++++++
 init/main.c            |  1 +
 kernel/trace/trace.h   |  1 +
 4 files changed, 45 insertions(+), 41 deletions(-)
 create mode 100644 include/trace/boot.h

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dcbbf72a88b1..4fbc4a8b86a5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -287,45 +287,4 @@ extern trace_function_return_t ftrace_function_return;
 extern void unregister_ftrace_return(void);
 #endif
 
-/*
- * Structure which defines the trace of an initcall.
- * You don't have to fill the func field since it is
- * only used internally by the tracer.
- */
-struct boot_trace {
-	pid_t			caller;
-	char			func[KSYM_NAME_LEN];
-	int			result;
-	unsigned long long	duration;		/* usecs */
-	ktime_t			calltime;
-	ktime_t			rettime;
-};
-
-#ifdef CONFIG_BOOT_TRACER
-/* Append the trace on the ring-buffer */
-extern void trace_boot(struct boot_trace *it, initcall_t fn);
-
-/* Tells the tracer that smp_pre_initcall is finished.
- * So we can start the tracing
- */
-extern void start_boot_trace(void);
-
-/* Resume the tracing of other necessary events
- * such as sched switches
- */
-extern void enable_boot_trace(void);
-
-/* Suspend this tracing. Actually, only sched_switches tracing have
- * to be suspended. Initcalls doesn't need it.)
- */
-extern void disable_boot_trace(void);
-#else
-static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
-static inline void start_boot_trace(void) { }
-static inline void enable_boot_trace(void) { }
-static inline void disable_boot_trace(void) { }
-#endif
-
-
-
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/trace/boot.h b/include/trace/boot.h
new file mode 100644
index 000000000000..4cbe64e46cdc
--- /dev/null
+++ b/include/trace/boot.h
@@ -0,0 +1,43 @@
+#ifndef _LINUX_TRACE_BOOT_H
+#define _LINUX_TRACE_BOOT_H
+
+/*
+ * Structure which defines the trace of an initcall.
+ * You don't have to fill the func field since it is
+ * only used internally by the tracer.
+ */
+struct boot_trace {
+	pid_t			caller;
+	char			func[KSYM_NAME_LEN];
+	int			result;
+	unsigned long long	duration;		/* usecs */
+	ktime_t			calltime;
+	ktime_t			rettime;
+};
+
+#ifdef CONFIG_BOOT_TRACER
+/* Append the trace on the ring-buffer */
+extern void trace_boot(struct boot_trace *it, initcall_t fn);
+
+/* Tells the tracer that smp_pre_initcall is finished.
+ * So we can start the tracing
+ */
+extern void start_boot_trace(void);
+
+/* Resume the tracing of other necessary events
+ * such as sched switches
+ */
+extern void enable_boot_trace(void);
+
+/* Suspend this tracing. Actually, only sched_switches tracing have
+ * to be suspended. Initcalls doesn't need it.)
+ */
+extern void disable_boot_trace(void);
+#else
+static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
+static inline void start_boot_trace(void) { }
+static inline void enable_boot_trace(void) { }
+static inline void disable_boot_trace(void) { }
+#endif /* CONFIG_BOOT_TRACER */
+
+#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/init/main.c b/init/main.c
index 4b03cd5656ca..16ca1ee071c4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -63,6 +63,7 @@
 #include <linux/signal.h>
 #include <linux/idr.h>
 #include <linux/ftrace.h>
+#include <trace/boot.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e40ce0c14690..f69a5199596b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
+#include <trace/boot.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
-- 
cgit v1.2.3


From 74239072830ef3f1398edeb1bc1076fc330fd4a2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 11 Nov 2008 23:24:42 +0100
Subject: tracing/fastboot: Use the ring-buffer timestamp for initcall entries

Impact: Split the boot tracer entries in two parts: call and return

Now that we are using the sched tracer from the boot tracer, we want
to use the same timestamp than the ring-buffer to have consistent time
captures between sched events and initcall events.

So we get rid of the old time capture by the boot tracer and split the
initcall events in two parts: call and return. This way we have the
ring buffer timestamp of both.

An example trace:

[   27.904149584] calling  net_ns_init+0x0/0x1c0 @ 1
[   27.904429624] initcall net_ns_init+0x0/0x1c0 returned 0 after 0 msecs
[   27.904575926] calling  reboot_init+0x0/0x20 @ 1
[   27.904655399] initcall reboot_init+0x0/0x20 returned 0 after 0 msecs
[   27.904800228] calling  sysctl_init+0x0/0x30 @ 1
[   27.905142914] initcall sysctl_init+0x0/0x30 returned 0 after 0 msecs
[   27.905287211] calling  ksysfs_init+0x0/0xb0 @ 1
 ##### CPU 0 buffer started ####
            init-1     [000]    27.905395:      1:120:R   + [001]    11:115:S
 ##### CPU 1 buffer started ####
          <idle>-0     [001]    27.905425:      0:140:R ==> [001]    11:115:R
            init-1     [000]    27.905426:      1:120:D ==> [000]     0:140:R
          <idle>-0     [000]    27.905431:      0:140:R   + [000]     4:115:S
          <idle>-0     [000]    27.905451:      0:140:R ==> [000]     4:115:R
     ksoftirqd/0-4     [000]    27.905456:      4:115:S ==> [000]     0:140:R
           udevd-11    [001]    27.905458:     11:115:R   + [001]    14:115:R
          <idle>-0     [000]    27.905459:      0:140:R   + [000]     4:115:S
          <idle>-0     [000]    27.905462:      0:140:R ==> [000]     4:115:R
           udevd-11    [001]    27.905462:     11:115:R ==> [001]    14:115:R
     ksoftirqd/0-4     [000]    27.905467:      4:115:S ==> [000]     0:140:R
          <idle>-0     [000]    27.905470:      0:140:R   + [000]     4:115:S
          <idle>-0     [000]    27.905473:      0:140:R ==> [000]     4:115:R
     ksoftirqd/0-4     [000]    27.905476:      4:115:S ==> [000]     0:140:R
          <idle>-0     [000]    27.905479:      0:140:R   + [000]     4:115:S
          <idle>-0     [000]    27.905482:      0:140:R ==> [000]     4:115:R
     ksoftirqd/0-4     [000]    27.905486:      4:115:S ==> [000]     0:140:R
           udevd-14    [001]    27.905499:     14:120:X ==> [001]    11:115:R
           udevd-11    [001]    27.905506:     11:115:R   + [000]     1:120:D
          <idle>-0     [000]    27.905515:      0:140:R ==> [000]     1:120:R
           udevd-11    [001]    27.905517:     11:115:S ==> [001]     0:140:R
[   27.905557107] initcall ksysfs_init+0x0/0xb0 returned 0 after 3906 msecs
[   27.905705736] calling  init_jiffies_clocksource+0x0/0x10 @ 1
[   27.905779239] initcall init_jiffies_clocksource+0x0/0x10 returned 0 after 0 msecs
[   27.906769814] calling  pm_init+0x0/0x30 @ 1
[   27.906853627] initcall pm_init+0x0/0x30 returned 0 after 0 msecs
[   27.906997803] calling  pm_disk_init+0x0/0x20 @ 1
[   27.907076946] initcall pm_disk_init+0x0/0x20 returned 0 after 0 msecs
[   27.907222556] calling  swsusp_header_init+0x0/0x30 @ 1
[   27.907294325] initcall swsusp_header_init+0x0/0x30 returned 0 after 0 msecs
[   27.907439620] calling  stop_machine_init+0x0/0x50 @ 1
            init-1     [000]    27.907485:      1:120:R   + [000]     2:115:S
            init-1     [000]    27.907490:      1:120:D ==> [000]     2:115:R
        kthreadd-2     [000]    27.907507:      2:115:R   + [001]    15:115:R
          <idle>-0     [001]    27.907517:      0:140:R ==> [001]    15:115:R
        kthreadd-2     [000]    27.907517:      2:115:D ==> [000]     0:140:R
          <idle>-0     [000]    27.907521:      0:140:R   + [000]     4:115:S
          <idle>-0     [000]    27.907524:      0:140:R ==> [000]     4:115:R
           udevd-15    [001]    27.907527:     15:115:D   + [000]     2:115:D
     ksoftirqd/0-4     [000]    27.907537:      4:115:S ==> [000]     2:115:R
           udevd-15    [001]    27.907537:     15:115:D ==> [001]     0:140:R
        kthreadd-2     [000]    27.907546:      2:115:R   + [000]     1:120:D
        kthreadd-2     [000]    27.907550:      2:115:S ==> [000]     1:120:R
            init-1     [000]    27.907584:      1:120:R   + [000]    15:  0:D
            init-1     [000]    27.907589:      1:120:R   + [000]     2:115:S
            init-1     [000]    27.907593:      1:120:D ==> [000]    15:  0:R
           udevd-15    [000]    27.907601:     15:  0:S ==> [000]     2:115:R
 ##### CPU 0 buffer started ####
        kthreadd-2     [000]    27.907616:      2:115:R   + [001]    16:115:R
 ##### CPU 1 buffer started ####
          <idle>-0     [001]    27.907620:      0:140:R ==> [001]    16:115:R
        kthreadd-2     [000]    27.907621:      2:115:D ==> [000]     0:140:R
           udevd-16    [001]    27.907625:     16:115:D   + [000]     2:115:D
          <idle>-0     [000]    27.907628:      0:140:R   + [000]     4:115:S
           udevd-16    [001]    27.907629:     16:115:D ==> [001]     0:140:R
          <idle>-0     [000]    27.907631:      0:140:R ==> [000]     4:115:R
     ksoftirqd/0-4     [000]    27.907636:      4:115:S ==> [000]     2:115:R
        kthreadd-2     [000]    27.907644:      2:115:R   + [000]     1:120:D
        kthreadd-2     [000]    27.907647:      2:115:S ==> [000]     1:120:R
            init-1     [000]    27.907657:      1:120:R   + [001]    16:  0:D
          <idle>-0     [001]    27.907666:      0:140:R ==> [001]    16:  0:R
[   27.907703862] initcall stop_machine_init+0x0/0x50 returned 0 after 0 msecs
[   27.907850704] calling  filelock_init+0x0/0x30 @ 1
[   27.907926573] initcall filelock_init+0x0/0x30 returned 0 after 0 msecs
[   27.908071327] calling  init_script_binfmt+0x0/0x10 @ 1
[   27.908165195] initcall init_script_binfmt+0x0/0x10 returned 0 after 0 msecs
[   27.908309461] calling  init_elf_binfmt+0x0/0x10 @ 1

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/boot.h      |  31 ++++++++----
 init/main.c               |  32 ++++++------
 kernel/trace/trace.h      |  17 +++++--
 kernel/trace/trace_boot.c | 123 +++++++++++++++++++++++++++++++++++-----------
 4 files changed, 144 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/trace/boot.h b/include/trace/boot.h
index 4cbe64e46cdc..6b54537eab02 100644
--- a/include/trace/boot.h
+++ b/include/trace/boot.h
@@ -2,22 +2,30 @@
 #define _LINUX_TRACE_BOOT_H
 
 /*
- * Structure which defines the trace of an initcall.
+ * Structure which defines the trace of an initcall
+ * while it is called.
  * You don't have to fill the func field since it is
  * only used internally by the tracer.
  */
-struct boot_trace {
+struct boot_trace_call {
 	pid_t			caller;
 	char			func[KSYM_NAME_LEN];
-	int			result;
-	unsigned long long	duration;		/* usecs */
-	ktime_t			calltime;
-	ktime_t			rettime;
+};
+
+/*
+ * Structure which defines the trace of an initcall
+ * while it returns.
+ */
+struct boot_trace_ret {
+	char			func[KSYM_NAME_LEN];
+	int				result;
+	unsigned long long	duration;		/* nsecs */
 };
 
 #ifdef CONFIG_BOOT_TRACER
-/* Append the trace on the ring-buffer */
-extern void trace_boot(struct boot_trace *it, initcall_t fn);
+/* Append the traces on the ring-buffer */
+extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
+extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
 
 /* Tells the tracer that smp_pre_initcall is finished.
  * So we can start the tracing
@@ -34,7 +42,12 @@ extern void enable_boot_trace(void);
  */
 extern void disable_boot_trace(void);
 #else
-static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
+static inline
+void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
+
+static inline
+void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
+
 static inline void start_boot_trace(void) { }
 static inline void enable_boot_trace(void) { }
 static inline void disable_boot_trace(void) { }
diff --git a/init/main.c b/init/main.c
index 16ca1ee071c4..e810196bf2f2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -704,33 +704,35 @@ core_param(initcall_debug, initcall_debug, bool, 0644);
 int do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	ktime_t delta;
+	ktime_t calltime, delta, rettime;
 	char msgbuf[64];
-	struct boot_trace it;
+	struct boot_trace_call call;
+	struct boot_trace_ret ret;
 
 	if (initcall_debug) {
-		it.caller = task_pid_nr(current);
-		printk("calling  %pF @ %i\n", fn, it.caller);
-		it.calltime = ktime_get();
+		call.caller = task_pid_nr(current);
+		printk("calling  %pF @ %i\n", fn, call.caller);
+		calltime = ktime_get();
+		trace_boot_call(&call, fn);
 		enable_boot_trace();
 	}
 
-	it.result = fn();
+	ret.result = fn();
 
 	if (initcall_debug) {
-		it.rettime = ktime_get();
-		delta = ktime_sub(it.rettime, it.calltime);
-		it.duration = (unsigned long long) delta.tv64 >> 10;
-		printk("initcall %pF returned %d after %Ld usecs\n", fn,
-			it.result, it.duration);
-		trace_boot(&it, fn);
 		disable_boot_trace();
+		rettime = ktime_get();
+		delta = ktime_sub(rettime, calltime);
+		ret.duration = (unsigned long long) delta.tv64 >> 10;
+		trace_boot_ret(&ret, fn);
+		printk("initcall %pF returned %d after %Ld usecs\n", fn,
+			ret.result, ret.duration);
 	}
 
 	msgbuf[0] = 0;
 
-	if (it.result && it.result != -ENODEV && initcall_debug)
-		sprintf(msgbuf, "error code %d ", it.result);
+	if (ret.result && ret.result != -ENODEV && initcall_debug)
+		sprintf(msgbuf, "error code %d ", ret.result);
 
 	if (preempt_count() != count) {
 		strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -744,7 +746,7 @@ int do_one_initcall(initcall_t fn)
 		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
-	return it.result;
+	return ret.result;
 }
 
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69a5199596b..b5f91f198fd4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,7 +22,8 @@ enum trace_type {
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
-	TRACE_BOOT,
+	TRACE_BOOT_CALL,
+	TRACE_BOOT_RET,
 	TRACE_FN_RET,
 
 	__TRACE_LAST_TYPE
@@ -123,9 +124,14 @@ struct trace_mmiotrace_map {
 	struct mmiotrace_map	map;
 };
 
-struct trace_boot {
+struct trace_boot_call {
 	struct trace_entry	ent;
-	struct boot_trace	initcall;
+	struct boot_trace_call boot_call;
+};
+
+struct trace_boot_ret {
+	struct trace_entry	ent;
+	struct boot_trace_ret boot_ret;
 };
 
 /*
@@ -228,8 +234,9 @@ extern void __ftrace_bad_type(void);
 			  TRACE_MMIO_RW);				\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
 			  TRACE_MMIO_MAP);				\
-		IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);	\
-		IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET); \
+		IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
+		IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
+		IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
 		__ftrace_bad_type();					\
 	} while (0)
 
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 8f71915e8bb4..cb333b7fd113 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -58,35 +58,71 @@ static void boot_trace_init(struct trace_array *tr)
 	tracing_sched_switch_assign_trace(tr);
 }
 
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+static enum print_line_t
+initcall_call_print_line(struct trace_iterator *iter)
 {
+	struct trace_entry *entry = iter->ent;
+	struct trace_seq *s = &iter->seq;
+	struct trace_boot_call *field;
+	struct boot_trace_call *call;
+	u64 ts;
+	unsigned long nsec_rem;
 	int ret;
+
+	trace_assign_type(field, entry);
+	call = &field->boot_call;
+	ts = iter->ts;
+	nsec_rem = do_div(ts, 1000000000);
+
+	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
+			(unsigned long)ts, nsec_rem, call->func, call->caller);
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+	else
+		return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+initcall_ret_print_line(struct trace_iterator *iter)
+{
 	struct trace_entry *entry = iter->ent;
-	struct trace_boot *field = (struct trace_boot *)entry;
-	struct boot_trace *it = &field->initcall;
 	struct trace_seq *s = &iter->seq;
-	struct timespec calltime = ktime_to_timespec(it->calltime);
-	struct timespec rettime = ktime_to_timespec(it->rettime);
-
-	if (entry->type == TRACE_BOOT) {
-		ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
-					  calltime.tv_sec,
-					  calltime.tv_nsec,
-					  it->func, it->caller);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-
-		ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
-					  "returned %d after %lld msecs\n",
-					  rettime.tv_sec,
-					  rettime.tv_nsec,
-					  it->func, it->result, it->duration);
-
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
+	struct trace_boot_ret *field;
+	struct boot_trace_ret *init_ret;
+	u64 ts;
+	unsigned long nsec_rem;
+	int ret;
+
+	trace_assign_type(field, entry);
+	init_ret = &field->boot_ret;
+	ts = iter->ts;
+	nsec_rem = do_div(ts, 1000000000);
+
+	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
+			"returned %d after %llu msecs\n",
+			(unsigned long) ts,
+			nsec_rem,
+			init_ret->func, init_ret->result, init_ret->duration);
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+	else
 		return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t initcall_print_line(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+
+	switch (entry->type) {
+	case TRACE_BOOT_CALL:
+		return initcall_call_print_line(iter);
+	case TRACE_BOOT_RET:
+		return initcall_ret_print_line(iter);
+	default:
+		return TRACE_TYPE_UNHANDLED;
 	}
-	return TRACE_TYPE_UNHANDLED;
 }
 
 struct tracer boot_tracer __read_mostly =
@@ -97,11 +133,10 @@ struct tracer boot_tracer __read_mostly =
 	.print_line	= initcall_print_line,
 };
 
-void trace_boot(struct boot_trace *it, initcall_t fn)
+void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
 {
 	struct ring_buffer_event *event;
-	struct trace_boot *entry;
-	struct trace_array_cpu *data;
+	struct trace_boot_call *entry;
 	unsigned long irq_flags;
 	struct trace_array *tr = boot_trace;
 
@@ -111,9 +146,37 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
 	/* Get its name now since this function could
 	 * disappear because it is in the .init section.
 	 */
-	sprint_symbol(it->func, (unsigned long)fn);
+	sprint_symbol(bt->func, (unsigned long)fn);
+	preempt_disable();
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+	entry->ent.type = TRACE_BOOT_CALL;
+	entry->boot_call = *bt;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+
+ out:
+	preempt_enable();
+}
+
+void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
+{
+	struct ring_buffer_event *event;
+	struct trace_boot_ret *entry;
+	unsigned long irq_flags;
+	struct trace_array *tr = boot_trace;
+
+	if (!pre_initcalls_finished)
+		return;
+
+	sprint_symbol(bt->func, (unsigned long)fn);
 	preempt_disable();
-	data = tr->data[smp_processor_id()];
 
 	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
 					 &irq_flags);
@@ -121,8 +184,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
 		goto out;
 	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(&entry->ent, 0, 0);
-	entry->ent.type = TRACE_BOOT;
-	entry->initcall = *it;
+	entry->ent.type = TRACE_BOOT_RET;
+	entry->boot_ret = *bt;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
 	trace_wake_up();
-- 
cgit v1.2.3


From a7d26f84cb76b962217ce277c3e3a9748a26ac11 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Wed, 12 Nov 2008 11:02:25 +0100
Subject: ALSA: Release v1.0.18a

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/version.h b/include/sound/version.h
index eae315573563..2b48237e23bf 100644
--- a/include/sound/version.h
+++ b/include/sound/version.h
@@ -1,3 +1,3 @@
 /* include/version.h */
-#define CONFIG_SND_VERSION "1.0.18"
+#define CONFIG_SND_VERSION "1.0.18a"
 #define CONFIG_SND_DATE ""
-- 
cgit v1.2.3


From 92a77aac9812d5397abbe6f1920e085e50838635 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Wed, 12 Nov 2008 21:20:00 +1100
Subject: security: remove broken and useless declarations

Remove broken declarations for security_capable* functions,
which were not needed anyway.

Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index b313ba1dd5d1..7f26580a5a4d 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -521,8 +521,6 @@ extern const kernel_cap_t __cap_init_eff_set;
 
 kernel_cap_t cap_set_effective(const kernel_cap_t pE_new);
 
-extern int security_capable(struct task_struct *t, int cap);
-extern int security_capable_noaudit(struct task_struct *t, int cap);
 /**
  * has_capability - Determine if a task has a superior capability available
  * @t: The task in question
-- 
cgit v1.2.3


From 1f0d69a9fc815db82f15722bf05227190b1d714d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 12 Nov 2008 00:14:39 -0500
Subject: tracing: profile likely and unlikely annotations

Impact: new unlikely/likely profiler

Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.

When configured, every(*) likely and unlikely macro gets a counter attached
to it. When the condition is hit, the hit and misses of that condition
are recorded. These numbers can later be retrieved by:

  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers.

# cat /debug/tracing/profile_unlikely | head
 correct incorrect  %        Function                  File              Line
 ------- ---------  -        --------                  ----              ----
    2167        0   0 do_arch_prctl                  process_64.c         832
       0        0   0 do_arch_prctl                  process_64.c         804
    2670        0   0 IS_ERR                         err.h                34
   71230     5693   7 __switch_to                    process_64.c         673
   76919        0   0 __switch_to                    process_64.c         639
   43184    33743  43 __switch_to                    process_64.c         624
   12740    64181  83 __switch_to                    process_64.c         594
   12740    64174  83 __switch_to                    process_64.c         590

# cat /debug/tracing/profile_unlikely | \
  awk '{ if ($3 > 25) print $0; }' |head -20
   44963    35259  43 __switch_to                    process_64.c         624
   12762    67454  84 __switch_to                    process_64.c         594
   12762    67447  84 __switch_to                    process_64.c         590
    1478      595  28 syscall_get_error              syscall.h            51
       0     2821 100 syscall_trace_leave            ptrace.c             1567
       0        1 100 native_smp_prepare_cpus        smpboot.c            1237
   86338   265881  75 calc_delta_fair                sched_fair.c         408
  210410   108540  34 calc_delta_mine                sched.c              1267
       0    54550 100 sched_info_queued              sched_stats.h        222
   51899    66435  56 pick_next_task_fair            sched_fair.c         1422
       6       10  62 yield_task_fair                sched_fair.c         982
    7325     2692  26 rt_policy                      sched.c              144
       0     1270 100 pre_schedule_rt                sched_rt.c           1261
    1268    48073  97 pick_next_task_rt              sched_rt.c           884
       0    45181 100 sched_info_dequeued            sched_stats.h        177
       0       15 100 sched_move_task                sched.c              8700
       0       15 100 sched_move_task                sched.c              8690
   53167    33217  38 schedule                       sched.c              4457
       0    80208 100 sched_info_switch              sched_stats.h        270
   30585    49631  61 context_switch                 sched.c              2619

# cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }'
   39900    36577  47 pick_next_task                 sched.c              4397
   20824    15233  42 switch_mm                      mmu_context_64.h     18
       0        7 100 __cancel_work_timer            workqueue.c          560
     617    66484  99 clocksource_adjust             timekeeping.c        456
       0   346340 100 audit_syscall_exit             auditsc.c            1570
      38   347350  99 audit_get_context              auditsc.c            732
       0   345244 100 audit_syscall_entry            auditsc.c            1541
      38     1017  96 audit_free                     auditsc.c            1446
       0     1090 100 audit_alloc                    auditsc.c            862
    2618     1090  29 audit_alloc                    auditsc.c            858
       0        6 100 move_masked_irq                migration.c          9
       1      198  99 probe_sched_wakeup             trace_sched_switch.c 58
       2        2  50 probe_wakeup                   trace_sched_wakeup.c 227
       0        2 100 probe_wakeup_sched_switch      trace_sched_wakeup.c 144
    4514     2090  31 __grab_cache_page              filemap.c            2149
   12882   228786  94 mapping_unevictable            pagemap.h            50
       4       11  73 __flush_cpu_slab               slub.c               1466
  627757   330451  34 slab_free                      slub.c               1731
    2959    61245  95 dentry_lru_del_init            dcache.c             153
     946     1217  56 load_elf_binary                binfmt_elf.c         904
     102       82  44 disk_put_part                  genhd.h              206
       1        1  50 dst_gc_task                    dst.c                82
       0       19 100 tcp_mss_split_point            tcp_output.c         1126

As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys. Note: the unlikely case had 71 hits
that were more than 25%.

Note:  After submitting my first version of this patch, Andrew Morton
  showed me a version written by Daniel Walker, where I picked up
  the following ideas from:

  1)  Using __builtin_constant_p to avoid profiling fixed values.
  2)  Using __FILE__ instead of instruction pointers.
  3)  Using the preprocessor to stop all profiling of likely
       annotations from vsyscall_64.c.

Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar
for their feed back on this patch.

(*) Not ever unlikely is recorded, those that are used by vsyscalls
 (a few of them) had to have profiling disabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vsyscall_64.c     |   8 ++
 include/asm-generic/vmlinux.lds.h |  14 +++-
 include/linux/compiler.h          |  61 +++++++++++++-
 kernel/trace/Kconfig              |  16 ++++
 kernel/trace/Makefile             |   1 +
 kernel/trace/trace_unlikely.c     | 164 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 261 insertions(+), 3 deletions(-)
 create mode 100644 kernel/trace/trace_unlikely.c

(limited to 'include')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..2f90202e59b3 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,14 @@
  *  want per guest time just set the kernel.vsyscall64 sysctl to 0.
  */
 
+/* Protect userspace from profiling */
+#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+# undef likely
+# undef unlikely
+# define likely(x)		likely_notrace(x)
+# define unlikely(x)		unlikely_notrace(x)
+#endif
+
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 80744606bad1..e10beb5335c9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -45,6 +45,17 @@
 #define MCOUNT_REC()
 #endif
 
+#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+#define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_likely_profile) = .;   \
+				*(_ftrace_likely)			      \
+				VMLINUX_SYMBOL(__stop_likely_profile) = .;    \
+				VMLINUX_SYMBOL(__start_unlikely_profile) = .; \
+				*(_ftrace_unlikely)			      \
+				VMLINUX_SYMBOL(__stop_unlikely_profile) = .;
+#else
+#define LIKELY_PROFILE()
+#endif
+
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
@@ -62,7 +73,8 @@
 	VMLINUX_SYMBOL(__stop___markers) = .;				\
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	*(__tracepoints)						\
-	VMLINUX_SYMBOL(__stop___tracepoints) = .;
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;			\
+	LIKELY_PROFILE()
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 98115d9d04da..935e30cfaf3c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *);
  * specific implementations come from the above header files
  */
 
-#define likely(x)	__builtin_expect(!!(x), 1)
-#define unlikely(x)	__builtin_expect(!!(x), 0)
+#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+struct ftrace_likely_data {
+	const char *func;
+	const char *file;
+	unsigned line;
+	unsigned long correct;
+	unsigned long incorrect;
+};
+void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect);
+
+#define likely_notrace(x)	__builtin_expect(!!(x), 1)
+#define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
+
+#define likely_check(x) ({						\
+			int ______r;					\
+			static struct ftrace_likely_data		\
+				__attribute__((__aligned__(4)))		\
+				__attribute__((section("_ftrace_likely"))) \
+				______f = {				\
+				.func = __func__,			\
+				.file = __FILE__,			\
+				.line = __LINE__,			\
+			};						\
+			______f.line = __LINE__;			\
+			______r = likely_notrace(x);			\
+			ftrace_likely_update(&______f, ______r, 1);	\
+			______r;					\
+		})
+#define unlikely_check(x) ({						\
+			int ______r;					\
+			static struct ftrace_likely_data		\
+				__attribute__((__aligned__(4)))		\
+				__attribute__((section("_ftrace_unlikely"))) \
+				______f = {				\
+				.func = __func__,			\
+				.file = __FILE__,			\
+				.line = __LINE__,			\
+			};						\
+			______f.line = __LINE__;			\
+			______r = unlikely_notrace(x);			\
+			ftrace_likely_update(&______f, ______r, 0);	\
+			______r;					\
+		})
+
+/*
+ * Using __builtin_constant_p(x) to ignore cases where the return
+ * value is always the same.  This idea is taken from a similar patch
+ * written by Daniel Walker.
+ */
+# ifndef likely
+#  define likely(x)	(__builtin_constant_p(x) ? !!(x) : likely_check(x))
+# endif
+# ifndef unlikely
+#  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : unlikely_check(x))
+# endif
+#else
+# define likely(x)	__builtin_expect(!!(x), 1)
+# define unlikely(x)	__builtin_expect(!!(x), 0)
+#endif
 
 /* Optimization barrier */
 #ifndef barrier
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d986216c8327..a604f24c755f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -159,6 +159,22 @@ config BOOT_TRACER
 	    selected, because the self-tests are an initcall as well and that
 	    would invalidate the boot trace. )
 
+config TRACE_UNLIKELY_PROFILE
+	bool "Trace likely/unlikely profiler"
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer profiles all the the likely and unlikely macros
+	  in the kernel. It will display the results in:
+
+	  /debugfs/tracing/profile_likely
+	  /debugfs/tracing/profile_unlikely
+
+	  Note: this will add a significant overhead, only turn this
+	  on if you need to profile the system's use of these macros.
+
+	  Say N if unsure.
+
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 3e1f361bbc17..98e70ee27986 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -25,5 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
+obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c
new file mode 100644
index 000000000000..94932696069f
--- /dev/null
+++ b/kernel/trace/trace_unlikely.c
@@ -0,0 +1,164 @@
+/*
+ * unlikely profiler
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/hash.h>
+#include <linux/fs.h>
+#include <asm/local.h>
+#include "trace.h"
+
+void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect)
+{
+	/* FIXME: Make this atomic! */
+	if (val == expect)
+		f->correct++;
+	else
+		f->incorrect++;
+}
+EXPORT_SYMBOL(ftrace_likely_update);
+
+struct ftrace_pointer {
+	void		*start;
+	void		*stop;
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct ftrace_pointer *f = m->private;
+	struct ftrace_likely_data *p = v;
+
+	(*pos)++;
+
+	if (v == (void *)1)
+		return f->start;
+
+	++p;
+
+	if ((void *)p >= (void *)f->stop)
+		return NULL;
+
+	return p;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+	void *t = (void *)1;
+	loff_t l = 0;
+
+	for (; t && l < *pos; t = t_next(m, t, &l))
+		;
+
+	return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+	struct ftrace_likely_data *p = v;
+	const char *f;
+	unsigned long percent;
+
+	if (v == (void *)1) {
+		seq_printf(m, " correct incorrect  %% "
+			      "       Function                "
+			      "  File              Line\n"
+			      " ------- ---------  - "
+			      "       --------                "
+			      "  ----              ----\n");
+		return 0;
+	}
+
+	/* Only print the file, not the path */
+	f = p->file + strlen(p->file);
+	while (f >= p->file && *f != '/')
+		f--;
+	f++;
+
+	if (p->correct) {
+		percent = p->incorrect * 100;
+		percent /= p->correct + p->incorrect;
+	} else
+		percent = p->incorrect ? 100 : 0;
+
+	seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent);
+	seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
+	return 0;
+}
+
+static struct seq_operations tracing_likely_seq_ops = {
+	.start		= t_start,
+	.next		= t_next,
+	.stop		= t_stop,
+	.show		= t_show,
+};
+
+static int tracing_likely_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &tracing_likely_seq_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = (void *)inode->i_private;
+	}
+
+	return ret;
+}
+
+static struct file_operations tracing_likely_fops = {
+	.open		= tracing_likely_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+extern unsigned long __start_likely_profile[];
+extern unsigned long __stop_likely_profile[];
+extern unsigned long __start_unlikely_profile[];
+extern unsigned long __stop_unlikely_profile[];
+
+static struct ftrace_pointer ftrace_likely_pos = {
+	.start			= __start_likely_profile,
+	.stop			= __stop_likely_profile,
+};
+
+static struct ftrace_pointer ftrace_unlikely_pos = {
+	.start			= __start_unlikely_profile,
+	.stop			= __stop_unlikely_profile,
+};
+
+static __init int ftrace_unlikely_init(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("profile_likely", 0444, d_tracer,
+				    &ftrace_likely_pos,
+				    &tracing_likely_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'profile_likely' entry\n");
+
+	entry = debugfs_create_file("profile_unlikely", 0444, d_tracer,
+				    &ftrace_unlikely_pos,
+				    &tracing_likely_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs"
+			   " 'profile_unlikely' entry\n");
+
+	return 0;
+}
+
+device_initcall(ftrace_unlikely_init);
-- 
cgit v1.2.3


From e25cf3db560e803292946ef23a30c69e341ce56f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Oct 2008 15:55:07 +0200
Subject: lockdep: include/linux/lockdep.h - fix warning in
 net/bluetooth/af_bluetooth.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix this warning:

  net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used
  net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used

this is a lockdep macro problem in the !LOCKDEP case.

We cannot convert it to an inline because the macro works on multiple types,
but we can mark the parameter used.

[ also clean up a misaligned tab in sock_lock_init_class_and_name() ]

[ also remove #ifdefs from around af_family_clock_key strings - which
  were certainly added to get rid of the ugly build warnings. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 5 +++--
 include/net/sock.h      | 2 +-
 net/core/sock.c         | 2 --
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index fc9f8e88123b..8956daf64abd 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -336,10 +336,11 @@ static inline void lockdep_on(void)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
-# define lockdep_init_map(lock, name, key, sub)	do { (void)(key); } while (0)
+# define lockdep_init_map(lock, name, key, sub) \
+		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
 # define lockdep_set_class_and_name(lock, key, name) \
-		do { (void)(key); } while (0)
+		do { (void)(key); (void)(name); } while (0)
 #define lockdep_set_class_and_subclass(lock, key, sub) \
 		do { (void)(key); } while (0)
 #define lockdep_set_subclass(lock, sub)		do { } while (0)
diff --git a/include/net/sock.h b/include/net/sock.h
index c04f9e18ea22..2f47107f6d0f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -815,7 +815,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
  */
 #define sock_lock_init_class_and_name(sk, sname, skey, name, key) 	\
 do {									\
-	sk->sk_lock.owned = 0;					\
+	sk->sk_lock.owned = 0;						\
 	init_waitqueue_head(&sk->sk_lock.wq);				\
 	spin_lock_init(&(sk)->sk_lock.slock);				\
 	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
diff --git a/net/core/sock.c b/net/core/sock.c
index 5e2a3132a8c9..341e39456952 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -136,7 +136,6 @@
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
@@ -187,7 +186,6 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
   "clock-AF_MAX"
 };
-#endif
 
 /*
  * sk_callback_lock locking rules are per-address-family,
-- 
cgit v1.2.3


From 2b7d0390a6d6d595f43ea3806639664afe5b9ebe Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 12 Nov 2008 13:17:38 +0100
Subject: tracing: branch tracer, fix vdso crash

Impact: fix bootup crash

the branch tracer missed arch/x86/vdso/vclock_gettime.c from
disabling tracing, which caused such bootup crashes:

  [  201.840097] init[1]: segfault at 7fffed3fe7c0 ip 00007fffed3fea2e sp 000077

also clean up the ugly ifdefs in arch/x86/kernel/vsyscall_64.c by
creating DISABLE_UNLIKELY_PROFILE facility for code to turn off
instrumentation on a per file basis.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vsyscall_64.c  | 9 ++-------
 arch/x86/vdso/vclock_gettime.c | 3 +++
 include/linux/compiler.h       | 6 +++++-
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 2f90202e59b3..ece02932ea57 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,13 +17,8 @@
  *  want per guest time just set the kernel.vsyscall64 sysctl to 0.
  */
 
-/* Protect userspace from profiling */
-#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
-# undef likely
-# undef unlikely
-# define likely(x)		likely_notrace(x)
-# define unlikely(x)		unlikely_notrace(x)
-#endif
+/* Disable profiling for userspace code: */
+#define DISABLE_UNLIKELY_PROFILE
 
 #include <linux/time.h>
 #include <linux/init.h>
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..6e667631e7dc 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
  * Also alternative() doesn't work.
  */
 
+/* Disable profiling for userspace code: */
+#define DISABLE_UNLIKELY_PROFILE
+
 #include <linux/kernel.h>
 #include <linux/posix-timers.h>
 #include <linux/time.h>
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 935e30cfaf3c..63b7d9089d6e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -59,7 +59,11 @@ extern void __chk_io_ptr(const volatile void __iomem *);
  * specific implementations come from the above header files
  */
 
-#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+/*
+ * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code
+ * to disable branch tracing on a per file basis.
+ */
+#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE)
 struct ftrace_likely_data {
 	const char *func;
 	const char *file;
-- 
cgit v1.2.3


From c2eb9c4ea383aee154e7139395872c4da629e715 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Wed, 12 Nov 2008 16:31:37 +0100
Subject: ALSA: when card identification is changed, change also /proc/asound
 symlink

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/info.h |  2 ++
 sound/core/info.c    | 17 +++++++++++++++++
 sound/core/init.c    |  1 +
 3 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/sound/info.h b/include/sound/info.h
index 8ae72e74f898..46f702a76e4f 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -126,6 +126,7 @@ int snd_info_card_create(struct snd_card * card);
 int snd_info_card_register(struct snd_card * card);
 int snd_info_card_free(struct snd_card * card);
 void snd_info_card_disconnect(struct snd_card * card);
+void snd_info_card_id_change(struct snd_card * card);
 int snd_info_register(struct snd_info_entry * entry);
 
 /* for card drivers */
@@ -160,6 +161,7 @@ static inline int snd_info_card_create(struct snd_card * card) { return 0; }
 static inline int snd_info_card_register(struct snd_card * card) { return 0; }
 static inline int snd_info_card_free(struct snd_card * card) { return 0; }
 static inline void snd_info_card_disconnect(struct snd_card * card) { }
+static inline void snd_info_card_id_change(struct snd_card * card) { }
 static inline int snd_info_register(struct snd_info_entry * entry) { return 0; }
 
 static inline int snd_card_proc_new(struct snd_card *card, const char *name,
diff --git a/sound/core/info.c b/sound/core/info.c
index 527b207462b0..70fa87189f36 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -652,6 +652,23 @@ int snd_info_card_register(struct snd_card *card)
 	return 0;
 }
 
+/*
+ * called on card->id change
+ */
+void snd_info_card_id_change(struct snd_card *card)
+{
+	mutex_lock(&info_mutex);
+	if (card->proc_root_link) {
+		snd_remove_proc_entry(snd_proc_root, card->proc_root_link);
+		card->proc_root_link = NULL;
+	}
+	if (strcmp(card->id, card->proc_root->name))
+		card->proc_root_link = proc_symlink(card->id,
+						    snd_proc_root,
+						    card->proc_root->name);
+	mutex_unlock(&info_mutex);
+}
+
 /*
  * de-register the card proc file
  * called from init.c
diff --git a/sound/core/init.c b/sound/core/init.c
index 5ff297d1d89a..af1e407ca27f 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -571,6 +571,7 @@ card_id_store_attr(struct device *dev, struct device_attribute *attr,
 			goto __exist;
 	}
 	strcpy(card->id, buf1);
+	snd_info_card_id_change(card);
 	mutex_unlock(&snd_card_mutex);
 
 	return count;
-- 
cgit v1.2.3


From fd64138c0eff8351b93ef99f7da929bb8a49b9ed Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Wed, 12 Nov 2008 16:53:47 +0100
Subject: ALSA: include/sound/info.h - coding style changed

Change coding style to be more acceptable by checkpatch.pl.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/info.h | 108 +++++++++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/sound/info.h b/include/sound/info.h
index 46f702a76e4f..7c2ee1a21b00 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -40,30 +40,34 @@ struct snd_info_buffer {
 struct snd_info_entry;
 
 struct snd_info_entry_text {
-	void (*read) (struct snd_info_entry *entry, struct snd_info_buffer *buffer);
-	void (*write) (struct snd_info_entry *entry, struct snd_info_buffer *buffer);
+	void (*read)(struct snd_info_entry *entry,
+		     struct snd_info_buffer *buffer);
+	void (*write)(struct snd_info_entry *entry,
+		      struct snd_info_buffer *buffer);
 };
 
 struct snd_info_entry_ops {
-	int (*open) (struct snd_info_entry *entry,
-		     unsigned short mode, void **file_private_data);
-	int (*release) (struct snd_info_entry * entry,
-			unsigned short mode, void *file_private_data);
-	long (*read) (struct snd_info_entry *entry, void *file_private_data,
-		      struct file * file, char __user *buf,
+	int (*open)(struct snd_info_entry *entry,
+		    unsigned short mode, void **file_private_data);
+	int (*release)(struct snd_info_entry *entry,
+		       unsigned short mode, void *file_private_data);
+	long (*read)(struct snd_info_entry *entry, void *file_private_data,
+		     struct file *file, char __user *buf,
+		     unsigned long count, unsigned long pos);
+	long (*write)(struct snd_info_entry *entry, void *file_private_data,
+		      struct file *file, const char __user *buf,
 		      unsigned long count, unsigned long pos);
-	long (*write) (struct snd_info_entry *entry, void *file_private_data,
-		       struct file * file, const char __user *buf,
-		       unsigned long count, unsigned long pos);
-	long long (*llseek) (struct snd_info_entry *entry, void *file_private_data,
-			    struct file * file, long long offset, int orig);
-	unsigned int (*poll) (struct snd_info_entry *entry, void *file_private_data,
-			      struct file * file, poll_table * wait);
-	int (*ioctl) (struct snd_info_entry *entry, void *file_private_data,
-		      struct file * file, unsigned int cmd, unsigned long arg);
-	int (*mmap) (struct snd_info_entry *entry, void *file_private_data,
-		     struct inode * inode, struct file * file,
-		     struct vm_area_struct * vma);
+	long long (*llseek)(struct snd_info_entry *entry,
+			    void *file_private_data, struct file *file,
+			    long long offset, int orig);
+	unsigned int(*poll)(struct snd_info_entry *entry,
+			    void *file_private_data, struct file *file,
+			    poll_table *wait);
+	int (*ioctl)(struct snd_info_entry *entry, void *file_private_data,
+		     struct file *file, unsigned int cmd, unsigned long arg);
+	int (*mmap)(struct snd_info_entry *entry, void *file_private_data,
+		    struct inode *inode, struct file *file,
+		    struct vm_area_struct *vma);
 };
 
 struct snd_info_entry {
@@ -106,35 +110,37 @@ void snd_card_info_read_oss(struct snd_info_buffer *buffer);
 static inline void snd_card_info_read_oss(struct snd_info_buffer *buffer) {}
 #endif
 
-int snd_iprintf(struct snd_info_buffer * buffer, char *fmt,...) __attribute__ ((format (printf, 2, 3)));
+int snd_iprintf(struct snd_info_buffer *buffer, char *fmt, ...) \
+				__attribute__ ((format (printf, 2, 3)));
 int snd_info_init(void);
 int snd_info_done(void);
 
-int snd_info_get_line(struct snd_info_buffer * buffer, char *line, int len);
+int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len);
 char *snd_info_get_str(char *dest, char *src, int len);
-struct snd_info_entry *snd_info_create_module_entry(struct module * module,
+struct snd_info_entry *snd_info_create_module_entry(struct module *module,
 					       const char *name,
-					       struct snd_info_entry * parent);
-struct snd_info_entry *snd_info_create_card_entry(struct snd_card * card,
+					       struct snd_info_entry *parent);
+struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card,
 					     const char *name,
-					     struct snd_info_entry * parent);
-void snd_info_free_entry(struct snd_info_entry * entry);
-int snd_info_store_text(struct snd_info_entry * entry);
-int snd_info_restore_text(struct snd_info_entry * entry);
-
-int snd_info_card_create(struct snd_card * card);
-int snd_info_card_register(struct snd_card * card);
-int snd_info_card_free(struct snd_card * card);
-void snd_info_card_disconnect(struct snd_card * card);
-void snd_info_card_id_change(struct snd_card * card);
-int snd_info_register(struct snd_info_entry * entry);
+					     struct snd_info_entry *parent);
+void snd_info_free_entry(struct snd_info_entry *entry);
+int snd_info_store_text(struct snd_info_entry *entry);
+int snd_info_restore_text(struct snd_info_entry *entry);
+
+int snd_info_card_create(struct snd_card *card);
+int snd_info_card_register(struct snd_card *card);
+int snd_info_card_free(struct snd_card *card);
+void snd_info_card_disconnect(struct snd_card *card);
+void snd_info_card_id_change(struct snd_card *card);
+int snd_info_register(struct snd_info_entry *entry);
 
 /* for card drivers */
-int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp);
+int snd_card_proc_new(struct snd_card *card, const char *name,
+		      struct snd_info_entry **entryp);
 
 static inline void snd_info_set_text_ops(struct snd_info_entry *entry, 
-					 void *private_data,
-					 void (*read)(struct snd_info_entry *, struct snd_info_buffer *))
+	void *private_data,
+	void (*read)(struct snd_info_entry *, struct snd_info_buffer *))
 {
 	entry->private_data = private_data;
 	entry->c.text.read = read;
@@ -147,22 +153,22 @@ int snd_info_check_reserved_words(const char *str);
 #define snd_seq_root NULL
 #define snd_oss_root NULL
 
-static inline int snd_iprintf(struct snd_info_buffer * buffer, char *fmt,...) { return 0; }
+static inline int snd_iprintf(struct snd_info_buffer *buffer, char *fmt, ...) { return 0; }
 static inline int snd_info_init(void) { return 0; }
 static inline int snd_info_done(void) { return 0; }
 
-static inline int snd_info_get_line(struct snd_info_buffer * buffer, char *line, int len) { return 0; }
+static inline int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len) { return 0; }
 static inline char *snd_info_get_str(char *dest, char *src, int len) { return NULL; }
-static inline struct snd_info_entry *snd_info_create_module_entry(struct module * module, const char *name, struct snd_info_entry * parent) { return NULL; }
-static inline struct snd_info_entry *snd_info_create_card_entry(struct snd_card * card, const char *name, struct snd_info_entry * parent) { return NULL; }
-static inline void snd_info_free_entry(struct snd_info_entry * entry) { ; }
-
-static inline int snd_info_card_create(struct snd_card * card) { return 0; }
-static inline int snd_info_card_register(struct snd_card * card) { return 0; }
-static inline int snd_info_card_free(struct snd_card * card) { return 0; }
-static inline void snd_info_card_disconnect(struct snd_card * card) { }
-static inline void snd_info_card_id_change(struct snd_card * card) { }
-static inline int snd_info_register(struct snd_info_entry * entry) { return 0; }
+static inline struct snd_info_entry *snd_info_create_module_entry(struct module *module, const char *name, struct snd_info_entry *parent) { return NULL; }
+static inline struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, const char *name, struct snd_info_entry *parent) { return NULL; }
+static inline void snd_info_free_entry(struct snd_info_entry *entry) { ; }
+
+static inline int snd_info_card_create(struct snd_card *card) { return 0; }
+static inline int snd_info_card_register(struct snd_card *card) { return 0; }
+static inline int snd_info_card_free(struct snd_card *card) { return 0; }
+static inline void snd_info_card_disconnect(struct snd_card *card) { }
+static inline void snd_info_card_id_change(struct snd_card *card) { }
+static inline int snd_info_register(struct snd_info_entry *entry) { return 0; }
 
 static inline int snd_card_proc_new(struct snd_card *card, const char *name,
 				    struct snd_info_entry **entryp) { return -EINVAL; }
-- 
cgit v1.2.3


From 2ed84eeb8808cf3c9f039213ca137ffd7d753f0e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 12 Nov 2008 15:24:24 -0500
Subject: trace: rename unlikely profiler to branch profiler

Impact: name change of unlikely tracer and profiler

Ingo Molnar suggested changing the config from UNLIKELY_PROFILE
to BRANCH_PROFILING. I never did like the "unlikely" name so I
went one step farther, and renamed all the unlikely configurations
to a "BRANCH" variant.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vsyscall_64.c     |  2 +-
 arch/x86/vdso/vclock_gettime.c    |  2 +-
 include/asm-generic/vmlinux.lds.h |  2 +-
 include/linux/compiler.h          | 19 ++++++++++---------
 kernel/trace/Kconfig              | 10 +++++-----
 kernel/trace/Makefile             |  7 +++----
 kernel/trace/trace.c              |  2 +-
 kernel/trace/trace.h              |  6 +++---
 kernel/trace/trace_unlikely.c     |  4 ++--
 9 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ece02932ea57..6f3d3d4cd973 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,7 +18,7 @@
  */
 
 /* Disable profiling for userspace code: */
-#define DISABLE_UNLIKELY_PROFILE
+#define DISABLE_BRANCH_PROFILING
 
 #include <linux/time.h>
 #include <linux/init.h>
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6e667631e7dc..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -10,7 +10,7 @@
  */
 
 /* Disable profiling for userspace code: */
-#define DISABLE_UNLIKELY_PROFILE
+#define DISABLE_BRANCH_PROFILING
 
 #include <linux/kernel.h>
 #include <linux/posix-timers.h>
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e10beb5335c9..a5e4ed9baec8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -45,7 +45,7 @@
 #define MCOUNT_REC()
 #endif
 
-#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+#ifdef CONFIG_TRACE_BRANCH_PROFILING
 #define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_likely_profile) = .;   \
 				*(_ftrace_likely)			      \
 				VMLINUX_SYMBOL(__stop_likely_profile) = .;    \
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 63b7d9089d6e..c7d804a7a4d6 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -59,26 +59,27 @@ extern void __chk_io_ptr(const volatile void __iomem *);
  * specific implementations come from the above header files
  */
 
-/*
- * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code
- * to disable branch tracing on a per file basis.
- */
-#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE)
-struct ftrace_likely_data {
+struct ftrace_branch_data {
 	const char *func;
 	const char *file;
 	unsigned line;
 	unsigned long correct;
 	unsigned long incorrect;
 };
-void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect);
+
+/*
+ * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
+ * to disable branch tracing on a per file basis.
+ */
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
+void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
 #define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
 
 #define likely_check(x) ({						\
 			int ______r;					\
-			static struct ftrace_likely_data		\
+			static struct ftrace_branch_data		\
 				__attribute__((__aligned__(4)))		\
 				__attribute__((section("_ftrace_likely"))) \
 				______f = {				\
@@ -93,7 +94,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect);
 		})
 #define unlikely_check(x) ({						\
 			int ______r;					\
-			static struct ftrace_likely_data		\
+			static struct ftrace_branch_data		\
 				__attribute__((__aligned__(4)))		\
 				__attribute__((section("_ftrace_unlikely"))) \
 				______f = {				\
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8abcaf821beb..9c89526b6b7c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -159,7 +159,7 @@ config BOOT_TRACER
 	    selected, because the self-tests are an initcall as well and that
 	    would invalidate the boot trace. )
 
-config TRACE_UNLIKELY_PROFILE
+config TRACE_BRANCH_PROFILING
 	bool "Trace likely/unlikely profiler"
 	depends on DEBUG_KERNEL
 	select TRACING
@@ -175,7 +175,7 @@ config TRACE_UNLIKELY_PROFILE
 
 	  Say N if unsure.
 
-config TRACING_UNLIKELY
+config TRACING_BRANCHES
 	bool
 	help
 	  Selected by tracers that will trace the likely and unlikely
@@ -183,10 +183,10 @@ config TRACING_UNLIKELY
 	  profiled. Profiling the tracing infrastructure can only happen
 	  when the likelys and unlikelys are not being traced.
 
-config UNLIKELY_TRACER
+config BRANCH_TRACER
 	bool "Trace likely/unlikely instances"
-	depends on TRACE_UNLIKELY_PROFILE
-	select TRACING_UNLIKELY
+	depends on TRACE_BRANCH_PROFILING
+	select TRACING_BRANCHES
 	help
 	  This traces the events of likely and unlikely condition
 	  calls in the kernel.  The difference between this and the
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c938d03516c0..0087df7ba44e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,9 +11,8 @@ obj-y += trace_selftest_dynamic.o
 endif
 
 # If unlikely tracing is enabled, do not trace these files
-ifdef CONFIG_TRACING_UNLIKELY
-KBUILD_CFLAGS += '-Dlikely(x)=likely_notrace(x)'
-KBUILD_CFLAGS += '-Dunlikely(x)=unlikely_notrace(x)'
+ifdef CONFIG_TRACING_BRANCHES
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
@@ -31,6 +30,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
-obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o
+obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_unlikely.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d842db14a59b..bad59d32a4a9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -258,7 +258,7 @@ static const char *trace_options[] = {
 	"sched-tree",
 	"ftrace_printk",
 	"ftrace_preempt",
-#ifdef CONFIG_UNLIKELY_TRACER
+#ifdef CONFIG_BRANCH_TRACER
 	"unlikely",
 #endif
 	NULL
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9635aa2c4fc1..dccae6312941 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -468,7 +468,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_SCHED_TREE		= 0x200,
 	TRACE_ITER_PRINTK		= 0x400,
 	TRACE_ITER_PREEMPTONLY		= 0x800,
-#ifdef CONFIG_UNLIKELY_TRACER
+#ifdef CONFIG_BRANCH_TRACER
 	TRACE_ITER_UNLIKELY		= 0x1000,
 #endif
 };
@@ -530,7 +530,7 @@ static inline void ftrace_preempt_enable(int resched)
 		preempt_enable_notrace();
 }
 
-#ifdef CONFIG_UNLIKELY_TRACER
+#ifdef CONFIG_BRANCH_TRACER
 extern int enable_unlikely_tracing(struct trace_array *tr);
 extern void disable_unlikely_tracing(void);
 static inline int trace_unlikely_enable(struct trace_array *tr)
@@ -552,6 +552,6 @@ static inline int trace_unlikely_enable(struct trace_array *tr)
 static inline void trace_unlikely_disable(void)
 {
 }
-#endif /* CONFIG_UNLIKELY_TRACER */
+#endif /* CONFIG_BRANCH_TRACER */
 
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c
index 7290e0e7b4e3..856eb3b7f694 100644
--- a/kernel/trace/trace_unlikely.c
+++ b/kernel/trace/trace_unlikely.c
@@ -15,7 +15,7 @@
 #include <asm/local.h>
 #include "trace.h"
 
-#ifdef CONFIG_UNLIKELY_TRACER
+#ifdef CONFIG_BRANCH_TRACER
 
 static int unlikely_tracing_enabled __read_mostly;
 static DEFINE_MUTEX(unlikely_tracing_mutex);
@@ -119,7 +119,7 @@ static inline
 void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
 }
-#endif /* CONFIG_UNLIKELY_TRACER */
+#endif /* CONFIG_BRANCH_TRACER */
 
 void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect)
 {
-- 
cgit v1.2.3


From 2378982487c492541d17adc0a870e7e83b07ba43 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 12 Nov 2008 23:25:32 -0800
Subject: net: ifdef struct sock::sk_async_wait_queue

Every user is under CONFIG_NET_DMA already, so ifdef field as well.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 08291c1be41e..8b2b82131b67 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -239,7 +239,9 @@ struct sock {
 	int			sk_sndbuf;
 	struct sk_buff_head	sk_receive_queue;
 	struct sk_buff_head	sk_write_queue;
+#ifdef CONFIG_NET_DMA
 	struct sk_buff_head	sk_async_wait_queue;
+#endif
 	int			sk_wmem_queued;
 	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
-- 
cgit v1.2.3


From da9592edebceeba1b9301beafe80ec8b9c2db0ce Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:05 +1100
Subject: CRED: Wrap task credential accesses in the filesystem subsystem

Wrap access to task credentials so that they can be separated more easily from
the task_struct during the introduction of COW creds.

Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id().

Change some task->e?[ug]id to task_e?[ug]id().  In some places it makes more
sense to use RCU directly rather than a convenient wrapper; these will be
addressed by later patches.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/anon_inodes.c      |  4 ++--
 fs/attr.c             |  4 ++--
 fs/binfmt_elf_fdpic.c |  8 ++++----
 fs/dquot.c            |  4 ++--
 fs/exec.c             | 18 +++++++++---------
 fs/fcntl.c            |  2 +-
 fs/inotify_user.c     |  2 +-
 fs/ioprio.c           |  4 ++--
 fs/locks.c            |  2 +-
 fs/namei.c            | 10 ++++++----
 fs/namespace.c        |  2 +-
 fs/pipe.c             |  4 ++--
 fs/posix_acl.c        |  4 ++--
 fs/quota.c            |  4 ++--
 include/linux/fs.h    |  2 +-
 15 files changed, 38 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3662dd44896b..c16d9be1b017 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -154,8 +154,8 @@ static struct inode *anon_inode_mkinode(void)
 	 */
 	inode->i_state = I_DIRTY;
 	inode->i_mode = S_IRUSR | S_IWUSR;
-	inode->i_uid = current->fsuid;
-	inode->i_gid = current->fsgid;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	return inode;
 }
diff --git a/fs/attr.c b/fs/attr.c
index 7a83819f6ba2..f4360192a938 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -29,13 +29,13 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
 
 	/* Make sure a caller can chown. */
 	if ((ia_valid & ATTR_UID) &&
-	    (current->fsuid != inode->i_uid ||
+	    (current_fsuid() != inode->i_uid ||
 	     attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
 		goto error;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
-	    (current->fsuid != inode->i_uid ||
+	    (current_fsuid() != inode->i_uid ||
 	    (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
 	    !capable(CAP_CHOWN))
 		goto error;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5b5424cb3391..488584c87512 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	NEW_AUX_ENT(AT_BASE,	interp_params->elfhdr_addr);
 	NEW_AUX_ENT(AT_FLAGS,	0);
 	NEW_AUX_ENT(AT_ENTRY,	exec_params->entry_addr);
-	NEW_AUX_ENT(AT_UID,	(elf_addr_t) current->uid);
-	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) current->euid);
-	NEW_AUX_ENT(AT_GID,	(elf_addr_t) current->gid);
-	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) current->egid);
+	NEW_AUX_ENT(AT_UID,	(elf_addr_t) current_uid());
+	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) current_euid());
+	NEW_AUX_ENT(AT_GID,	(elf_addr_t) current_gid());
+	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) current_egid());
 	NEW_AUX_ENT(AT_SECURE,	security_bprm_secureexec(bprm));
 	NEW_AUX_ENT(AT_EXECFN,	bprm->exec);
 
diff --git a/fs/dquot.c b/fs/dquot.c
index 5e95261005b2..c237ccc8581c 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -874,7 +874,7 @@ static inline int need_print_warning(struct dquot *dquot)
 
 	switch (dquot->dq_type) {
 		case USRQUOTA:
-			return current->fsuid == dquot->dq_id;
+			return current_fsuid() == dquot->dq_id;
 		case GRPQUOTA:
 			return in_group_p(dquot->dq_id);
 	}
@@ -981,7 +981,7 @@ static void send_warning(const struct dquot *dquot, const char warntype)
 		MINOR(dquot->dq_sb->s_dev));
 	if (ret)
 		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
+	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
 	if (ret)
 		goto attr_err_out;
 	genlmsg_end(skb, msg_head);
diff --git a/fs/exec.c b/fs/exec.c
index 4e834f16d9da..604834f3b208 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -980,7 +980,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	/* This is the point of no return */
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
-	if (current->euid == current->uid && current->egid == current->gid)
+	if (current_euid() == current_uid() && current_egid() == current_gid())
 		set_dumpable(current->mm, 1);
 	else
 		set_dumpable(current->mm, suid_dumpable);
@@ -1007,7 +1007,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	 */
 	current->mm->task_size = TASK_SIZE;
 
-	if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
+	if (bprm->e_uid != current_euid() || bprm->e_gid != current_egid()) {
 		suid_keys(current);
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
@@ -1047,8 +1047,8 @@ int prepare_binprm(struct linux_binprm *bprm)
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
-	bprm->e_uid = current->euid;
-	bprm->e_gid = current->egid;
+	bprm->e_uid = current_euid();
+	bprm->e_gid = current_egid();
 
 	if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
 		/* Set-uid? */
@@ -1096,7 +1096,7 @@ void compute_creds(struct linux_binprm *bprm)
 {
 	int unsafe;
 
-	if (bprm->e_uid != current->uid) {
+	if (bprm->e_uid != current_uid()) {
 		suid_keys(current);
 		current->pdeath_signal = 0;
 	}
@@ -1424,7 +1424,7 @@ static int format_corename(char *corename, long signr)
 			/* uid */
 			case 'u':
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->uid);
+					      "%d", current_uid());
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1432,7 +1432,7 @@ static int format_corename(char *corename, long signr)
 			/* gid */
 			case 'g':
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->gid);
+					      "%d", current_gid());
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1709,7 +1709,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	struct inode * inode;
 	struct file * file;
 	int retval = 0;
-	int fsuid = current->fsuid;
+	int fsuid = current_fsuid();
 	int flag = 0;
 	int ispipe = 0;
 	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1815,7 +1815,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * Dont allow local users get cute and trick others to coredump
 	 * into their pre-created files:
 	 */
-	if (inode->i_uid != current->fsuid)
+	if (inode->i_uid != current_fsuid())
 		goto close_fail;
 	if (!file->f_op)
 		goto close_fail;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ac4f7db9f134..bf049a805e59 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -211,7 +211,7 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 	if (err)
 		return err;
 
-	f_modown(filp, pid, type, current->uid, current->euid, force);
+	f_modown(filp, pid, type, current_uid(), current_euid(), force);
 	return 0;
 }
 EXPORT_SYMBOL(__f_setown);
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index d367e9b92862..e2425bbd871f 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -601,7 +601,7 @@ asmlinkage long sys_inotify_init1(int flags)
 		goto out_put_fd;
 	}
 
-	user = get_uid(current->user);
+	user = get_current_user();
 	if (unlikely(atomic_read(&user->inotify_devs) >=
 			inotify_max_user_instances)) {
 		ret = -EMFILE;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index da3cc460d4df..68d2cd807118 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	int err;
 	struct io_context *ioc;
 
-	if (task->uid != current->euid &&
-	    task->uid != current->uid && !capable(CAP_SYS_NICE))
+	if (task->uid != current_euid() &&
+	    task->uid != current_uid() && !capable(CAP_SYS_NICE))
 		return -EPERM;
 
 	err = security_task_setioprio(task, ioprio);
diff --git a/fs/locks.c b/fs/locks.c
index 09062e3ff104..46a2e12f7d42 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1349,7 +1349,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 	struct inode *inode = dentry->d_inode;
 	int error, rdlease_count = 0, wrlease_count = 0;
 
-	if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE))
+	if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
 		return -EACCES;
 	if (!S_ISREG(inode->i_mode))
 		return -EINVAL;
diff --git a/fs/namei.c b/fs/namei.c
index 09ce58e49e72..42d7b7606936 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -186,7 +186,7 @@ int generic_permission(struct inode *inode, int mask,
 
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 
-	if (current->fsuid == inode->i_uid)
+	if (current_fsuid() == inode->i_uid)
 		mode >>= 6;
 	else {
 		if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
@@ -441,7 +441,7 @@ static int exec_permission_lite(struct inode *inode)
 	if (inode->i_op && inode->i_op->permission)
 		return -EAGAIN;
 
-	if (current->fsuid == inode->i_uid)
+	if (current_fsuid() == inode->i_uid)
 		mode >>= 6;
 	else if (in_group_p(inode->i_gid))
 		mode >>= 3;
@@ -1334,11 +1334,13 @@ static int user_path_parent(int dfd, const char __user *path,
  */
 static inline int check_sticky(struct inode *dir, struct inode *inode)
 {
+	uid_t fsuid = current_fsuid();
+
 	if (!(dir->i_mode & S_ISVTX))
 		return 0;
-	if (inode->i_uid == current->fsuid)
+	if (inode->i_uid == fsuid)
 		return 0;
-	if (dir->i_uid == current->fsuid)
+	if (dir->i_uid == fsuid)
 		return 0;
 	return !capable(CAP_FOWNER);
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index cce46702d33c..d8bc2c4704a5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1176,7 +1176,7 @@ static int mount_is_safe(struct path *path)
 	if (S_ISLNK(path->dentry->d_inode->i_mode))
 		return -EPERM;
 	if (path->dentry->d_inode->i_mode & S_ISVTX) {
-		if (current->uid != path->dentry->d_inode->i_uid)
+		if (current_uid() != path->dentry->d_inode->i_uid)
 			return -EPERM;
 	}
 	if (inode_permission(path->dentry->d_inode, MAY_WRITE))
diff --git a/fs/pipe.c b/fs/pipe.c
index 7aea8b89baac..aaf797bd57b9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -899,8 +899,8 @@ static struct inode * get_pipe_inode(void)
 	 */
 	inode->i_state = I_DIRTY;
 	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
-	inode->i_uid = current->fsuid;
-	inode->i_gid = current->fsgid;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
 	return inode;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index aec931e09973..39df95a0ec25 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -217,11 +217,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
                 switch(pa->e_tag) {
                         case ACL_USER_OBJ:
 				/* (May have been checked already) */
-                                if (inode->i_uid == current->fsuid)
+				if (inode->i_uid == current_fsuid())
                                         goto check_perm;
                                 break;
                         case ACL_USER:
-                                if (pa->e_id == current->fsuid)
+				if (pa->e_id == current_fsuid())
                                         goto mask;
 				break;
                         case ACL_GROUP_OBJ:
diff --git a/fs/quota.c b/fs/quota.c
index 7f4386ebc23a..b7fe44e01618 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -79,7 +79,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
 
 	/* Check privileges */
 	if (cmd == Q_GETQUOTA) {
-		if (((type == USRQUOTA && current->euid != id) ||
+		if (((type == USRQUOTA && current_euid() != id) ||
 		     (type == GRPQUOTA && !in_egroup_p(id))) &&
 		    !capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -130,7 +130,7 @@ static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t i
 
 	/* Check privileges */
 	if (cmd == Q_XGETQUOTA) {
-		if (((type == XQM_USRQUOTA && current->euid != id) ||
+		if (((type == XQM_USRQUOTA && current_euid() != id) ||
 		     (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
 		     !capable(CAP_SYS_ADMIN))
 			return -EPERM;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0dcdd9458f4b..b3d404aaabed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1193,7 +1193,7 @@ enum {
 #define has_fs_excl() atomic_read(&current->fs_excl)
 
 #define is_owner_or_cap(inode)	\
-	((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER))
+	((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER))
 
 /* not quite ready to be deprecated, but... */
 extern void lock_super(struct super_block *);
-- 
cgit v1.2.3


From 8192b0c482d7078fcdcb4854341b977426f6f09b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:10 +1100
Subject: CRED: Wrap task credential accesses in the networking subsystem

Wrap access to task credentials so that they can be separated more easily from
the task_struct during the introduction of COW creds.

Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id().

Change some task->e?[ug]id to task_e?[ug]id().  In some places it makes more
sense to use RCU directly rather than a convenient wrapper; these will be
addressed by later patches.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: netdev@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/net/scm.h | 4 ++--
 net/core/dev.c    | 8 ++++++--
 net/core/scm.c    | 8 ++++----
 net/socket.c      | 4 ++--
 4 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/scm.h b/include/net/scm.h
index 06df126103ca..f160116db54a 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -54,8 +54,8 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 			       struct scm_cookie *scm)
 {
 	struct task_struct *p = current;
-	scm->creds.uid = p->uid;
-	scm->creds.gid = p->gid;
+	scm->creds.uid = current_uid();
+	scm->creds.gid = current_gid();
 	scm->creds.pid = task_tgid_vnr(p);
 	scm->fp = NULL;
 	scm->seq = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index d9038e328cc1..262df226b3c9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2958,6 +2958,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
+	uid_t uid;
+	gid_t gid;
 
 	ASSERT_RTNL();
 
@@ -2982,15 +2984,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
 		printk(KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
 							       "left");
-		if (audit_enabled)
+		if (audit_enabled) {
+			current_uid_gid(&uid, &gid);
 			audit_log(current->audit_context, GFP_ATOMIC,
 				AUDIT_ANOM_PROMISCUOUS,
 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
 				dev->name, (dev->flags & IFF_PROMISC),
 				(old_flags & IFF_PROMISC),
 				audit_get_loginuid(current),
-				current->uid, current->gid,
+				uid, gid,
 				audit_get_sessionid(current));
+		}
 
 		dev_change_rx_flags(dev, IFF_PROMISC);
 	}
diff --git a/net/core/scm.c b/net/core/scm.c
index 10f5c65f6a47..4681d8f9b45b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -45,10 +45,10 @@
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
 	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
-	    ((creds->uid == current->uid || creds->uid == current->euid ||
-	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
-	    ((creds->gid == current->gid || creds->gid == current->egid ||
-	      creds->gid == current->sgid) || capable(CAP_SETGID))) {
+	    ((creds->uid == current_uid()   || creds->uid == current_euid() ||
+	      creds->uid == current_suid()) || capable(CAP_SETUID)) &&
+	    ((creds->gid == current_gid()   || creds->gid == current_egid() ||
+	      creds->gid == current_sgid()) || capable(CAP_SETGID))) {
 	       return 0;
 	}
 	return -EPERM;
diff --git a/net/socket.c b/net/socket.c
index 57550c3bcabe..62c7729527ff 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -491,8 +491,8 @@ static struct socket *sock_alloc(void)
 	sock = SOCKET_I(inode);
 
 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
-	inode->i_uid = current->fsuid;
-	inode->i_gid = current->fsgid;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
 
 	get_cpu_var(sockets_in_use)++;
 	put_cpu_var(sockets_in_use);
-- 
cgit v1.2.3


From e9e349b051d98799b743ebf248cc2d986fedf090 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:13 +1100
Subject: KEYS: Disperse linux/key_ui.h

Disperse the bits of linux/key_ui.h as the reason they were put here (keyfs)
didn't get in.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/keys/keyring-type.h | 31 +++++++++++++++++++++
 include/linux/key-ui.h      | 66 ---------------------------------------------
 security/keys/internal.h    | 31 ++++++++++++++++++++-
 security/keys/keyring.c     |  1 +
 security/keys/request_key.c |  2 ++
 5 files changed, 64 insertions(+), 67 deletions(-)
 create mode 100644 include/keys/keyring-type.h
 delete mode 100644 include/linux/key-ui.h

(limited to 'include')

diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h
new file mode 100644
index 000000000000..843f872a4b63
--- /dev/null
+++ b/include/keys/keyring-type.h
@@ -0,0 +1,31 @@
+/* Keyring key type
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_KEYRING_TYPE_H
+#define _KEYS_KEYRING_TYPE_H
+
+#include <linux/key.h>
+#include <linux/rcupdate.h>
+
+/*
+ * the keyring payload contains a list of the keys to which the keyring is
+ * subscribed
+ */
+struct keyring_list {
+	struct rcu_head	rcu;		/* RCU deletion hook */
+	unsigned short	maxkeys;	/* max keys this list can hold */
+	unsigned short	nkeys;		/* number of keys currently held */
+	unsigned short	delkey;		/* key to be unlinked by RCU */
+	struct key	*keys[0];
+};
+
+
+#endif /* _KEYS_KEYRING_TYPE_H */
diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h
deleted file mode 100644
index e8b8a7a5c496..000000000000
--- a/include/linux/key-ui.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* key-ui.h: key userspace interface stuff
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_KEY_UI_H
-#define _LINUX_KEY_UI_H
-
-#include <linux/key.h>
-
-/* the key tree */
-extern struct rb_root key_serial_tree;
-extern spinlock_t key_serial_lock;
-
-/* required permissions */
-#define	KEY_VIEW	0x01	/* require permission to view attributes */
-#define	KEY_READ	0x02	/* require permission to read content */
-#define	KEY_WRITE	0x04	/* require permission to update / modify */
-#define	KEY_SEARCH	0x08	/* require permission to search (keyring) or find (key) */
-#define	KEY_LINK	0x10	/* require permission to link */
-#define	KEY_SETATTR	0x20	/* require permission to change attributes */
-#define	KEY_ALL		0x3f	/* all the above permissions */
-
-/*
- * the keyring payload contains a list of the keys to which the keyring is
- * subscribed
- */
-struct keyring_list {
-	struct rcu_head	rcu;		/* RCU deletion hook */
-	unsigned short	maxkeys;	/* max keys this list can hold */
-	unsigned short	nkeys;		/* number of keys currently held */
-	unsigned short	delkey;		/* key to be unlinked by RCU */
-	struct key	*keys[0];
-};
-
-/*
- * check to see whether permission is granted to use a key in the desired way
- */
-extern int key_task_permission(const key_ref_t key_ref,
-			       struct task_struct *context,
-			       key_perm_t perm);
-
-static inline int key_permission(const key_ref_t key_ref, key_perm_t perm)
-{
-	return key_task_permission(key_ref, current, perm);
-}
-
-extern key_ref_t lookup_user_key(struct task_struct *context,
-				 key_serial_t id, int create, int partial,
-				 key_perm_t perm);
-
-extern long join_session_keyring(const char *name);
-
-extern struct key_type *key_type_lookup(const char *type);
-extern void key_type_put(struct key_type *ktype);
-
-#define key_negative_timeout	60	/* default timeout on a negative key's existence */
-
-
-#endif /* _LINUX_KEY_UI_H */
diff --git a/security/keys/internal.h b/security/keys/internal.h
index b39f5c2e2c4b..a60c68138b4d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -13,7 +13,6 @@
 #define _INTERNAL_H
 
 #include <linux/key-type.h>
-#include <linux/key-ui.h>
 
 static inline __attribute__((format(printf, 1, 2)))
 void no_printk(const char *fmt, ...)
@@ -82,6 +81,9 @@ extern struct mutex key_construction_mutex;
 extern wait_queue_head_t request_key_conswq;
 
 
+extern struct key_type *key_type_lookup(const char *type);
+extern void key_type_put(struct key_type *ktype);
+
 extern int __key_link(struct key *keyring, struct key *key);
 
 extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
@@ -118,6 +120,33 @@ extern struct key *request_key_and_link(struct key_type *type,
 					struct key *dest_keyring,
 					unsigned long flags);
 
+extern key_ref_t lookup_user_key(struct task_struct *context,
+				 key_serial_t id, int create, int partial,
+				 key_perm_t perm);
+
+extern long join_session_keyring(const char *name);
+
+/*
+ * check to see whether permission is granted to use a key in the desired way
+ */
+extern int key_task_permission(const key_ref_t key_ref,
+			       struct task_struct *context,
+			       key_perm_t perm);
+
+static inline int key_permission(const key_ref_t key_ref, key_perm_t perm)
+{
+	return key_task_permission(key_ref, current, perm);
+}
+
+/* required permissions */
+#define	KEY_VIEW	0x01	/* require permission to view attributes */
+#define	KEY_READ	0x02	/* require permission to read content */
+#define	KEY_WRITE	0x04	/* require permission to update / modify */
+#define	KEY_SEARCH	0x08	/* require permission to search (keyring) or find (key) */
+#define	KEY_LINK	0x10	/* require permission to link */
+#define	KEY_SETATTR	0x20	/* require permission to change attributes */
+#define	KEY_ALL		0x3f	/* all the above permissions */
+
 /*
  * request_key authorisation
  */
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index a9ab8affc092..fdf75f901991 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -16,6 +16,7 @@
 #include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/err.h>
+#include <keys/keyring-type.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index a8ebc9520cac..91953c814497 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -19,6 +19,8 @@
 #include <linux/slab.h>
 #include "internal.h"
 
+#define key_negative_timeout	60	/* default timeout on a negative key's existence */
+
 /*
  * wait_on_bit() sleep function for uninterruptible waiting
  */
-- 
cgit v1.2.3


From 8bbf4976b59fc9fc2861e79cab7beb3f6d647640 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:14 +1100
Subject: KEYS: Alter use of key instantiation link-to-keyring argument

Alter the use of the key instantiation and negation functions' link-to-keyring
arguments.  Currently this specifies a keyring in the target process to link
the key into, creating the keyring if it doesn't exist.  This, however, can be
a problem for copy-on-write credentials as it means that the instantiating
process can alter the credentials of the requesting process.

This patch alters the behaviour such that:

 (1) If keyctl_instantiate_key() or keyctl_negate_key() are given a specific
     keyring by ID (ringid >= 0), then that keyring will be used.

 (2) If keyctl_instantiate_key() or keyctl_negate_key() are given one of the
     special constants that refer to the requesting process's keyrings
     (KEY_SPEC_*_KEYRING, all <= 0), then:

     (a) If sys_request_key() was given a keyring to use (destringid) then the
     	 key will be attached to that keyring.

     (b) If sys_request_key() was given a NULL keyring, then the key being
     	 instantiated will be attached to the default keyring as set by
     	 keyctl_set_reqkey_keyring().

 (3) No extra link will be made.

Decision point (1) follows current behaviour, and allows those instantiators
who've searched for a specifically named keyring in the requestor's keyring so
as to partition the keys by type to still have their named keyrings.

Decision point (2) allows the requestor to make sure that the key or keys that
get produced by request_key() go where they want, whilst allowing the
instantiator to request that the key is retained.  This is mainly useful for
situations where the instantiator makes a secondary request, the key for which
should be retained by the initial requestor:

	+-----------+        +--------------+        +--------------+
	|           |        |              |        |              |
	| Requestor |------->| Instantiator |------->| Instantiator |
	|           |        |              |        |              |
	+-----------+        +--------------+        +--------------+
	           request_key()           request_key()

This might be useful, for example, in Kerberos, where the requestor requests a
ticket, and then the ticket instantiator requests the TGT, which someone else
then has to go and fetch.  The TGT, however, should be retained in the
keyrings of the requestor, not the first instantiator.  To make this explict
an extra special keyring constant is also added.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h              |  16 +++---
 include/linux/keyctl.h           |   4 +-
 kernel/kmod.c                    |   2 +-
 security/keys/internal.h         |  12 ++--
 security/keys/keyctl.c           | 118 +++++++++++++++++++++++----------------
 security/keys/process_keys.c     |  88 ++++++++++++++++++-----------
 security/keys/request_key.c      |  75 +++++++++++++++++--------
 security/keys/request_key_auth.c |   5 +-
 8 files changed, 199 insertions(+), 121 deletions(-)

(limited to 'include')

diff --git a/include/linux/key.h b/include/linux/key.h
index 1b70e35a71e3..df709e1af3cd 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -287,11 +287,11 @@ extern void key_fsuid_changed(struct task_struct *tsk);
 extern void key_fsgid_changed(struct task_struct *tsk);
 extern void key_init(void);
 
-#define __install_session_keyring(tsk, keyring)			\
-({								\
-	struct key *old_session = tsk->signal->session_keyring;	\
-	tsk->signal->session_keyring = keyring;			\
-	old_session;						\
+#define __install_session_keyring(keyring)				\
+({									\
+	struct key *old_session = current->signal->session_keyring;	\
+	current->signal->session_keyring = keyring;			\
+	old_session;							\
 })
 
 #else /* CONFIG_KEYS */
@@ -302,11 +302,11 @@ extern void key_init(void);
 #define key_revoke(k)			do { } while(0)
 #define key_put(k)			do { } while(0)
 #define key_ref_put(k)			do { } while(0)
-#define make_key_ref(k, p)			({ NULL; })
-#define key_ref_to_ptr(k)		({ NULL; })
+#define make_key_ref(k, p)		NULL
+#define key_ref_to_ptr(k)		NULL
 #define is_key_possessed(k)		0
 #define switch_uid_keyring(u)		do { } while(0)
-#define __install_session_keyring(t, k)	({ NULL; })
+#define __install_session_keyring(k)	({ NULL; })
 #define copy_keys(f,t)			0
 #define copy_thread_group_keys(t)	0
 #define exit_keys(t)			do { } while(0)
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index 656ee6b77a4a..c0688eb72093 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -1,6 +1,6 @@
 /* keyctl.h: keyctl command IDs
  *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2004, 2008 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -20,6 +20,7 @@
 #define KEY_SPEC_USER_SESSION_KEYRING	-5	/* - key ID for UID-session keyring */
 #define KEY_SPEC_GROUP_KEYRING		-6	/* - key ID for GID-specific keyring */
 #define KEY_SPEC_REQKEY_AUTH_KEY	-7	/* - key ID for assumed request_key auth key */
+#define KEY_SPEC_REQUESTOR_KEYRING	-8	/* - key ID for request_key() dest keyring */
 
 /* request-key default keyrings */
 #define KEY_REQKEY_DEFL_NO_CHANGE		-1
@@ -30,6 +31,7 @@
 #define KEY_REQKEY_DEFL_USER_KEYRING		4
 #define KEY_REQKEY_DEFL_USER_SESSION_KEYRING	5
 #define KEY_REQKEY_DEFL_GROUP_KEYRING		6
+#define KEY_REQKEY_DEFL_REQUESTOR_KEYRING	7
 
 /* keyctl commands */
 #define KEYCTL_GET_KEYRING_ID		0	/* ask for a keyring's ID */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 3d3c3ea3a023..f044f8f57703 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -140,7 +140,7 @@ static int ____call_usermodehelper(void *data)
 	/* Unblock all signals and set the session keyring. */
 	new_session = key_get(sub_info->ring);
 	spin_lock_irq(&current->sighand->siglock);
-	old_session = __install_session_keyring(current, new_session);
+	old_session = __install_session_keyring(new_session);
 	flush_signal_handlers(current, 1);
 	sigemptyset(&current->blocked);
 	recalc_sigpending();
diff --git a/security/keys/internal.h b/security/keys/internal.h
index a60c68138b4d..d1586c629788 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -109,8 +109,9 @@ extern key_ref_t search_process_keyrings(struct key_type *type,
 
 extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 
-extern int install_thread_keyring(struct task_struct *tsk);
-extern int install_process_keyring(struct task_struct *tsk);
+extern int install_user_keyrings(void);
+extern int install_thread_keyring(void);
+extern int install_process_keyring(void);
 
 extern struct key *request_key_and_link(struct key_type *type,
 					const char *description,
@@ -120,8 +121,7 @@ extern struct key *request_key_and_link(struct key_type *type,
 					struct key *dest_keyring,
 					unsigned long flags);
 
-extern key_ref_t lookup_user_key(struct task_struct *context,
-				 key_serial_t id, int create, int partial,
+extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 				 key_perm_t perm);
 
 extern long join_session_keyring(const char *name);
@@ -152,6 +152,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm)
  */
 struct request_key_auth {
 	struct key		*target_key;
+	struct key		*dest_keyring;
 	struct task_struct	*context;
 	void			*callout_info;
 	size_t			callout_len;
@@ -161,7 +162,8 @@ struct request_key_auth {
 extern struct key_type key_type_request_key_auth;
 extern struct key *request_key_auth_new(struct key *target,
 					const void *callout_info,
-					size_t callout_len);
+					size_t callout_len,
+					struct key *dest_keyring);
 
 extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
 
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 3f09e5b2a784..fcce331eca72 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -103,7 +103,7 @@ asmlinkage long sys_add_key(const char __user *_type,
 	}
 
 	/* find the target keyring (which must be writable) */
-	keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error3;
@@ -185,7 +185,7 @@ asmlinkage long sys_request_key(const char __user *_type,
 	/* get the destination keyring if specified */
 	dest_ref = NULL;
 	if (destringid) {
-		dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE);
+		dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
 		if (IS_ERR(dest_ref)) {
 			ret = PTR_ERR(dest_ref);
 			goto error3;
@@ -235,7 +235,7 @@ long keyctl_get_keyring_ID(key_serial_t id, int create)
 	key_ref_t key_ref;
 	long ret;
 
-	key_ref = lookup_user_key(NULL, id, create, 0, KEY_SEARCH);
+	key_ref = lookup_user_key(id, create, 0, KEY_SEARCH);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -308,7 +308,7 @@ long keyctl_update_key(key_serial_t id,
 	}
 
 	/* find the target key (which must be writable) */
-	key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE);
+	key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -336,7 +336,7 @@ long keyctl_revoke_key(key_serial_t id)
 	key_ref_t key_ref;
 	long ret;
 
-	key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE);
+	key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -362,7 +362,7 @@ long keyctl_keyring_clear(key_serial_t ringid)
 	key_ref_t keyring_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
@@ -388,13 +388,13 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid)
 	key_ref_t keyring_ref, key_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
 	}
 
-	key_ref = lookup_user_key(NULL, id, 1, 0, KEY_LINK);
+	key_ref = lookup_user_key(id, 1, 0, KEY_LINK);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -422,13 +422,13 @@ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid)
 	key_ref_t keyring_ref, key_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
 	}
 
-	key_ref = lookup_user_key(NULL, id, 0, 0, 0);
+	key_ref = lookup_user_key(id, 0, 0, 0);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -464,7 +464,7 @@ long keyctl_describe_key(key_serial_t keyid,
 	char *tmpbuf;
 	long ret;
 
-	key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW);
+	key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
 	if (IS_ERR(key_ref)) {
 		/* viewing a key under construction is permitted if we have the
 		 * authorisation token handy */
@@ -472,7 +472,7 @@ long keyctl_describe_key(key_serial_t keyid,
 			instkey = key_get_instantiation_authkey(keyid);
 			if (!IS_ERR(instkey)) {
 				key_put(instkey);
-				key_ref = lookup_user_key(NULL, keyid,
+				key_ref = lookup_user_key(keyid,
 							  0, 1, 0);
 				if (!IS_ERR(key_ref))
 					goto okay;
@@ -557,7 +557,7 @@ long keyctl_keyring_search(key_serial_t ringid,
 	}
 
 	/* get the keyring at which to begin the search */
-	keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH);
+	keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error2;
@@ -566,7 +566,7 @@ long keyctl_keyring_search(key_serial_t ringid,
 	/* get the destination keyring if specified */
 	dest_ref = NULL;
 	if (destringid) {
-		dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE);
+		dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
 		if (IS_ERR(dest_ref)) {
 			ret = PTR_ERR(dest_ref);
 			goto error3;
@@ -636,7 +636,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
 	long ret;
 
 	/* find the key first */
-	key_ref = lookup_user_key(NULL, keyid, 0, 0, 0);
+	key_ref = lookup_user_key(keyid, 0, 0, 0);
 	if (IS_ERR(key_ref)) {
 		ret = -ENOKEY;
 		goto error;
@@ -699,7 +699,7 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid)
 	if (uid == (uid_t) -1 && gid == (gid_t) -1)
 		goto error;
 
-	key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR);
+	key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -804,7 +804,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
 	if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
 		goto error;
 
-	key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR);
+	key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -829,6 +829,43 @@ error:
 
 } /* end keyctl_setperm_key() */
 
+/*
+ * get the destination keyring for instantiation
+ */
+static long get_instantiation_keyring(key_serial_t ringid,
+				      struct request_key_auth *rka,
+				      struct key **_dest_keyring)
+{
+	key_ref_t dkref;
+
+	/* just return a NULL pointer if we weren't asked to make a link */
+	if (ringid == 0) {
+		*_dest_keyring = NULL;
+		return 0;
+	}
+
+	/* if a specific keyring is nominated by ID, then use that */
+	if (ringid > 0) {
+		dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+		if (IS_ERR(dkref))
+			return PTR_ERR(dkref);
+		*_dest_keyring = key_ref_to_ptr(dkref);
+		return 0;
+	}
+
+	if (ringid == KEY_SPEC_REQKEY_AUTH_KEY)
+		return -EINVAL;
+
+	/* otherwise specify the destination keyring recorded in the
+	 * authorisation key (any KEY_SPEC_*_KEYRING) */
+	if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) {
+		*_dest_keyring = rka->dest_keyring;
+		return 0;
+	}
+
+	return -ENOKEY;
+}
+
 /*****************************************************************************/
 /*
  * instantiate the key with the specified payload, and, if one is given, link
@@ -840,8 +877,7 @@ long keyctl_instantiate_key(key_serial_t id,
 			    key_serial_t ringid)
 {
 	struct request_key_auth *rka;
-	struct key *instkey;
-	key_ref_t keyring_ref;
+	struct key *instkey, *dest_keyring;
 	void *payload;
 	long ret;
 	bool vm = false;
@@ -883,21 +919,15 @@ long keyctl_instantiate_key(key_serial_t id,
 
 	/* find the destination keyring amongst those belonging to the
 	 * requesting task */
-	keyring_ref = NULL;
-	if (ringid) {
-		keyring_ref = lookup_user_key(rka->context, ringid, 1, 0,
-					      KEY_WRITE);
-		if (IS_ERR(keyring_ref)) {
-			ret = PTR_ERR(keyring_ref);
-			goto error2;
-		}
-	}
+	ret = get_instantiation_keyring(ringid, rka, &dest_keyring);
+	if (ret < 0)
+		goto error2;
 
 	/* instantiate the key and link it into a keyring */
 	ret = key_instantiate_and_link(rka->target_key, payload, plen,
-				       key_ref_to_ptr(keyring_ref), instkey);
+				       dest_keyring, instkey);
 
-	key_ref_put(keyring_ref);
+	key_put(dest_keyring);
 
 	/* discard the assumed authority if it's just been disabled by
 	 * instantiation of the key */
@@ -924,8 +954,7 @@ error:
 long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
 {
 	struct request_key_auth *rka;
-	struct key *instkey;
-	key_ref_t keyring_ref;
+	struct key *instkey, *dest_keyring;
 	long ret;
 
 	/* the appropriate instantiation authorisation key must have been
@@ -941,20 +970,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
 
 	/* find the destination keyring if present (which must also be
 	 * writable) */
-	keyring_ref = NULL;
-	if (ringid) {
-		keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE);
-		if (IS_ERR(keyring_ref)) {
-			ret = PTR_ERR(keyring_ref);
-			goto error;
-		}
-	}
+	ret = get_instantiation_keyring(ringid, rka, &dest_keyring);
+	if (ret < 0)
+		goto error;
 
 	/* instantiate the key and link it into a keyring */
 	ret = key_negate_and_link(rka->target_key, timeout,
-				  key_ref_to_ptr(keyring_ref), instkey);
+				  dest_keyring, instkey);
 
-	key_ref_put(keyring_ref);
+	key_put(dest_keyring);
 
 	/* discard the assumed authority if it's just been disabled by
 	 * instantiation of the key */
@@ -979,13 +1003,13 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
 
 	switch (reqkey_defl) {
 	case KEY_REQKEY_DEFL_THREAD_KEYRING:
-		ret = install_thread_keyring(current);
+		ret = install_thread_keyring();
 		if (ret < 0)
 			return ret;
 		goto set;
 
 	case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-		ret = install_process_keyring(current);
+		ret = install_process_keyring();
 		if (ret < 0)
 			return ret;
 
@@ -1018,7 +1042,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
 	time_t expiry;
 	long ret;
 
-	key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR);
+	key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -1105,7 +1129,7 @@ long keyctl_get_security(key_serial_t keyid,
 	char *context;
 	long ret;
 
-	key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW);
+	key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
 	if (IS_ERR(key_ref)) {
 		if (PTR_ERR(key_ref) != -EACCES)
 			return PTR_ERR(key_ref);
@@ -1117,7 +1141,7 @@ long keyctl_get_security(key_serial_t keyid,
 			return PTR_ERR(key_ref);
 		key_put(instkey);
 
-		key_ref = lookup_user_key(NULL, keyid, 0, 1, 0);
+		key_ref = lookup_user_key(keyid, 0, 1, 0);
 		if (IS_ERR(key_ref))
 			return PTR_ERR(key_ref);
 	}
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 5be6d018759a..1c793b7090a7 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -40,9 +40,9 @@ struct key_user root_key_user = {
 /*
  * install user and user session keyrings for a particular UID
  */
-static int install_user_keyrings(struct task_struct *tsk)
+int install_user_keyrings(void)
 {
-	struct user_struct *user = tsk->user;
+	struct user_struct *user = current->user;
 	struct key *uid_keyring, *session_keyring;
 	char buf[20];
 	int ret;
@@ -67,7 +67,7 @@ static int install_user_keyrings(struct task_struct *tsk)
 		uid_keyring = find_keyring_by_name(buf, true);
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
-						    tsk, KEY_ALLOC_IN_QUOTA,
+						    current, KEY_ALLOC_IN_QUOTA,
 						    NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
@@ -83,7 +83,8 @@ static int install_user_keyrings(struct task_struct *tsk)
 		if (IS_ERR(session_keyring)) {
 			session_keyring =
 				keyring_alloc(buf, user->uid, (gid_t) -1,
-					      tsk, KEY_ALLOC_IN_QUOTA, NULL);
+					      current, KEY_ALLOC_IN_QUOTA,
+					      NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
 				goto error_release;
@@ -146,8 +147,9 @@ void switch_uid_keyring(struct user_struct *new_user)
 /*
  * install a fresh thread keyring, discarding the old one
  */
-int install_thread_keyring(struct task_struct *tsk)
+int install_thread_keyring(void)
 {
+	struct task_struct *tsk = current;
 	struct key *keyring, *old;
 	char buf[20];
 	int ret;
@@ -178,8 +180,9 @@ error:
 /*
  * make sure a process keyring is installed
  */
-int install_process_keyring(struct task_struct *tsk)
+int install_process_keyring(void)
 {
+	struct task_struct *tsk = current;
 	struct key *keyring;
 	char buf[20];
 	int ret;
@@ -218,9 +221,9 @@ error:
  * install a session keyring, discarding the old one
  * - if a keyring is not supplied, an empty one is invented
  */
-static int install_session_keyring(struct task_struct *tsk,
-				   struct key *keyring)
+static int install_session_keyring(struct key *keyring)
 {
+	struct task_struct *tsk = current;
 	unsigned long flags;
 	struct key *old;
 	char buf[20];
@@ -572,93 +575,91 @@ static int lookup_user_key_possessed(const struct key *key, const void *target)
  * - don't create special keyrings unless so requested
  * - partially constructed keys aren't found unless requested
  */
-key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id,
-			  int create, int partial, key_perm_t perm)
+key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+			  key_perm_t perm)
 {
+	struct request_key_auth *rka;
+	struct task_struct *t = current;
 	key_ref_t key_ref, skey_ref;
 	struct key *key;
 	int ret;
 
-	if (!context)
-		context = current;
-
 	key_ref = ERR_PTR(-ENOKEY);
 
 	switch (id) {
 	case KEY_SPEC_THREAD_KEYRING:
-		if (!context->thread_keyring) {
+		if (!t->thread_keyring) {
 			if (!create)
 				goto error;
 
-			ret = install_thread_keyring(context);
+			ret = install_thread_keyring();
 			if (ret < 0) {
 				key = ERR_PTR(ret);
 				goto error;
 			}
 		}
 
-		key = context->thread_keyring;
+		key = t->thread_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_PROCESS_KEYRING:
-		if (!context->signal->process_keyring) {
+		if (!t->signal->process_keyring) {
 			if (!create)
 				goto error;
 
-			ret = install_process_keyring(context);
+			ret = install_process_keyring();
 			if (ret < 0) {
 				key = ERR_PTR(ret);
 				goto error;
 			}
 		}
 
-		key = context->signal->process_keyring;
+		key = t->signal->process_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_SESSION_KEYRING:
-		if (!context->signal->session_keyring) {
+		if (!t->signal->session_keyring) {
 			/* always install a session keyring upon access if one
 			 * doesn't exist yet */
-			ret = install_user_keyrings(context);
+			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
-			ret = install_session_keyring(
-				context, context->user->session_keyring);
+			ret = install_session_keyring(t->user->session_keyring);
 			if (ret < 0)
 				goto error;
 		}
 
 		rcu_read_lock();
-		key = rcu_dereference(context->signal->session_keyring);
+		key = rcu_dereference(t->signal->session_keyring);
 		atomic_inc(&key->usage);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_USER_KEYRING:
-		if (!context->user->uid_keyring) {
-			ret = install_user_keyrings(context);
+		if (!t->user->uid_keyring) {
+			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
 		}
 
-		key = context->user->uid_keyring;
+		key = t->user->uid_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_USER_SESSION_KEYRING:
-		if (!context->user->session_keyring) {
-			ret = install_user_keyrings(context);
+		if (!t->user->session_keyring) {
+			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
 		}
 
-		key = context->user->session_keyring;
+		key = t->user->session_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
@@ -669,7 +670,7 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id,
 		goto error;
 
 	case KEY_SPEC_REQKEY_AUTH_KEY:
-		key = context->request_key_auth;
+		key = t->request_key_auth;
 		if (!key)
 			goto error;
 
@@ -677,6 +678,25 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id,
 		key_ref = make_key_ref(key, 1);
 		break;
 
+	case KEY_SPEC_REQUESTOR_KEYRING:
+		if (!t->request_key_auth)
+			goto error;
+
+		down_read(&t->request_key_auth->sem);
+		if (t->request_key_auth->flags & KEY_FLAG_REVOKED) {
+			key_ref = ERR_PTR(-EKEYREVOKED);
+			key = NULL;
+		} else {
+			rka = t->request_key_auth->payload.data;
+			key = rka->dest_keyring;
+			atomic_inc(&key->usage);
+		}
+		up_read(&t->request_key_auth->sem);
+		if (!key)
+			goto error;
+		key_ref = make_key_ref(key, 1);
+		break;
+
 	default:
 		key_ref = ERR_PTR(-EINVAL);
 		if (id < 1)
@@ -725,7 +745,7 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id,
 		goto invalid_key;
 
 	/* check the permissions */
-	ret = key_task_permission(key_ref, context, perm);
+	ret = key_task_permission(key_ref, t, perm);
 	if (ret < 0)
 		goto invalid_key;
 
@@ -754,7 +774,7 @@ long join_session_keyring(const char *name)
 
 	/* if no name is provided, install an anonymous keyring */
 	if (!name) {
-		ret = install_session_keyring(tsk, NULL);
+		ret = install_session_keyring(NULL);
 		if (ret < 0)
 			goto error;
 
@@ -784,7 +804,7 @@ long join_session_keyring(const char *name)
 	}
 
 	/* we've got a keyring - now to install it */
-	ret = install_session_keyring(tsk, keyring);
+	ret = install_session_keyring(keyring);
 	if (ret < 0)
 		goto error2;
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 91953c814497..8e9d93b4a402 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -76,6 +76,10 @@ static int call_sbin_request_key(struct key_construction *cons,
 
 	kenter("{%d},{%d},%s", key->serial, authkey->serial, op);
 
+	ret = install_user_keyrings();
+	if (ret < 0)
+		goto error_alloc;
+
 	/* allocate a new session keyring */
 	sprintf(desc, "_req.%u", key->serial);
 
@@ -165,7 +169,8 @@ error_alloc:
  * - we ignore program failure and go on key status instead
  */
 static int construct_key(struct key *key, const void *callout_info,
-			 size_t callout_len, void *aux)
+			 size_t callout_len, void *aux,
+			 struct key *dest_keyring)
 {
 	struct key_construction *cons;
 	request_key_actor_t actor;
@@ -179,7 +184,8 @@ static int construct_key(struct key *key, const void *callout_info,
 		return -ENOMEM;
 
 	/* allocate an authorisation key */
-	authkey = request_key_auth_new(key, callout_info, callout_len);
+	authkey = request_key_auth_new(key, callout_info, callout_len,
+				       dest_keyring);
 	if (IS_ERR(authkey)) {
 		kfree(cons);
 		ret = PTR_ERR(authkey);
@@ -207,27 +213,48 @@ static int construct_key(struct key *key, const void *callout_info,
 }
 
 /*
- * link a key to the appropriate destination keyring
- * - the caller must hold a write lock on the destination keyring
+ * get the appropriate destination keyring for the request
+ * - we return whatever keyring we select with an extra reference upon it which
+ *   the caller must release
  */
-static void construct_key_make_link(struct key *key, struct key *dest_keyring)
+static void construct_get_dest_keyring(struct key **_dest_keyring)
 {
+	struct request_key_auth *rka;
 	struct task_struct *tsk = current;
-	struct key *drop = NULL;
+	struct key *dest_keyring = *_dest_keyring, *authkey;
 
-	kenter("{%d},%p", key->serial, dest_keyring);
+	kenter("%p", dest_keyring);
 
 	/* find the appropriate keyring */
-	if (!dest_keyring) {
+	if (dest_keyring) {
+		/* the caller supplied one */
+		key_get(dest_keyring);
+	} else {
+		/* use a default keyring; falling through the cases until we
+		 * find one that we actually have */
 		switch (tsk->jit_keyring) {
 		case KEY_REQKEY_DEFL_DEFAULT:
+		case KEY_REQKEY_DEFL_REQUESTOR_KEYRING:
+			if (tsk->request_key_auth) {
+				authkey = tsk->request_key_auth;
+				down_read(&authkey->sem);
+				rka = authkey->payload.data;
+				if (!test_bit(KEY_FLAG_REVOKED,
+					      &authkey->flags))
+					dest_keyring =
+						key_get(rka->dest_keyring);
+				up_read(&authkey->sem);
+				if (dest_keyring)
+					break;
+			}
+
 		case KEY_REQKEY_DEFL_THREAD_KEYRING:
-			dest_keyring = tsk->thread_keyring;
+			dest_keyring = key_get(tsk->thread_keyring);
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-			dest_keyring = tsk->signal->process_keyring;
+			dest_keyring = key_get(tsk->signal->process_keyring);
 			if (dest_keyring)
 				break;
 
@@ -236,17 +263,16 @@ static void construct_key_make_link(struct key *key, struct key *dest_keyring)
 			dest_keyring = key_get(
 				rcu_dereference(tsk->signal->session_keyring));
 			rcu_read_unlock();
-			drop = dest_keyring;
 
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
-			dest_keyring = tsk->user->session_keyring;
+			dest_keyring = key_get(tsk->user->session_keyring);
 			break;
 
 		case KEY_REQKEY_DEFL_USER_KEYRING:
-			dest_keyring = tsk->user->uid_keyring;
+			dest_keyring = key_get(tsk->user->uid_keyring);
 			break;
 
 		case KEY_REQKEY_DEFL_GROUP_KEYRING:
@@ -255,10 +281,9 @@ static void construct_key_make_link(struct key *key, struct key *dest_keyring)
 		}
 	}
 
-	/* and attach the key to it */
-	__key_link(dest_keyring, key);
-	key_put(drop);
-	kleave("");
+	*_dest_keyring = dest_keyring;
+	kleave(" [dk %d]", key_serial(dest_keyring));
+	return;
 }
 
 /*
@@ -288,8 +313,7 @@ static int construct_alloc_key(struct key_type *type,
 
 	set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
-	if (dest_keyring)
-		down_write(&dest_keyring->sem);
+	down_write(&dest_keyring->sem);
 
 	/* attach the key to the destination keyring under lock, but we do need
 	 * to do another check just in case someone beat us to it whilst we
@@ -301,12 +325,10 @@ static int construct_alloc_key(struct key_type *type,
 	if (!IS_ERR(key_ref))
 		goto key_already_present;
 
-	if (dest_keyring)
-		construct_key_make_link(key, dest_keyring);
+	__key_link(dest_keyring, key);
 
 	mutex_unlock(&key_construction_mutex);
-	if (dest_keyring)
-		up_write(&dest_keyring->sem);
+	up_write(&dest_keyring->sem);
 	mutex_unlock(&user->cons_lock);
 	*_key = key;
 	kleave(" = 0 [%d]", key_serial(key));
@@ -348,21 +370,26 @@ static struct key *construct_key_and_link(struct key_type *type,
 	if (!user)
 		return ERR_PTR(-ENOMEM);
 
+	construct_get_dest_keyring(&dest_keyring);
+
 	ret = construct_alloc_key(type, description, dest_keyring, flags, user,
 				  &key);
 	key_user_put(user);
 
 	if (ret == 0) {
-		ret = construct_key(key, callout_info, callout_len, aux);
+		ret = construct_key(key, callout_info, callout_len, aux,
+				    dest_keyring);
 		if (ret < 0)
 			goto construction_failed;
 	}
 
+	key_put(dest_keyring);
 	return key;
 
 construction_failed:
 	key_negate_and_link(key, key_negative_timeout, NULL, NULL);
 	key_put(key);
+	key_put(dest_keyring);
 	return ERR_PTR(ret);
 }
 
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 729156b3485e..1762d44711d5 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -128,6 +128,7 @@ static void request_key_auth_destroy(struct key *key)
 	}
 
 	key_put(rka->target_key);
+	key_put(rka->dest_keyring);
 	kfree(rka->callout_info);
 	kfree(rka);
 
@@ -139,7 +140,7 @@ static void request_key_auth_destroy(struct key *key)
  * access to the caller's security data
  */
 struct key *request_key_auth_new(struct key *target, const void *callout_info,
-				 size_t callout_len)
+				 size_t callout_len, struct key *dest_keyring)
 {
 	struct request_key_auth *rka, *irka;
 	struct key *authkey = NULL;
@@ -188,6 +189,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	}
 
 	rka->target_key = key_get(target);
+	rka->dest_keyring = key_get(dest_keyring);
 	memcpy(rka->callout_info, callout_info, callout_len);
 	rka->callout_len = callout_len;
 
@@ -223,6 +225,7 @@ error_inst:
 	key_put(authkey);
 error_alloc:
 	key_put(rka->target_key);
+	key_put(rka->dest_keyring);
 	kfree(rka->callout_info);
 	kfree(rka);
 	kleave("= %d", ret);
-- 
cgit v1.2.3


From 1cdcbec1a3372c0c49c59d292e708fd07b509f18 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:14 +1100
Subject: CRED: Neuter sys_capset()

Take away the ability for sys_capset() to affect processes other than current.

This means that current will not need to lock its own credentials when reading
them against interference by other processes.

This has effectively been the case for a while anyway, since:

 (1) Without LSM enabled, sys_capset() is disallowed.

 (2) With file-based capabilities, sys_capset() is neutered.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/open.c                |  12 +--
 include/linux/security.h |  48 ++++------
 kernel/capability.c      | 227 +++++------------------------------------------
 security/commoncap.c     |  29 ++----
 security/security.c      |  18 ++--
 security/selinux/hooks.c |  10 +--
 6 files changed, 61 insertions(+), 283 deletions(-)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index 83cdb9dee0c1..500cc0c54762 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -441,17 +441,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 	current->fsgid = current->gid;
 
 	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
-		/*
-		 * Clear the capabilities if we switch to a non-root user
-		 */
-#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
-		/*
-		 * FIXME: There is a race here against sys_capset.  The
-		 * capabilities can change yet we will restore the old
-		 * value below.  We should hold task_capabilities_lock,
-		 * but we cannot because user_path_at can sleep.
-		 */
-#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
+		/* Clear the capabilities if we switch to a non-root user */
 		if (current->uid)
 			old_cap = cap_set_effective(__cap_empty_set);
 		else
diff --git a/include/linux/security.h b/include/linux/security.h
index 5fe28a671cd3..d1ce8beddbd7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,8 +53,8 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
 extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
-extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
-extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
+extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
+extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
 extern int cap_bprm_set_security(struct linux_binprm *bprm);
 extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
 extern int cap_bprm_secureexec(struct linux_binprm *bprm);
@@ -1191,24 +1191,14 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Return 0 if the capability sets were successfully obtained.
  * @capset_check:
  *	Check permission before setting the @effective, @inheritable, and
- *	@permitted capability sets for the @target process.
- *	Caveat:  @target is also set to current if a set of processes is
- *	specified (i.e. all processes other than current and init or a
- *	particular process group).  Hence, the capset_set hook may need to
- *	revalidate permission to the actual target process.
- *	@target contains the task_struct structure for target process.
+ *	@permitted capability sets for the current process.
  *	@effective contains the effective capability set.
  *	@inheritable contains the inheritable capability set.
  *	@permitted contains the permitted capability set.
  *	Return 0 if permission is granted.
  * @capset_set:
  *	Set the @effective, @inheritable, and @permitted capability sets for
- *	the @target process.  Since capset_check cannot always check permission
- *	to the real @target process, this hook may also perform permission
- *	checking to determine if the current process is allowed to set the
- *	capability sets of the @target process.  However, this hook has no way
- *	of returning an error due to the structure of the sys_capset code.
- *	@target contains the task_struct structure for target process.
+ *	the current process.
  *	@effective contains the effective capability set.
  *	@inheritable contains the inheritable capability set.
  *	@permitted contains the permitted capability set.
@@ -1303,12 +1293,10 @@ struct security_operations {
 	int (*capget) (struct task_struct *target,
 		       kernel_cap_t *effective,
 		       kernel_cap_t *inheritable, kernel_cap_t *permitted);
-	int (*capset_check) (struct task_struct *target,
-			     kernel_cap_t *effective,
+	int (*capset_check) (kernel_cap_t *effective,
 			     kernel_cap_t *inheritable,
 			     kernel_cap_t *permitted);
-	void (*capset_set) (struct task_struct *target,
-			    kernel_cap_t *effective,
+	void (*capset_set) (kernel_cap_t *effective,
 			    kernel_cap_t *inheritable,
 			    kernel_cap_t *permitted);
 	int (*capable) (struct task_struct *tsk, int cap, int audit);
@@ -1572,12 +1560,10 @@ int security_capget(struct task_struct *target,
 		    kernel_cap_t *effective,
 		    kernel_cap_t *inheritable,
 		    kernel_cap_t *permitted);
-int security_capset_check(struct task_struct *target,
-			  kernel_cap_t *effective,
+int security_capset_check(kernel_cap_t *effective,
 			  kernel_cap_t *inheritable,
 			  kernel_cap_t *permitted);
-void security_capset_set(struct task_struct *target,
-			 kernel_cap_t *effective,
+void security_capset_set(kernel_cap_t *effective,
 			 kernel_cap_t *inheritable,
 			 kernel_cap_t *permitted);
 int security_capable(struct task_struct *tsk, int cap);
@@ -1769,20 +1755,18 @@ static inline int security_capget(struct task_struct *target,
 	return cap_capget(target, effective, inheritable, permitted);
 }
 
-static inline int security_capset_check(struct task_struct *target,
-					 kernel_cap_t *effective,
-					 kernel_cap_t *inheritable,
-					 kernel_cap_t *permitted)
+static inline int security_capset_check(kernel_cap_t *effective,
+					kernel_cap_t *inheritable,
+					kernel_cap_t *permitted)
 {
-	return cap_capset_check(target, effective, inheritable, permitted);
+	return cap_capset_check(effective, inheritable, permitted);
 }
 
-static inline void security_capset_set(struct task_struct *target,
-					kernel_cap_t *effective,
-					kernel_cap_t *inheritable,
-					kernel_cap_t *permitted)
+static inline void security_capset_set(kernel_cap_t *effective,
+				       kernel_cap_t *inheritable,
+				       kernel_cap_t *permitted)
 {
-	cap_capset_set(target, effective, inheritable, permitted);
+	cap_capset_set(effective, inheritable, permitted);
 }
 
 static inline int security_capable(struct task_struct *tsk, int cap)
diff --git a/kernel/capability.c b/kernel/capability.c
index adb262f83de1..58b00519624a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -127,160 +127,6 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
 	return 0;
 }
 
-#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
-
-/*
- * Without filesystem capability support, we nominally support one process
- * setting the capabilities of another
- */
-static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
-				     kernel_cap_t *pIp, kernel_cap_t *pPp)
-{
-	struct task_struct *target;
-	int ret;
-
-	spin_lock(&task_capability_lock);
-	read_lock(&tasklist_lock);
-
-	if (pid && pid != task_pid_vnr(current)) {
-		target = find_task_by_vpid(pid);
-		if (!target) {
-			ret = -ESRCH;
-			goto out;
-		}
-	} else
-		target = current;
-
-	ret = security_capget(target, pEp, pIp, pPp);
-
-out:
-	read_unlock(&tasklist_lock);
-	spin_unlock(&task_capability_lock);
-
-	return ret;
-}
-
-/*
- * cap_set_pg - set capabilities for all processes in a given process
- * group.  We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
-			     kernel_cap_t *inheritable,
-			     kernel_cap_t *permitted)
-{
-	struct task_struct *g, *target;
-	int ret = -EPERM;
-	int found = 0;
-	struct pid *pgrp;
-
-	spin_lock(&task_capability_lock);
-	read_lock(&tasklist_lock);
-
-	pgrp = find_vpid(pgrp_nr);
-	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
-		target = g;
-		while_each_thread(g, target) {
-			if (!security_capset_check(target, effective,
-						   inheritable, permitted)) {
-				security_capset_set(target, effective,
-						    inheritable, permitted);
-				ret = 0;
-			}
-			found = 1;
-		}
-	} while_each_pid_task(pgrp, PIDTYPE_PGID, g);
-
-	read_unlock(&tasklist_lock);
-	spin_unlock(&task_capability_lock);
-
-	if (!found)
-		ret = 0;
-	return ret;
-}
-
-/*
- * cap_set_all - set capabilities for all processes other than init
- * and self.  We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_all(kernel_cap_t *effective,
-			      kernel_cap_t *inheritable,
-			      kernel_cap_t *permitted)
-{
-	struct task_struct *g, *target;
-	int ret = -EPERM;
-	int found = 0;
-
-	spin_lock(&task_capability_lock);
-	read_lock(&tasklist_lock);
-
-	do_each_thread(g, target) {
-		if (target == current
-		    || is_container_init(target->group_leader))
-			continue;
-		found = 1;
-		if (security_capset_check(target, effective, inheritable,
-					  permitted))
-			continue;
-		ret = 0;
-		security_capset_set(target, effective, inheritable, permitted);
-	} while_each_thread(g, target);
-
-	read_unlock(&tasklist_lock);
-	spin_unlock(&task_capability_lock);
-
-	if (!found)
-		ret = 0;
-
-	return ret;
-}
-
-/*
- * Given the target pid does not refer to the current process we
- * need more elaborate support... (This support is not present when
- * filesystem capabilities are configured.)
- */
-static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
-					    kernel_cap_t *inheritable,
-					    kernel_cap_t *permitted)
-{
-	struct task_struct *target;
-	int ret;
-
-	if (!capable(CAP_SETPCAP))
-		return -EPERM;
-
-	if (pid == -1)	          /* all procs other than current and init */
-		return cap_set_all(effective, inheritable, permitted);
-
-	else if (pid < 0)                    /* all procs in process group */
-		return cap_set_pg(-pid, effective, inheritable, permitted);
-
-	/* target != current */
-	spin_lock(&task_capability_lock);
-	read_lock(&tasklist_lock);
-
-	target = find_task_by_vpid(pid);
-	if (!target)
-		ret = -ESRCH;
-	else {
-		ret = security_capset_check(target, effective, inheritable,
-					    permitted);
-
-		/* having verified that the proposed changes are legal,
-		   we now put them into effect. */
-		if (!ret)
-			security_capset_set(target, effective, inheritable,
-					    permitted);
-	}
-
-	read_unlock(&tasklist_lock);
-	spin_unlock(&task_capability_lock);
-
-	return ret;
-}
-
-#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
-
 /*
  * If we have configured with filesystem capability support, then the
  * only thing that can change the capabilities of the current process
@@ -314,22 +160,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
 	return ret;
 }
 
-/*
- * With filesystem capability support configured, the kernel does not
- * permit the changing of capabilities in one process by another
- * process. (CAP_SETPCAP has much less broad semantics when configured
- * this way.)
- */
-static inline int do_sys_capset_other_tasks(pid_t pid,
-					    kernel_cap_t *effective,
-					    kernel_cap_t *inheritable,
-					    kernel_cap_t *permitted)
-{
-	return -EPERM;
-}
-
-#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
-
 /*
  * Atomically modify the effective capabilities returning the original
  * value. No permission check is performed here - it is assumed that the
@@ -424,16 +254,14 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
  * @data: pointer to struct that contains the effective, permitted,
  *	and inheritable capabilities
  *
- * Set capabilities for a given process, all processes, or all
- * processes in a given process group.
+ * Set capabilities for the current process only.  The ability to any other
+ * process(es) has been deprecated and removed.
  *
  * The restrictions on setting capabilities are specified as:
  *
- * [pid is for the 'target' task.  'current' is the calling task.]
- *
- * I: any raised capabilities must be a subset of the (old current) permitted
- * P: any raised capabilities must be a subset of the (old current) permitted
- * E: must be set to a subset of (new target) permitted
+ * I: any raised capabilities must be a subset of the old permitted
+ * P: any raised capabilities must be a subset of the old permitted
+ * E: must be set to a subset of new permitted
  *
  * Returns 0 on success and < 0 on error.
  */
@@ -452,10 +280,13 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (get_user(pid, &header->pid))
 		return -EFAULT;
 
+	/* may only affect current now */
+	if (pid != 0 && pid != task_pid_vnr(current))
+		return -EPERM;
+
 	if (copy_from_user(&kdata, data, tocopy
-			   * sizeof(struct __user_cap_data_struct))) {
+			   * sizeof(struct __user_cap_data_struct)))
 		return -EFAULT;
-	}
 
 	for (i = 0; i < tocopy; i++) {
 		effective.cap[i] = kdata[i].effective;
@@ -473,32 +304,20 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (ret)
 		return ret;
 
-	if (pid && (pid != task_pid_vnr(current)))
-		ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
-						&permitted);
-	else {
-		/*
-		 * This lock is required even when filesystem
-		 * capability support is configured - it protects the
-		 * sys_capget() call from returning incorrect data in
-		 * the case that the targeted process is not the
-		 * current one.
-		 */
-		spin_lock(&task_capability_lock);
-
-		ret = security_capset_check(current, &effective, &inheritable,
-					    &permitted);
-		/*
-		 * Having verified that the proposed changes are
-		 * legal, we now put them into effect.
-		 */
-		if (!ret)
-			security_capset_set(current, &effective, &inheritable,
-					    &permitted);
-		spin_unlock(&task_capability_lock);
-	}
-
+	/* This lock is required even when filesystem capability support is
+	 * configured - it protects the sys_capget() call from returning
+	 * incorrect data in the case that the targeted process is not the
+	 * current one.
+	 */
+	spin_lock(&task_capability_lock);
 
+	ret = security_capset_check(&effective, &inheritable, &permitted);
+	/* Having verified that the proposed changes are legal, we now put them
+	 * into effect.
+	 */
+	if (!ret)
+		security_capset_set(&effective, &inheritable, &permitted);
+	spin_unlock(&task_capability_lock);
 	return ret;
 }
 
diff --git a/security/commoncap.c b/security/commoncap.c
index 8283271f0768..e3f36ef629fa 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -96,15 +96,6 @@ int cap_capget (struct task_struct *target, kernel_cap_t *effective,
 
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 
-static inline int cap_block_setpcap(struct task_struct *target)
-{
-	/*
-	 * No support for remote process capability manipulation with
-	 * filesystem capability support.
-	 */
-	return (target != current);
-}
-
 static inline int cap_inh_is_capped(void)
 {
 	/*
@@ -119,7 +110,6 @@ static inline int cap_limit_ptraced_target(void) { return 1; }
 
 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
 
-static inline int cap_block_setpcap(struct task_struct *t) { return 0; }
 static inline int cap_inh_is_capped(void) { return 1; }
 static inline int cap_limit_ptraced_target(void)
 {
@@ -128,21 +118,18 @@ static inline int cap_limit_ptraced_target(void)
 
 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
 
-int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
+int cap_capset_check (kernel_cap_t *effective,
 		      kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
-	if (cap_block_setpcap(target)) {
-		return -EPERM;
-	}
 	if (cap_inh_is_capped()
 	    && !cap_issubset(*inheritable,
-			     cap_combine(target->cap_inheritable,
+			     cap_combine(current->cap_inheritable,
 					 current->cap_permitted))) {
 		/* incapable of using this inheritable set */
 		return -EPERM;
 	}
 	if (!cap_issubset(*inheritable,
-			   cap_combine(target->cap_inheritable,
+			   cap_combine(current->cap_inheritable,
 				       current->cap_bset))) {
 		/* no new pI capabilities outside bounding set */
 		return -EPERM;
@@ -150,7 +137,7 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
 
 	/* verify restrictions on target's new Permitted set */
 	if (!cap_issubset (*permitted,
-			   cap_combine (target->cap_permitted,
+			   cap_combine (current->cap_permitted,
 					current->cap_permitted))) {
 		return -EPERM;
 	}
@@ -163,12 +150,12 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective,
 	return 0;
 }
 
-void cap_capset_set (struct task_struct *target, kernel_cap_t *effective,
+void cap_capset_set (kernel_cap_t *effective,
 		     kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
-	target->cap_effective = *effective;
-	target->cap_inheritable = *inheritable;
-	target->cap_permitted = *permitted;
+	current->cap_effective = *effective;
+	current->cap_inheritable = *inheritable;
+	current->cap_permitted = *permitted;
 }
 
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
diff --git a/security/security.c b/security/security.c
index 346f21e0ec2c..dca37381e2a7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -145,20 +145,18 @@ int security_capget(struct task_struct *target,
 	return security_ops->capget(target, effective, inheritable, permitted);
 }
 
-int security_capset_check(struct task_struct *target,
-			   kernel_cap_t *effective,
-			   kernel_cap_t *inheritable,
-			   kernel_cap_t *permitted)
+int security_capset_check(kernel_cap_t *effective,
+			  kernel_cap_t *inheritable,
+			  kernel_cap_t *permitted)
 {
-	return security_ops->capset_check(target, effective, inheritable, permitted);
+	return security_ops->capset_check(effective, inheritable, permitted);
 }
 
-void security_capset_set(struct task_struct *target,
-			  kernel_cap_t *effective,
-			  kernel_cap_t *inheritable,
-			  kernel_cap_t *permitted)
+void security_capset_set(kernel_cap_t *effective,
+			 kernel_cap_t *inheritable,
+			 kernel_cap_t *permitted)
 {
-	security_ops->capset_set(target, effective, inheritable, permitted);
+	security_ops->capset_set(effective, inheritable, permitted);
 }
 
 int security_capable(struct task_struct *tsk, int cap)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 378dc53c08e8..df9986940e9c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1790,22 +1790,22 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
 	return secondary_ops->capget(target, effective, inheritable, permitted);
 }
 
-static int selinux_capset_check(struct task_struct *target, kernel_cap_t *effective,
+static int selinux_capset_check(kernel_cap_t *effective,
 				kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
 	int error;
 
-	error = secondary_ops->capset_check(target, effective, inheritable, permitted);
+	error = secondary_ops->capset_check(effective, inheritable, permitted);
 	if (error)
 		return error;
 
-	return task_has_perm(current, target, PROCESS__SETCAP);
+	return task_has_perm(current, current, PROCESS__SETCAP);
 }
 
-static void selinux_capset_set(struct task_struct *target, kernel_cap_t *effective,
+static void selinux_capset_set(kernel_cap_t *effective,
 			       kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
-	secondary_ops->capset_set(target, effective, inheritable, permitted);
+	secondary_ops->capset_set(effective, inheritable, permitted);
 }
 
 static int selinux_capable(struct task_struct *tsk, int cap, int audit)
-- 
cgit v1.2.3


From 15a2460ed0af7538ca8e6c610fe607a2cd9da142 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:15 +1100
Subject: CRED: Constify the kernel_cap_t arguments to the capset LSM hooks

Constify the kernel_cap_t arguments to the capset LSM hooks.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 44 ++++++++++++++++++++++++--------------------
 security/commoncap.c     | 10 ++++++----
 security/security.c      | 12 ++++++------
 security/selinux/hooks.c | 10 ++++++----
 4 files changed, 42 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index d1ce8beddbd7..9f305d4a31a7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,8 +53,12 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
 extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
-extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
-extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
+extern int cap_capset_check(const kernel_cap_t *effective,
+			    const kernel_cap_t *inheritable,
+			    const kernel_cap_t *permitted);
+extern void cap_capset_set(const kernel_cap_t *effective,
+			   const kernel_cap_t *inheritable,
+			   const kernel_cap_t *permitted);
 extern int cap_bprm_set_security(struct linux_binprm *bprm);
 extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
 extern int cap_bprm_secureexec(struct linux_binprm *bprm);
@@ -1293,12 +1297,12 @@ struct security_operations {
 	int (*capget) (struct task_struct *target,
 		       kernel_cap_t *effective,
 		       kernel_cap_t *inheritable, kernel_cap_t *permitted);
-	int (*capset_check) (kernel_cap_t *effective,
-			     kernel_cap_t *inheritable,
-			     kernel_cap_t *permitted);
-	void (*capset_set) (kernel_cap_t *effective,
-			    kernel_cap_t *inheritable,
-			    kernel_cap_t *permitted);
+	int (*capset_check) (const kernel_cap_t *effective,
+			     const kernel_cap_t *inheritable,
+			     const kernel_cap_t *permitted);
+	void (*capset_set) (const kernel_cap_t *effective,
+			    const kernel_cap_t *inheritable,
+			    const kernel_cap_t *permitted);
 	int (*capable) (struct task_struct *tsk, int cap, int audit);
 	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
@@ -1560,12 +1564,12 @@ int security_capget(struct task_struct *target,
 		    kernel_cap_t *effective,
 		    kernel_cap_t *inheritable,
 		    kernel_cap_t *permitted);
-int security_capset_check(kernel_cap_t *effective,
-			  kernel_cap_t *inheritable,
-			  kernel_cap_t *permitted);
-void security_capset_set(kernel_cap_t *effective,
-			 kernel_cap_t *inheritable,
-			 kernel_cap_t *permitted);
+int security_capset_check(const kernel_cap_t *effective,
+			  const kernel_cap_t *inheritable,
+			  const kernel_cap_t *permitted);
+void security_capset_set(const kernel_cap_t *effective,
+			 const kernel_cap_t *inheritable,
+			 const kernel_cap_t *permitted);
 int security_capable(struct task_struct *tsk, int cap);
 int security_capable_noaudit(struct task_struct *tsk, int cap);
 int security_acct(struct file *file);
@@ -1755,16 +1759,16 @@ static inline int security_capget(struct task_struct *target,
 	return cap_capget(target, effective, inheritable, permitted);
 }
 
-static inline int security_capset_check(kernel_cap_t *effective,
-					kernel_cap_t *inheritable,
-					kernel_cap_t *permitted)
+static inline int security_capset_check(const kernel_cap_t *effective,
+					const kernel_cap_t *inheritable,
+					const kernel_cap_t *permitted)
 {
 	return cap_capset_check(effective, inheritable, permitted);
 }
 
-static inline void security_capset_set(kernel_cap_t *effective,
-				       kernel_cap_t *inheritable,
-				       kernel_cap_t *permitted)
+static inline void security_capset_set(const kernel_cap_t *effective,
+				       const kernel_cap_t *inheritable,
+				       const kernel_cap_t *permitted)
 {
 	cap_capset_set(effective, inheritable, permitted);
 }
diff --git a/security/commoncap.c b/security/commoncap.c
index e3f36ef629fa..fb4e240720d8 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -118,8 +118,9 @@ static inline int cap_limit_ptraced_target(void)
 
 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
 
-int cap_capset_check (kernel_cap_t *effective,
-		      kernel_cap_t *inheritable, kernel_cap_t *permitted)
+int cap_capset_check(const kernel_cap_t *effective,
+		     const kernel_cap_t *inheritable,
+		     const kernel_cap_t *permitted)
 {
 	if (cap_inh_is_capped()
 	    && !cap_issubset(*inheritable,
@@ -150,8 +151,9 @@ int cap_capset_check (kernel_cap_t *effective,
 	return 0;
 }
 
-void cap_capset_set (kernel_cap_t *effective,
-		     kernel_cap_t *inheritable, kernel_cap_t *permitted)
+void cap_capset_set(const kernel_cap_t *effective,
+		    const kernel_cap_t *inheritable,
+		    const kernel_cap_t *permitted)
 {
 	current->cap_effective = *effective;
 	current->cap_inheritable = *inheritable;
diff --git a/security/security.c b/security/security.c
index dca37381e2a7..81c956a12300 100644
--- a/security/security.c
+++ b/security/security.c
@@ -145,16 +145,16 @@ int security_capget(struct task_struct *target,
 	return security_ops->capget(target, effective, inheritable, permitted);
 }
 
-int security_capset_check(kernel_cap_t *effective,
-			  kernel_cap_t *inheritable,
-			  kernel_cap_t *permitted)
+int security_capset_check(const kernel_cap_t *effective,
+			  const kernel_cap_t *inheritable,
+			  const kernel_cap_t *permitted)
 {
 	return security_ops->capset_check(effective, inheritable, permitted);
 }
 
-void security_capset_set(kernel_cap_t *effective,
-			 kernel_cap_t *inheritable,
-			 kernel_cap_t *permitted)
+void security_capset_set(const kernel_cap_t *effective,
+			 const kernel_cap_t *inheritable,
+			 const kernel_cap_t *permitted)
 {
 	security_ops->capset_set(effective, inheritable, permitted);
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index df9986940e9c..9f6da154cc82 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1790,8 +1790,9 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
 	return secondary_ops->capget(target, effective, inheritable, permitted);
 }
 
-static int selinux_capset_check(kernel_cap_t *effective,
-				kernel_cap_t *inheritable, kernel_cap_t *permitted)
+static int selinux_capset_check(const kernel_cap_t *effective,
+				const kernel_cap_t *inheritable,
+				const kernel_cap_t *permitted)
 {
 	int error;
 
@@ -1802,8 +1803,9 @@ static int selinux_capset_check(kernel_cap_t *effective,
 	return task_has_perm(current, current, PROCESS__SETCAP);
 }
 
-static void selinux_capset_set(kernel_cap_t *effective,
-			       kernel_cap_t *inheritable, kernel_cap_t *permitted)
+static void selinux_capset_set(const kernel_cap_t *effective,
+			       const kernel_cap_t *inheritable,
+			       const kernel_cap_t *permitted)
 {
 	secondary_ops->capset_set(effective, inheritable, permitted);
 }
-- 
cgit v1.2.3


From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:16 +1100
Subject: CRED: Separate task security context from task_struct

Separate the task security context from task_struct.  At this point, the
security data is temporarily embedded in the task_struct with two pointers
pointing to it.

Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in
entry.S via asm-offsets.

With comment fixes Signed-off-by: Marc Dionne <marc.c.dionne@gmail.com>

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/alpha/kernel/asm-offsets.c  |  11 +-
 arch/alpha/kernel/entry.S        |  10 +-
 arch/ia64/ia32/sys_ia32.c        |   8 +-
 arch/mips/kernel/kspd.c          |   4 +-
 arch/s390/kernel/compat_linux.c  |  28 ++---
 drivers/connector/cn_proc.c      |   8 +-
 fs/binfmt_elf.c                  |  12 +-
 fs/binfmt_elf_fdpic.c            |  12 +-
 fs/exec.c                        |   4 +-
 fs/fcntl.c                       |   4 +-
 fs/file_table.c                  |   4 +-
 fs/fuse/dir.c                    |  12 +-
 fs/hugetlbfs/inode.c             |   4 +-
 fs/ioprio.c                      |  12 +-
 fs/nfsd/auth.c                   |  22 ++--
 fs/nfsd/nfs4recover.c            |  12 +-
 fs/nfsd/nfsfh.c                  |   6 +-
 fs/open.c                        |  17 +--
 fs/proc/array.c                  |  18 +--
 fs/proc/base.c                   |  16 +--
 fs/xfs/linux-2.6/xfs_cred.h      |   6 +-
 fs/xfs/linux-2.6/xfs_globals.h   |   2 +-
 fs/xfs/xfs_inode.h               |   2 +-
 fs/xfs/xfs_vnodeops.h            |  10 +-
 include/linux/cred.h             | 155 +++++++++++++++++++----
 include/linux/init_task.h        |  24 ++--
 include/linux/sched.h            |  52 +-------
 include/linux/securebits.h       |   2 +-
 ipc/mqueue.c                     |   2 +-
 ipc/shm.c                        |   4 +-
 kernel/auditsc.c                 |  52 ++++----
 kernel/capability.c              |   4 +-
 kernel/cgroup.c                  |   4 +-
 kernel/exit.c                    |  10 +-
 kernel/fork.c                    |  24 ++--
 kernel/futex.c                   |   6 +-
 kernel/futex_compat.c            |   5 +-
 kernel/ptrace.c                  |  19 +--
 kernel/sched.c                   |  10 +-
 kernel/signal.c                  |  16 +--
 kernel/sys.c                     | 266 ++++++++++++++++++++++-----------------
 kernel/trace/trace.c             |   2 +-
 kernel/tsacct.c                  |   4 +-
 kernel/uid16.c                   |  28 ++---
 kernel/user.c                    |   4 +-
 mm/mempolicy.c                   |  10 +-
 mm/migrate.c                     |  10 +-
 mm/oom_kill.c                    |   2 +-
 net/core/scm.c                   |  10 +-
 net/sunrpc/auth.c                |   2 +-
 security/commoncap.c             | 161 +++++++++++++-----------
 security/keys/keyctl.c           |  25 ++--
 security/keys/permission.c       |  11 +-
 security/keys/process_keys.c     |  98 ++++++++-------
 security/keys/request_key.c      |  18 +--
 security/keys/request_key_auth.c |  12 +-
 security/selinux/exports.c       |   2 +-
 security/selinux/hooks.c         | 116 ++++++++---------
 security/selinux/selinuxfs.c     |   2 +-
 security/selinux/xfrm.c          |   6 +-
 security/smack/smack_access.c    |   4 +-
 security/smack/smack_lsm.c       |  77 ++++++------
 security/smack/smackfs.c         |   6 +-
 63 files changed, 832 insertions(+), 677 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
index 4b18cd94d59d..6ff8886e7e22 100644
--- a/arch/alpha/kernel/asm-offsets.c
+++ b/arch/alpha/kernel/asm-offsets.c
@@ -19,15 +19,18 @@ void foo(void)
 	BLANK();
 
         DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
-        DEFINE(TASK_UID, offsetof(struct task_struct, uid));
-        DEFINE(TASK_EUID, offsetof(struct task_struct, euid));
-        DEFINE(TASK_GID, offsetof(struct task_struct, gid));
-        DEFINE(TASK_EGID, offsetof(struct task_struct, egid));
+        DEFINE(TASK_CRED, offsetof(struct task_struct, cred));
         DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent));
         DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader));
         DEFINE(TASK_TGID, offsetof(struct task_struct, tgid));
         BLANK();
 
+        DEFINE(CRED_UID,  offsetof(struct cred, uid));
+        DEFINE(CRED_EUID, offsetof(struct cred, euid));
+        DEFINE(CRED_GID,  offsetof(struct cred, gid));
+        DEFINE(CRED_EGID, offsetof(struct cred, egid));
+        BLANK();
+
 	DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs));
 	DEFINE(PT_PTRACED, PT_PTRACED);
 	DEFINE(CLONE_VM, CLONE_VM);
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index 5fc61e281ac7..f77345bc66a9 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -850,8 +850,9 @@ osf_getpriority:
 sys_getxuid:
 	.prologue 0
 	ldq	$2, TI_TASK($8)
-	ldl	$0, TASK_UID($2)
-	ldl	$1, TASK_EUID($2)
+	ldq	$3, TASK_CRED($2)
+	ldl	$0, CRED_UID($3)
+	ldl	$1, CRED_EUID($3)
 	stq	$1, 80($sp)
 	ret
 .end sys_getxuid
@@ -862,8 +863,9 @@ sys_getxuid:
 sys_getxgid:
 	.prologue 0
 	ldq	$2, TI_TASK($8)
-	ldl	$0, TASK_GID($2)
-	ldl	$1, TASK_EGID($2)
+	ldq	$3, TASK_CRED($2)
+	ldl	$0, CRED_GID($3)
+	ldl	$1, CRED_EGID($3)
 	stq	$1, 80($sp)
 	ret
 .end sys_getxgid
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 5e92ae00bdbb..2445a9d3488e 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1772,20 +1772,20 @@ sys32_getgroups16 (int gidsetsize, short __user *grouplist)
 	if (gidsetsize < 0)
 		return -EINVAL;
 
-	get_group_info(current->group_info);
-	i = current->group_info->ngroups;
+	get_group_info(current->cred->group_info);
+	i = current->cred->group_info->ngroups;
 	if (gidsetsize) {
 		if (i > gidsetsize) {
 			i = -EINVAL;
 			goto out;
 		}
-		if (groups16_to_user(grouplist, current->group_info)) {
+		if (groups16_to_user(grouplist, current->cred->group_info)) {
 			i = -EFAULT;
 			goto out;
 		}
 	}
 out:
-	put_group_info(current->group_info);
+	put_group_info(current->cred->group_info);
 	return i;
 }
 
diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c
index b0591ae0ce56..fd6e51224034 100644
--- a/arch/mips/kernel/kspd.c
+++ b/arch/mips/kernel/kspd.c
@@ -174,8 +174,8 @@ static unsigned int translate_open_flags(int flags)
 
 static void sp_setfsuidgid( uid_t uid, gid_t gid)
 {
-	current->fsuid = uid;
-	current->fsgid = gid;
+	current->cred->fsuid = uid;
+	current->cred->fsgid = gid;
 
 	key_fsuid_changed(current);
 	key_fsgid_changed(current);
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 4646382af34f..6cc87d8c8682 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -148,9 +148,9 @@ asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user
 {
 	int retval;
 
-	if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
-	    !(retval = put_user(high2lowuid(current->euid), euid)))
-		retval = put_user(high2lowuid(current->suid), suid);
+	if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) &&
+	    !(retval = put_user(high2lowuid(current->cred->euid), euid)))
+		retval = put_user(high2lowuid(current->cred->suid), suid);
 
 	return retval;
 }
@@ -165,9 +165,9 @@ asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user
 {
 	int retval;
 
-	if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
-	    !(retval = put_user(high2lowgid(current->egid), egid)))
-		retval = put_user(high2lowgid(current->sgid), sgid);
+	if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) &&
+	    !(retval = put_user(high2lowgid(current->cred->egid), egid)))
+		retval = put_user(high2lowgid(current->cred->sgid), sgid);
 
 	return retval;
 }
@@ -217,20 +217,20 @@ asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist)
 	if (gidsetsize < 0)
 		return -EINVAL;
 
-	get_group_info(current->group_info);
-	i = current->group_info->ngroups;
+	get_group_info(current->cred->group_info);
+	i = current->cred->group_info->ngroups;
 	if (gidsetsize) {
 		if (i > gidsetsize) {
 			i = -EINVAL;
 			goto out;
 		}
-		if (groups16_to_user(grouplist, current->group_info)) {
+		if (groups16_to_user(grouplist, current->cred->group_info)) {
 			i = -EFAULT;
 			goto out;
 		}
 	}
 out:
-	put_group_info(current->group_info);
+	put_group_info(current->cred->group_info);
 	return i;
 }
 
@@ -261,22 +261,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
 
 asmlinkage long sys32_getuid16(void)
 {
-	return high2lowuid(current->uid);
+	return high2lowuid(current->cred->uid);
 }
 
 asmlinkage long sys32_geteuid16(void)
 {
-	return high2lowuid(current->euid);
+	return high2lowuid(current->cred->euid);
 }
 
 asmlinkage long sys32_getgid16(void)
 {
-	return high2lowgid(current->gid);
+	return high2lowgid(current->cred->gid);
 }
 
 asmlinkage long sys32_getegid16(void)
 {
-	return high2lowgid(current->egid);
+	return high2lowgid(current->cred->egid);
 }
 
 /*
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 5c9f67f98d10..354c1ff17159 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -116,11 +116,11 @@ void proc_id_connector(struct task_struct *task, int which_id)
 	ev->event_data.id.process_pid = task->pid;
 	ev->event_data.id.process_tgid = task->tgid;
 	if (which_id == PROC_EVENT_UID) {
-	 	ev->event_data.id.r.ruid = task->uid;
-	 	ev->event_data.id.e.euid = task->euid;
+		ev->event_data.id.r.ruid = task->cred->uid;
+		ev->event_data.id.e.euid = task->cred->euid;
 	} else if (which_id == PROC_EVENT_GID) {
-	   	ev->event_data.id.r.rgid = task->gid;
-	   	ev->event_data.id.e.egid = task->egid;
+		ev->event_data.id.r.rgid = task->cred->gid;
+		ev->event_data.id.e.egid = task->cred->egid;
 	} else
 	     	return;
 	get_seq(&msg->seq, &ev->cpu);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8fcfa398d350..7a52477ce493 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
 	NEW_AUX_ENT(AT_FLAGS, 0);
 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
-	NEW_AUX_ENT(AT_UID, tsk->uid);
-	NEW_AUX_ENT(AT_EUID, tsk->euid);
-	NEW_AUX_ENT(AT_GID, tsk->gid);
-	NEW_AUX_ENT(AT_EGID, tsk->egid);
+	NEW_AUX_ENT(AT_UID, tsk->cred->uid);
+	NEW_AUX_ENT(AT_EUID, tsk->cred->euid);
+	NEW_AUX_ENT(AT_GID, tsk->cred->gid);
+	NEW_AUX_ENT(AT_EGID, tsk->cred->egid);
  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
 	if (k_platform) {
@@ -1388,8 +1388,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
 	psinfo->pr_nice = task_nice(p);
 	psinfo->pr_flag = p->flags;
-	SET_UID(psinfo->pr_uid, p->uid);
-	SET_GID(psinfo->pr_gid, p->gid);
+	SET_UID(psinfo->pr_uid, p->cred->uid);
+	SET_GID(psinfo->pr_gid, p->cred->gid);
 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
 	
 	return 0;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 488584c87512..9f67054c2c4e 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	NEW_AUX_ENT(AT_BASE,	interp_params->elfhdr_addr);
 	NEW_AUX_ENT(AT_FLAGS,	0);
 	NEW_AUX_ENT(AT_ENTRY,	exec_params->entry_addr);
-	NEW_AUX_ENT(AT_UID,	(elf_addr_t) current_uid());
-	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) current_euid());
-	NEW_AUX_ENT(AT_GID,	(elf_addr_t) current_gid());
-	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) current_egid());
+	NEW_AUX_ENT(AT_UID,	(elf_addr_t) current->cred->uid);
+	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) current->cred->euid);
+	NEW_AUX_ENT(AT_GID,	(elf_addr_t) current->cred->gid);
+	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) current->cred->egid);
 	NEW_AUX_ENT(AT_SECURE,	security_bprm_secureexec(bprm));
 	NEW_AUX_ENT(AT_EXECFN,	bprm->exec);
 
@@ -1440,8 +1440,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
 	psinfo->pr_nice = task_nice(p);
 	psinfo->pr_flag = p->flags;
-	SET_UID(psinfo->pr_uid, p->uid);
-	SET_GID(psinfo->pr_gid, p->gid);
+	SET_UID(psinfo->pr_uid, p->cred->uid);
+	SET_GID(psinfo->pr_gid, p->cred->gid);
 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
 
 	return 0;
diff --git a/fs/exec.c b/fs/exec.c
index 604834f3b208..31149e430a89 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1738,7 +1738,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 */
 	if (get_dumpable(mm) == 2) {	/* Setuid core dump mode */
 		flag = O_EXCL;		/* Stop rewrite attacks */
-		current->fsuid = 0;	/* Dump root private */
+		current->cred->fsuid = 0;	/* Dump root private */
 	}
 
 	retval = coredump_wait(exit_code, &core_state);
@@ -1834,7 +1834,7 @@ fail_unlock:
 	if (helper_argv)
 		argv_free(helper_argv);
 
-	current->fsuid = fsuid;
+	current->cred->fsuid = fsuid;
 	coredump_finish(mm);
 fail:
 	return retval;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bf049a805e59..63964d863ad6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -401,8 +401,8 @@ static inline int sigio_perm(struct task_struct *p,
                              struct fown_struct *fown, int sig)
 {
 	return (((fown->euid == 0) ||
-		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
-		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
+		 (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) ||
+		 (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) &&
 		!security_file_send_sigiotask(p, fown, sig));
 }
 
diff --git a/fs/file_table.c b/fs/file_table.c
index 5ad0eca6eea2..3152b53cfab0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -122,8 +122,8 @@ struct file *get_empty_filp(void)
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
-	f->f_uid = tsk->fsuid;
-	f->f_gid = tsk->fsgid;
+	f->f_uid = tsk->cred->fsuid;
+	f->f_gid = tsk->cred->fsgid;
 	eventpoll_init_file(f);
 	/* f->f_version: 0 */
 	return f;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index fd03330cadeb..e97a98981862 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -872,12 +872,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
 	if (fc->flags & FUSE_ALLOW_OTHER)
 		return 1;
 
-	if (task->euid == fc->user_id &&
-	    task->suid == fc->user_id &&
-	    task->uid == fc->user_id &&
-	    task->egid == fc->group_id &&
-	    task->sgid == fc->group_id &&
-	    task->gid == fc->group_id)
+	if (task->cred->euid == fc->user_id &&
+	    task->cred->suid == fc->user_id &&
+	    task->cred->uid == fc->user_id &&
+	    task->cred->egid == fc->group_id &&
+	    task->cred->sgid == fc->group_id &&
+	    task->cred->gid == fc->group_id)
 		return 1;
 
 	return 0;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 08ad76c79b49..870a721b8bd2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -958,7 +958,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
 
-	if (!user_shm_lock(size, current->user))
+	if (!user_shm_lock(size, current->cred->user))
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
@@ -998,7 +998,7 @@ out_inode:
 out_dentry:
 	dput(dentry);
 out_shm_unlock:
-	user_shm_unlock(size, current->user);
+	user_shm_unlock(size, current->cred->user);
 	return ERR_PTR(error);
 }
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 68d2cd807118..bb5210af77c2 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	int err;
 	struct io_context *ioc;
 
-	if (task->uid != current_euid() &&
-	    task->uid != current_uid() && !capable(CAP_SYS_NICE))
+	if (task->cred->uid != current_euid() &&
+	    task->cred->uid != current_uid() && !capable(CAP_SYS_NICE))
 		return -EPERM;
 
 	err = security_task_setioprio(task, ioprio);
@@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
-				user = current->user;
+				user = current->cred->user;
 			else
 				user = find_user(who);
 
@@ -131,7 +131,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 				break;
 
 			do_each_thread(g, p) {
-				if (p->uid != who)
+				if (p->cred->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
@@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
-				user = current->user;
+				user = current->cred->user;
 			else
 				user = find_user(who);
 
@@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 				break;
 
 			do_each_thread(g, p) {
-				if (p->uid != user->uid)
+				if (p->cred->uid != user->uid)
 					continue;
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 294992e9bf69..808fc03a6fbd 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -27,6 +27,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 
 int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 {
+	struct cred *act_as = current->cred ;
 	struct svc_cred	cred = rqstp->rq_cred;
 	int i;
 	int flags = nfsexp_flags(rqstp, exp);
@@ -55,25 +56,26 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 		get_group_info(cred.cr_group_info);
 
 	if (cred.cr_uid != (uid_t) -1)
-		current->fsuid = cred.cr_uid;
+		act_as->fsuid = cred.cr_uid;
 	else
-		current->fsuid = exp->ex_anon_uid;
+		act_as->fsuid = exp->ex_anon_uid;
 	if (cred.cr_gid != (gid_t) -1)
-		current->fsgid = cred.cr_gid;
+		act_as->fsgid = cred.cr_gid;
 	else
-		current->fsgid = exp->ex_anon_gid;
+		act_as->fsgid = exp->ex_anon_gid;
 
 	if (!cred.cr_group_info)
 		return -ENOMEM;
-	ret = set_current_groups(cred.cr_group_info);
+	ret = set_groups(act_as, cred.cr_group_info);
 	put_group_info(cred.cr_group_info);
 	if ((cred.cr_uid)) {
-		current->cap_effective =
-			cap_drop_nfsd_set(current->cap_effective);
+		act_as->cap_effective =
+			cap_drop_nfsd_set(act_as->cap_effective);
 	} else {
-		current->cap_effective =
-			cap_raise_nfsd_set(current->cap_effective,
-					   current->cap_permitted);
+		act_as->cap_effective =
+			cap_raise_nfsd_set(act_as->cap_effective,
+					   act_as->cap_permitted);
 	}
 	return ret;
 }
+
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index bb93946ace22..a5e14e8695ea 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -57,17 +57,17 @@ static int rec_dir_init = 0;
 static void
 nfs4_save_user(uid_t *saveuid, gid_t *savegid)
 {
-	*saveuid = current->fsuid;
-	*savegid = current->fsgid;
-	current->fsuid = 0;
-	current->fsgid = 0;
+	*saveuid = current->cred->fsuid;
+	*savegid = current->cred->fsgid;
+	current->cred->fsuid = 0;
+	current->cred->fsgid = 0;
 }
 
 static void
 nfs4_reset_user(uid_t saveuid, gid_t savegid)
 {
-	current->fsuid = saveuid;
-	current->fsgid = savegid;
+	current->cred->fsuid = saveuid;
+	current->cred->fsgid = savegid;
 }
 
 static void
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cd25d91895a1..e67cfaea0865 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -186,9 +186,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 		 * access control settings being in effect, we cannot
 		 * fix that case easily.
 		 */
-		current->cap_effective =
-			cap_raise_nfsd_set(current->cap_effective,
-					   current->cap_permitted);
+		current->cred->cap_effective =
+			cap_raise_nfsd_set(current->cred->cap_effective,
+					   current->cred->cap_permitted);
 	} else {
 		error = nfsd_setuser_and_check_port(rqstp, exp);
 		if (error)
diff --git a/fs/open.c b/fs/open.c
index 500cc0c54762..b1238e195e7e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -425,6 +425,7 @@ out:
  */
 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 {
+	struct cred *cred = current->cred;
 	struct path path;
 	struct inode *inode;
 	int old_fsuid, old_fsgid;
@@ -434,18 +435,18 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
 		return -EINVAL;
 
-	old_fsuid = current->fsuid;
-	old_fsgid = current->fsgid;
+	old_fsuid = cred->fsuid;
+	old_fsgid = cred->fsgid;
 
-	current->fsuid = current->uid;
-	current->fsgid = current->gid;
+	cred->fsuid = cred->uid;
+	cred->fsgid = cred->gid;
 
 	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 		/* Clear the capabilities if we switch to a non-root user */
-		if (current->uid)
+		if (current->cred->uid)
 			old_cap = cap_set_effective(__cap_empty_set);
 		else
-			old_cap = cap_set_effective(current->cap_permitted);
+			old_cap = cap_set_effective(cred->cap_permitted);
 	}
 
 	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
@@ -484,8 +485,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 out_path_release:
 	path_put(&path);
 out:
-	current->fsuid = old_fsuid;
-	current->fsgid = old_fsgid;
+	cred->fsuid = old_fsuid;
+	cred->fsgid = old_fsgid;
 
 	if (!issecure(SECURE_NO_SETUID_FIXUP))
 		cap_set_effective(old_cap);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6af7fba7abb1..62fe9b2009b6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -182,8 +182,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		task_tgid_nr_ns(p, ns),
 		pid_nr_ns(pid, ns),
 		ppid, tpid,
-		p->uid, p->euid, p->suid, p->fsuid,
-		p->gid, p->egid, p->sgid, p->fsgid);
+		p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid,
+		p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid);
 
 	task_lock(p);
 	if (p->files)
@@ -194,7 +194,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		fdt ? fdt->max_fds : 0);
 	rcu_read_unlock();
 
-	group_info = p->group_info;
+	group_info = p->cred->group_info;
 	get_group_info(group_info);
 	task_unlock(p);
 
@@ -262,7 +262,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		blocked = p->blocked;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
-		qsize = atomic_read(&p->user->sigpending);
+		qsize = atomic_read(&p->cred->user->sigpending);
 		qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
 		unlock_task_sighand(p, &flags);
 	}
@@ -293,10 +293,12 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
-	render_cap_t(m, "CapInh:\t", &p->cap_inheritable);
-	render_cap_t(m, "CapPrm:\t", &p->cap_permitted);
-	render_cap_t(m, "CapEff:\t", &p->cap_effective);
-	render_cap_t(m, "CapBnd:\t", &p->cap_bset);
+	struct cred *cred = p->cred;
+
+	render_cap_t(m, "CapInh:\t", &cred->cap_inheritable);
+	render_cap_t(m, "CapPrm:\t", &cred->cap_permitted);
+	render_cap_t(m, "CapEff:\t", &cred->cap_effective);
+	render_cap_t(m, "CapBnd:\t", &cred->cap_bset);
 }
 
 static inline void task_context_switch_counts(struct seq_file *m,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 486cf3fe7139..6862b360c36c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1428,8 +1428,8 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
 	inode->i_uid = 0;
 	inode->i_gid = 0;
 	if (task_dumpable(task)) {
-		inode->i_uid = task->euid;
-		inode->i_gid = task->egid;
+		inode->i_uid = task->cred->euid;
+		inode->i_gid = task->cred->egid;
 	}
 	security_task_to_inode(task, inode);
 
@@ -1454,8 +1454,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
 	if (task) {
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
-			stat->uid = task->euid;
-			stat->gid = task->egid;
+			stat->uid = task->cred->euid;
+			stat->gid = task->cred->egid;
 		}
 	}
 	rcu_read_unlock();
@@ -1486,8 +1486,8 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 	if (task) {
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
-			inode->i_uid = task->euid;
-			inode->i_gid = task->egid;
+			inode->i_uid = task->cred->euid;
+			inode->i_gid = task->cred->egid;
 		} else {
 			inode->i_uid = 0;
 			inode->i_gid = 0;
@@ -1658,8 +1658,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 				rcu_read_unlock();
 				put_files_struct(files);
 				if (task_dumpable(task)) {
-					inode->i_uid = task->euid;
-					inode->i_gid = task->egid;
+					inode->i_uid = task->cred->euid;
+					inode->i_gid = task->cred->egid;
 				} else {
 					inode->i_uid = 0;
 					inode->i_gid = 0;
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 293043a5573a..8c022cd0ad67 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -23,11 +23,9 @@
 /*
  * Credentials
  */
-typedef struct cred {
-       /* EMPTY */
-} cred_t;
+typedef const struct cred cred_t;
 
-extern struct cred *sys_cred;
+extern cred_t *sys_cred;
 
 /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
 static inline int capable_cred(cred_t *cr, int cid)
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 2770b0085ee8..6eda8a3eb6f1 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,6 @@
 #define __XFS_GLOBALS_H__
 
 extern uint64_t	xfs_panic_mask;		/* set to cause more panics */
-extern struct cred *sys_cred;
+extern cred_t *sys_cred;
 
 #endif	/* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1420c49674d7..6be310d41daf 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -497,7 +497,7 @@ int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t, uint);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
 int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
-			   xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
+			   xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t,
 			   int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
 void		xfs_dinode_from_disk(struct xfs_icdinode *,
 				     struct xfs_dinode_core *);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index e932a96bec54..7b0c2ab88333 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -16,7 +16,7 @@ struct xfs_iomap;
 
 int xfs_open(struct xfs_inode *ip);
 int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
-		struct cred *credp);
+		cred_t *credp);
 #define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
@@ -28,24 +28,24 @@ int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
 		struct xfs_inode **ipp, struct xfs_name *ci_name);
 int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
-		xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
+		xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp);
 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
 		struct xfs_inode *ip);
 int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 		struct xfs_name *target_name);
 int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
-		mode_t mode, struct xfs_inode **ipp, struct cred *credp);
+		mode_t mode, struct xfs_inode **ipp, cred_t *credp);
 int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 		       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
 		const char *target_path, mode_t mode, struct xfs_inode **ipp,
-		struct cred *credp);
+		cred_t *credp);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_reclaim(struct xfs_inode *ip);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
 		xfs_flock64_t *bf, xfs_off_t offset,
-		struct cred *credp, int	attr_flags);
+		cred_t *credp, int	attr_flags);
 int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
 		struct xfs_inode *src_ip, struct xfs_inode *target_dp,
 		struct xfs_name *target_name, struct xfs_inode *target_ip);
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b69222cc1fd2..3e65587a72e5 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -12,39 +12,150 @@
 #ifndef _LINUX_CRED_H
 #define _LINUX_CRED_H
 
-#define get_current_user()	(get_uid(current->user))
-
-#define task_uid(task)		((task)->uid)
-#define task_gid(task)		((task)->gid)
-#define task_euid(task)		((task)->euid)
-#define task_egid(task)		((task)->egid)
-
-#define current_uid()		(current->uid)
-#define current_gid()		(current->gid)
-#define current_euid()		(current->euid)
-#define current_egid()		(current->egid)
-#define current_suid()		(current->suid)
-#define current_sgid()		(current->sgid)
-#define current_fsuid()		(current->fsuid)
-#define current_fsgid()		(current->fsgid)
-#define current_cap()		(current->cap_effective)
+#include <linux/capability.h>
+#include <linux/key.h>
+#include <asm/atomic.h>
+
+struct user_struct;
+struct cred;
+
+/*
+ * COW Supplementary groups list
+ */
+#define NGROUPS_SMALL		32
+#define NGROUPS_PER_BLOCK	((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
+
+struct group_info {
+	atomic_t	usage;
+	int		ngroups;
+	int		nblocks;
+	gid_t		small_block[NGROUPS_SMALL];
+	gid_t		*blocks[0];
+};
+
+/**
+ * get_group_info - Get a reference to a group info structure
+ * @group_info: The group info to reference
+ *
+ * This must be called with the owning task locked (via task_lock()) when task
+ * != current.  The reason being that the vast majority of callers are looking
+ * at current->group_info, which can not be changed except by the current task.
+ * Changing current->group_info requires the task lock, too.
+ */
+#define get_group_info(group_info)		\
+do {						\
+	atomic_inc(&(group_info)->usage);	\
+} while (0)
+
+/**
+ * put_group_info - Release a reference to a group info structure
+ * @group_info: The group info to release
+ */
+#define put_group_info(group_info)			\
+do {							\
+	if (atomic_dec_and_test(&(group_info)->usage))	\
+		groups_free(group_info);		\
+} while (0)
+
+extern struct group_info *groups_alloc(int);
+extern void groups_free(struct group_info *);
+extern int set_current_groups(struct group_info *);
+extern int set_groups(struct cred *, struct group_info *);
+extern int groups_search(struct group_info *, gid_t);
+
+/* access the groups "array" with this macro */
+#define GROUP_AT(gi, i) \
+	((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK])
+
+extern int in_group_p(gid_t);
+extern int in_egroup_p(gid_t);
+
+/*
+ * The security context of a task
+ *
+ * The parts of the context break down into two categories:
+ *
+ *  (1) The objective context of a task.  These parts are used when some other
+ *	task is attempting to affect this one.
+ *
+ *  (2) The subjective context.  These details are used when the task is acting
+ *	upon another object, be that a file, a task, a key or whatever.
+ *
+ * Note that some members of this structure belong to both categories - the
+ * LSM security pointer for instance.
+ *
+ * A task has two security pointers.  task->real_cred points to the objective
+ * context that defines that task's actual details.  The objective part of this
+ * context is used whenever that task is acted upon.
+ *
+ * task->cred points to the subjective context that defines the details of how
+ * that task is going to act upon another object.  This may be overridden
+ * temporarily to point to another security context, but normally points to the
+ * same context as task->real_cred.
+ */
+struct cred {
+	atomic_t	usage;
+	uid_t		uid;		/* real UID of the task */
+	gid_t		gid;		/* real GID of the task */
+	uid_t		suid;		/* saved UID of the task */
+	gid_t		sgid;		/* saved GID of the task */
+	uid_t		euid;		/* effective UID of the task */
+	gid_t		egid;		/* effective GID of the task */
+	uid_t		fsuid;		/* UID for VFS ops */
+	gid_t		fsgid;		/* GID for VFS ops */
+	unsigned	securebits;	/* SUID-less security management */
+	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
+	kernel_cap_t	cap_permitted;	/* caps we're permitted */
+	kernel_cap_t	cap_effective;	/* caps we can actually use */
+	kernel_cap_t	cap_bset;	/* capability bounding set */
+#ifdef CONFIG_KEYS
+	unsigned char	jit_keyring;	/* default keyring to attach requested
+					 * keys to */
+	struct key	*thread_keyring; /* keyring private to this thread */
+	struct key	*request_key_auth; /* assumed request_key authority */
+#endif
+#ifdef CONFIG_SECURITY
+	void		*security;	/* subjective LSM security */
+#endif
+	struct user_struct *user;	/* real user ID subscription */
+	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
+	struct rcu_head	rcu;		/* RCU deletion hook */
+	spinlock_t	lock;		/* lock for pointer changes */
+};
+
+#define get_current_user()	(get_uid(current->cred->user))
+
+#define task_uid(task)		((task)->cred->uid)
+#define task_gid(task)		((task)->cred->gid)
+#define task_euid(task)		((task)->cred->euid)
+#define task_egid(task)		((task)->cred->egid)
+
+#define current_uid()		(current->cred->uid)
+#define current_gid()		(current->cred->gid)
+#define current_euid()		(current->cred->euid)
+#define current_egid()		(current->cred->egid)
+#define current_suid()		(current->cred->suid)
+#define current_sgid()		(current->cred->sgid)
+#define current_fsuid()		(current->cred->fsuid)
+#define current_fsgid()		(current->cred->fsgid)
+#define current_cap()		(current->cred->cap_effective)
 
 #define current_uid_gid(_uid, _gid)		\
 do {						\
-	*(_uid) = current->uid;			\
-	*(_gid) = current->gid;			\
+	*(_uid) = current->cred->uid;		\
+	*(_gid) = current->cred->gid;		\
 } while(0)
 
 #define current_euid_egid(_uid, _gid)		\
 do {						\
-	*(_uid) = current->euid;		\
-	*(_gid) = current->egid;		\
+	*(_uid) = current->cred->euid;		\
+	*(_gid) = current->cred->egid;		\
 } while(0)
 
 #define current_fsuid_fsgid(_uid, _gid)		\
 do {						\
-	*(_uid) = current->fsuid;		\
-	*(_gid) = current->fsgid;		\
+	*(_uid) = current->cred->fsuid;		\
+	*(_gid) = current->cred->fsgid;		\
 } while(0)
 
 #endif /* _LINUX_CRED_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 23fd8909b9e5..9de41ccd67b5 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -113,6 +113,21 @@ extern struct group_info init_groups;
 # define CAP_INIT_BSET  CAP_INIT_EFF_SET
 #endif
 
+extern struct cred init_cred;
+
+#define INIT_CRED(p)						\
+{								\
+	.usage			= ATOMIC_INIT(3),		\
+	.securebits		= SECUREBITS_DEFAULT,		\
+	.cap_inheritable	= CAP_INIT_INH_SET,		\
+	.cap_permitted		= CAP_FULL_SET,			\
+	.cap_effective		= CAP_INIT_EFF_SET,		\
+	.cap_bset		= CAP_INIT_BSET,		\
+	.user			= INIT_USER,			\
+	.group_info		= &init_groups,			\
+	.lock			= __SPIN_LOCK_UNLOCKED(p.lock),	\
+}
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -147,13 +162,8 @@ extern struct group_info init_groups;
 	.children	= LIST_HEAD_INIT(tsk.children),			\
 	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
 	.group_leader	= &tsk,						\
-	.group_info	= &init_groups,					\
-	.cap_effective	= CAP_INIT_EFF_SET,				\
-	.cap_inheritable = CAP_INIT_INH_SET,				\
-	.cap_permitted	= CAP_FULL_SET,					\
-	.cap_bset 	= CAP_INIT_BSET,				\
-	.securebits     = SECUREBITS_DEFAULT,				\
-	.user		= INIT_USER,					\
+	.__temp_cred	= INIT_CRED(tsk.__temp_cred),			\
+	.cred		= &tsk.__temp_cred,				\
 	.comm		= "swapper",					\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b483f39a7112..c8b92502354d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -660,6 +660,7 @@ extern struct user_struct *find_user(uid_t);
 extern struct user_struct root_user;
 #define INIT_USER (&root_user)
 
+
 struct backing_dev_info;
 struct reclaim_state;
 
@@ -883,38 +884,7 @@ partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 #endif	/* !CONFIG_SMP */
 
 struct io_context;			/* See blkdev.h */
-#define NGROUPS_SMALL		32
-#define NGROUPS_PER_BLOCK	((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
-struct group_info {
-	int ngroups;
-	atomic_t usage;
-	gid_t small_block[NGROUPS_SMALL];
-	int nblocks;
-	gid_t *blocks[0];
-};
-
-/*
- * get_group_info() must be called with the owning task locked (via task_lock())
- * when task != current.  The reason being that the vast majority of callers are
- * looking at current->group_info, which can not be changed except by the
- * current task.  Changing current->group_info requires the task lock, too.
- */
-#define get_group_info(group_info) do { \
-	atomic_inc(&(group_info)->usage); \
-} while (0)
 
-#define put_group_info(group_info) do { \
-	if (atomic_dec_and_test(&(group_info)->usage)) \
-		groups_free(group_info); \
-} while (0)
-
-extern struct group_info *groups_alloc(int gidsetsize);
-extern void groups_free(struct group_info *group_info);
-extern int set_current_groups(struct group_info *group_info);
-extern int groups_search(struct group_info *group_info, gid_t grp);
-/* access the groups "array" with this macro */
-#define GROUP_AT(gi, i) \
-    ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
 
 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
 extern void prefetch_stack(struct task_struct *t);
@@ -1181,17 +1151,9 @@ struct task_struct {
 	struct list_head cpu_timers[3];
 
 /* process credentials */
-	uid_t uid,euid,suid,fsuid;
-	gid_t gid,egid,sgid,fsgid;
-	struct group_info *group_info;
-	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted, cap_bset;
-	struct user_struct *user;
-	unsigned securebits;
-#ifdef CONFIG_KEYS
-	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
-	struct key *request_key_auth;	/* assumed request_key authority */
-	struct key *thread_keyring;	/* keyring private to this thread */
-#endif
+	struct cred __temp_cred __deprecated; /* temporary credentials to be removed */
+	struct cred *cred;	/* actual/objective task credentials */
+
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
 				       it with task_lock())
@@ -1228,9 +1190,6 @@ struct task_struct {
 	int (*notifier)(void *priv);
 	void *notifier_data;
 	sigset_t *notifier_mask;
-#ifdef CONFIG_SECURITY
-	void *security;
-#endif
 	struct audit_context *audit_context;
 #ifdef CONFIG_AUDITSYSCALL
 	uid_t loginuid;
@@ -1787,9 +1746,6 @@ extern void wake_up_new_task(struct task_struct *tsk,
 extern void sched_fork(struct task_struct *p, int clone_flags);
 extern void sched_dead(struct task_struct *p);
 
-extern int in_group_p(gid_t);
-extern int in_egroup_p(gid_t);
-
 extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
diff --git a/include/linux/securebits.h b/include/linux/securebits.h
index 92f09bdf1175..6d389491bfa2 100644
--- a/include/linux/securebits.h
+++ b/include/linux/securebits.h
@@ -32,7 +32,7 @@
    setting is locked or not. A setting which is locked cannot be
    changed from user-level. */
 #define issecure_mask(X)	(1 << (X))
-#define issecure(X)		(issecure_mask(X) & current->securebits)
+#define issecure(X)		(issecure_mask(X) & current->cred->securebits)
 
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index abda5991d7e3..e1885b494bac 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -126,7 +126,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 		if (S_ISREG(mode)) {
 			struct mqueue_inode_info *info;
 			struct task_struct *p = current;
-			struct user_struct *u = p->user;
+			struct user_struct *u = p->cred->user;
 			unsigned long mq_bytes, mq_msg_tblsz;
 
 			inode->i_fop = &mqueue_file_operations;
diff --git a/ipc/shm.c b/ipc/shm.c
index 0c3debbe32d5..264a9d33c5dd 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (shmflg & SHM_HUGETLB) {
 		/* hugetlb_file_setup takes care of mlock user accounting */
 		file = hugetlb_file_setup(name, size);
-		shp->mlock_user = current->user;
+		shp->mlock_user = current->cred->user;
 	} else {
 		int acctflag = VM_ACCOUNT;
 		/*
@@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 			goto out_unlock;
 		
 		if(cmd==SHM_LOCK) {
-			struct user_struct * user = current->user;
+			struct user_struct *user = current->cred->user;
 			if (!is_file_hugepages(shp->shm_file)) {
 				err = shmem_lock(shp->shm_file, 1, user);
 				if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9c7e47ae4576..2febf5165fad 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -447,6 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 			      struct audit_names *name,
 			      enum audit_state *state)
 {
+	struct cred *cred = tsk->cred;
 	int i, j, need_sid = 1;
 	u32 sid;
 
@@ -466,28 +467,28 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_UID:
-			result = audit_comparator(tsk->uid, f->op, f->val);
+			result = audit_comparator(cred->uid, f->op, f->val);
 			break;
 		case AUDIT_EUID:
-			result = audit_comparator(tsk->euid, f->op, f->val);
+			result = audit_comparator(cred->euid, f->op, f->val);
 			break;
 		case AUDIT_SUID:
-			result = audit_comparator(tsk->suid, f->op, f->val);
+			result = audit_comparator(cred->suid, f->op, f->val);
 			break;
 		case AUDIT_FSUID:
-			result = audit_comparator(tsk->fsuid, f->op, f->val);
+			result = audit_comparator(cred->fsuid, f->op, f->val);
 			break;
 		case AUDIT_GID:
-			result = audit_comparator(tsk->gid, f->op, f->val);
+			result = audit_comparator(cred->gid, f->op, f->val);
 			break;
 		case AUDIT_EGID:
-			result = audit_comparator(tsk->egid, f->op, f->val);
+			result = audit_comparator(cred->egid, f->op, f->val);
 			break;
 		case AUDIT_SGID:
-			result = audit_comparator(tsk->sgid, f->op, f->val);
+			result = audit_comparator(cred->sgid, f->op, f->val);
 			break;
 		case AUDIT_FSGID:
-			result = audit_comparator(tsk->fsgid, f->op, f->val);
+			result = audit_comparator(cred->fsgid, f->op, f->val);
 			break;
 		case AUDIT_PERS:
 			result = audit_comparator(tsk->personality, f->op, f->val);
@@ -1228,6 +1229,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
+	struct cred *cred = tsk->cred;
 	int i, call_panic = 0;
 	struct audit_buffer *ab;
 	struct audit_aux_data *aux;
@@ -1237,14 +1239,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	context->pid = tsk->pid;
 	if (!context->ppid)
 		context->ppid = sys_getppid();
-	context->uid = tsk->uid;
-	context->gid = tsk->gid;
-	context->euid = tsk->euid;
-	context->suid = tsk->suid;
-	context->fsuid = tsk->fsuid;
-	context->egid = tsk->egid;
-	context->sgid = tsk->sgid;
-	context->fsgid = tsk->fsgid;
+	context->uid = cred->uid;
+	context->gid = cred->gid;
+	context->euid = cred->euid;
+	context->suid = cred->suid;
+	context->fsuid = cred->fsuid;
+	context->egid = cred->egid;
+	context->sgid = cred->sgid;
+	context->fsgid = cred->fsgid;
 	context->personality = tsk->personality;
 
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
@@ -2086,7 +2088,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
 			audit_log_format(ab, "login pid=%d uid=%u "
 				"old auid=%u new auid=%u"
 				" old ses=%u new ses=%u",
-				task->pid, task->uid,
+				task->pid, task->cred->uid,
 				task->loginuid, loginuid,
 				task->sessionid, sessionid);
 			audit_log_end(ab);
@@ -2469,7 +2471,7 @@ void __audit_ptrace(struct task_struct *t)
 
 	context->target_pid = t->pid;
 	context->target_auid = audit_get_loginuid(t);
-	context->target_uid = t->uid;
+	context->target_uid = t->cred->uid;
 	context->target_sessionid = audit_get_sessionid(t);
 	security_task_getsecid(t, &context->target_sid);
 	memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
@@ -2495,7 +2497,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 			if (tsk->loginuid != -1)
 				audit_sig_uid = tsk->loginuid;
 			else
-				audit_sig_uid = tsk->uid;
+				audit_sig_uid = tsk->cred->uid;
 			security_task_getsecid(tsk, &audit_sig_sid);
 		}
 		if (!audit_signals || audit_dummy_context())
@@ -2507,7 +2509,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	if (!ctx->target_pid) {
 		ctx->target_pid = t->tgid;
 		ctx->target_auid = audit_get_loginuid(t);
-		ctx->target_uid = t->uid;
+		ctx->target_uid = t->cred->uid;
 		ctx->target_sessionid = audit_get_sessionid(t);
 		security_task_getsecid(t, &ctx->target_sid);
 		memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
@@ -2528,7 +2530,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 
 	axp->target_pid[axp->pid_count] = t->tgid;
 	axp->target_auid[axp->pid_count] = audit_get_loginuid(t);
-	axp->target_uid[axp->pid_count] = t->uid;
+	axp->target_uid[axp->pid_count] = t->cred->uid;
 	axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
 	security_task_getsecid(t, &axp->target_sid[axp->pid_count]);
 	memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
@@ -2575,12 +2577,12 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_
 	ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
 
 	ax->old_pcap.permitted = *pP;
-	ax->old_pcap.inheritable = current->cap_inheritable;
+	ax->old_pcap.inheritable = current->cred->cap_inheritable;
 	ax->old_pcap.effective = *pE;
 
-	ax->new_pcap.permitted = current->cap_permitted;
-	ax->new_pcap.inheritable = current->cap_inheritable;
-	ax->new_pcap.effective = current->cap_effective;
+	ax->new_pcap.permitted = current->cred->cap_permitted;
+	ax->new_pcap.inheritable = current->cred->cap_inheritable;
+	ax->new_pcap.effective = current->cred->cap_effective;
 }
 
 /**
diff --git a/kernel/capability.c b/kernel/capability.c
index 58b00519624a..a404b980b1bd 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -171,8 +171,8 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new)
 
 	spin_lock(&task_capability_lock);
 
-	pE_old = current->cap_effective;
-	current->cap_effective = pE_new;
+	pE_old = current->cred->cap_effective;
+	current->cred->cap_effective = pE_new;
 
 	spin_unlock(&task_capability_lock);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 78f9b310c4f3..e210526e6401 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1293,7 +1293,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
 		rcu_read_unlock();
 
 		euid = current_euid();
-		if (euid && euid != tsk->uid && euid != tsk->suid) {
+		if (euid &&
+		    euid != tsk->cred->uid &&
+		    euid != tsk->cred->suid) {
 			put_task_struct(tsk);
 			return -EACCES;
 		}
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5d9467..e0f6e1892fb9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -160,7 +160,7 @@ void release_task(struct task_struct * p)
 	int zap_leader;
 repeat:
 	tracehook_prepare_release_task(p);
-	atomic_dec(&p->user->processes);
+	atomic_dec(&p->cred->user->processes);
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
@@ -1272,7 +1272,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		return 0;
 
 	if (unlikely(options & WNOWAIT)) {
-		uid_t uid = p->uid;
+		uid_t uid = p->cred->uid;
 		int exit_code = p->exit_code;
 		int why, status;
 
@@ -1393,7 +1393,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
-		retval = put_user(p->uid, &infop->si_uid);
+		retval = put_user(p->cred->uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 
@@ -1458,7 +1458,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
 	if (!unlikely(options & WNOWAIT))
 		p->exit_code = 0;
 
-	uid = p->uid;
+	uid = p->cred->uid;
 unlock_sig:
 	spin_unlock_irq(&p->sighand->siglock);
 	if (!exit_code)
@@ -1535,7 +1535,7 @@ static int wait_task_continued(struct task_struct *p, int options,
 	spin_unlock_irq(&p->sighand->siglock);
 
 	pid = task_pid_vnr(p);
-	uid = p->uid;
+	uid = p->cred->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe0..81fdc7733908 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -147,8 +147,8 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(tsk == current);
 
 	security_task_free(tsk);
-	free_uid(tsk->user);
-	put_group_info(tsk->group_info);
+	free_uid(tsk->__temp_cred.user);
+	put_group_info(tsk->__temp_cred.group_info);
 	delayacct_tsk_free(tsk);
 
 	if (!profile_handoff_task(tsk))
@@ -969,17 +969,18 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
+	p->cred = &p->__temp_cred;
 	retval = -EAGAIN;
-	if (atomic_read(&p->user->processes) >=
+	if (atomic_read(&p->cred->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-		    p->user != current->nsproxy->user_ns->root_user)
+		    p->cred->user != current->nsproxy->user_ns->root_user)
 			goto bad_fork_free;
 	}
 
-	atomic_inc(&p->user->__count);
-	atomic_inc(&p->user->processes);
-	get_group_info(p->group_info);
+	atomic_inc(&p->cred->user->__count);
+	atomic_inc(&p->cred->user->processes);
+	get_group_info(p->cred->group_info);
 
 	/*
 	 * If multiple threads are within copy_process(), then this check
@@ -1035,9 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->real_start_time = p->start_time;
 	monotonic_to_bootbased(&p->real_start_time);
 #ifdef CONFIG_SECURITY
-	p->security = NULL;
+	p->cred->security = NULL;
 #endif
-	p->cap_bset = current->cap_bset;
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	cgroup_fork(p);
@@ -1298,9 +1298,9 @@ bad_fork_cleanup_cgroup:
 bad_fork_cleanup_put_domain:
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
-	put_group_info(p->group_info);
-	atomic_dec(&p->user->processes);
-	free_uid(p->user);
+	put_group_info(p->cred->group_info);
+	atomic_dec(&p->cred->user->processes);
+	free_uid(p->cred->user);
 bad_fork_free:
 	free_task(p);
 fork_out:
diff --git a/kernel/futex.c b/kernel/futex.c
index e06962132aaf..28421d8210b8 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -443,7 +443,8 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 
 	rcu_read_lock();
 	p = find_task_by_vpid(pid);
-	if (!p || (euid != p->euid && euid != p->uid))
+	if (!p || (euid != p->cred->euid &&
+		   euid != p->cred->uid))
 		p = ERR_PTR(-ESRCH);
 	else
 		get_task_struct(p);
@@ -1846,7 +1847,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
-		if (euid != p->euid && euid != p->uid &&
+		if (euid != p->cred->euid &&
+		    euid != p->cred->uid &&
 		    !capable(CAP_SYS_PTRACE))
 			goto err_unlock;
 		head = p->robust_list;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 3254d4e41e88..2c3fd5ed34f5 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
-		if (euid != p->euid && euid != p->uid &&
-				!capable(CAP_SYS_PTRACE))
+		if (euid != p->cred->euid &&
+		    euid != p->cred->uid &&
+		    !capable(CAP_SYS_PTRACE))
 			goto err_unlock;
 		head = p->compat_robust_list;
 		read_unlock(&tasklist_lock);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 937f6b5b2008..49849d12dd12 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -115,6 +115,8 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 
 int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
+	struct cred *cred = current->cred, *tcred = task->cred;
+
 	/* May we inspect the given task?
 	 * This check is used both for attaching with ptrace
 	 * and for allowing access to sensitive information in /proc.
@@ -123,19 +125,18 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	 * because setting up the necessary parent/child relationship
 	 * or halting the specified task is impossible.
 	 */
-	uid_t uid;
-	gid_t gid;
+	uid_t uid = cred->uid;
+	gid_t gid = cred->gid;
 	int dumpable = 0;
 	/* Don't let security modules deny introspection */
 	if (task == current)
 		return 0;
-	current_uid_gid(&uid, &gid);
-	if ((uid != task->euid ||
-	     uid != task->suid ||
-	     uid != task->uid  ||
-	     gid != task->egid ||
-	     gid != task->sgid ||
-	     gid != task->gid) && !capable(CAP_SYS_PTRACE))
+	if ((uid != tcred->euid ||
+	     uid != tcred->suid ||
+	     uid != tcred->uid  ||
+	     gid != tcred->egid ||
+	     gid != tcred->sgid ||
+	     gid != tcred->gid) && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
 	smp_rmb();
 	if (task->mm)
diff --git a/kernel/sched.c b/kernel/sched.c
index c3b8b1fcde0d..733c59e645aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -345,7 +345,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct task_group *tg;
 
 #ifdef CONFIG_USER_SCHED
-	tg = p->user->tg;
+	tg = p->cred->user->tg;
 #elif defined(CONFIG_CGROUP_SCHED)
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 				struct task_group, css);
@@ -5182,8 +5182,8 @@ recheck:
 
 		/* can't change other user's priorities */
 		euid = current_euid();
-		if (euid != p->euid &&
-		    euid != p->uid)
+		if (euid != p->cred->euid &&
+		    euid != p->cred->uid)
 			return -EPERM;
 	}
 
@@ -5417,7 +5417,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 
 	euid = current_euid();
 	retval = -EPERM;
-	if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE))
+	if (euid != p->cred->euid &&
+	    euid != p->cred->uid &&
+	    !capable(CAP_SYS_NICE))
 		goto out_unlock;
 
 	retval = security_task_setscheduler(p, 0, NULL);
diff --git a/kernel/signal.c b/kernel/signal.c
index 167b535fe1a9..80e8a6489f97 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -187,7 +187,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 	 * In order to avoid problems with "switch_user()", we want to make
 	 * sure that the compiler doesn't re-load "t->user"
 	 */
-	user = t->user;
+	user = t->cred->user;
 	barrier();
 	atomic_inc(&user->sigpending);
 	if (override_rlimit ||
@@ -582,8 +582,8 @@ static int check_kill_permission(int sig, struct siginfo *info,
 
 	uid = current_uid();
 	euid = current_euid();
-	if ((euid ^ t->suid) && (euid ^ t->uid) &&
-	    (uid  ^ t->suid) && (uid  ^ t->uid) &&
+	if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) &&
+	    (uid  ^ t->cred->suid) && (uid  ^ t->cred->uid) &&
 	    !capable(CAP_KILL)) {
 		switch (sig) {
 		case SIGCONT:
@@ -1100,8 +1100,8 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
 		goto out_unlock;
 	}
 	if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
-	    && (euid != p->suid) && (euid != p->uid)
-	    && (uid != p->suid) && (uid != p->uid)) {
+	    && (euid != p->cred->suid) && (euid != p->cred->uid)
+	    && (uid != p->cred->suid) && (uid != p->cred->uid)) {
 		ret = -EPERM;
 		goto out_unlock;
 	}
@@ -1374,7 +1374,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
 	rcu_read_unlock();
 
-	info.si_uid = tsk->uid;
+	info.si_uid = tsk->cred->uid;
 
 	thread_group_cputime(tsk, &cputime);
 	info.si_utime = cputime_to_jiffies(cputime.utime);
@@ -1445,7 +1445,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
 	rcu_read_unlock();
 
-	info.si_uid = tsk->uid;
+	info.si_uid = tsk->cred->uid;
 
 	info.si_utime = cputime_to_clock_t(tsk->utime);
 	info.si_stime = cputime_to_clock_t(tsk->stime);
@@ -1713,7 +1713,7 @@ static int ptrace_signal(int signr, siginfo_t *info,
 		info->si_errno = 0;
 		info->si_code = SI_USER;
 		info->si_pid = task_pid_vnr(current->parent);
-		info->si_uid = current->parent->uid;
+		info->si_uid = current->parent->cred->uid;
 	}
 
 	/* If the (new) signal is now blocked, requeue it.  */
diff --git a/kernel/sys.c b/kernel/sys.c
index ed5c29c748ac..5d81f07c0150 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -117,7 +117,9 @@ static int set_one_prio(struct task_struct *p, int niceval, int error)
 	uid_t euid = current_euid();
 	int no_nice;
 
-	if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) {
+	if (p->cred->uid  != euid &&
+	    p->cred->euid != euid &&
+	    !capable(CAP_SYS_NICE)) {
 		error = -EPERM;
 		goto out;
 	}
@@ -174,7 +176,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
-			user = current->user;
+			user = current->cred->user;
 			if (!who)
 				who = current_uid();
 			else
@@ -182,7 +184,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 					goto out_unlock;	/* No processes for this user */
 
 			do_each_thread(g, p)
-				if (p->uid == who)
+				if (p->cred->uid == who)
 					error = set_one_prio(p, niceval, error);
 			while_each_thread(g, p);
 			if (who != current_uid())
@@ -236,7 +238,7 @@ asmlinkage long sys_getpriority(int which, int who)
 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
-			user = current->user;
+			user = current->cred->user;
 			if (!who)
 				who = current_uid();
 			else
@@ -244,7 +246,7 @@ asmlinkage long sys_getpriority(int which, int who)
 					goto out_unlock;	/* No processes for this user */
 
 			do_each_thread(g, p)
-				if (p->uid == who) {
+				if (p->cred->uid == who) {
 					niceval = 20 - task_nice(p);
 					if (niceval > retval)
 						retval = niceval;
@@ -472,8 +474,9 @@ void ctrl_alt_del(void)
  */
 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 {
-	int old_rgid = current->gid;
-	int old_egid = current->egid;
+	struct cred *cred = current->cred;
+	int old_rgid = cred->gid;
+	int old_egid = cred->egid;
 	int new_rgid = old_rgid;
 	int new_egid = old_egid;
 	int retval;
@@ -484,7 +487,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 
 	if (rgid != (gid_t) -1) {
 		if ((old_rgid == rgid) ||
-		    (current->egid==rgid) ||
+		    (cred->egid == rgid) ||
 		    capable(CAP_SETGID))
 			new_rgid = rgid;
 		else
@@ -492,8 +495,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 	}
 	if (egid != (gid_t) -1) {
 		if ((old_rgid == egid) ||
-		    (current->egid == egid) ||
-		    (current->sgid == egid) ||
+		    (cred->egid == egid) ||
+		    (cred->sgid == egid) ||
 		    capable(CAP_SETGID))
 			new_egid = egid;
 		else
@@ -505,10 +508,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 	}
 	if (rgid != (gid_t) -1 ||
 	    (egid != (gid_t) -1 && egid != old_rgid))
-		current->sgid = new_egid;
-	current->fsgid = new_egid;
-	current->egid = new_egid;
-	current->gid = new_rgid;
+		cred->sgid = new_egid;
+	cred->fsgid = new_egid;
+	cred->egid = new_egid;
+	cred->gid = new_rgid;
 	key_fsgid_changed(current);
 	proc_id_connector(current, PROC_EVENT_GID);
 	return 0;
@@ -521,7 +524,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
  */
 asmlinkage long sys_setgid(gid_t gid)
 {
-	int old_egid = current->egid;
+	struct cred *cred = current->cred;
+	int old_egid = cred->egid;
 	int retval;
 
 	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
@@ -533,13 +537,13 @@ asmlinkage long sys_setgid(gid_t gid)
 			set_dumpable(current->mm, suid_dumpable);
 			smp_wmb();
 		}
-		current->gid = current->egid = current->sgid = current->fsgid = gid;
-	} else if ((gid == current->gid) || (gid == current->sgid)) {
+		cred->gid = cred->egid = cred->sgid = cred->fsgid = gid;
+	} else if ((gid == cred->gid) || (gid == cred->sgid)) {
 		if (old_egid != gid) {
 			set_dumpable(current->mm, suid_dumpable);
 			smp_wmb();
 		}
-		current->egid = current->fsgid = gid;
+		cred->egid = cred->fsgid = gid;
 	}
 	else
 		return -EPERM;
@@ -570,7 +574,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
 		set_dumpable(current->mm, suid_dumpable);
 		smp_wmb();
 	}
-	current->uid = new_ruid;
+	current->cred->uid = new_ruid;
 	return 0;
 }
 
@@ -591,6 +595,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
  */
 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 {
+	struct cred *cred = current->cred;
 	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
 	int retval;
 
@@ -598,14 +603,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 	if (retval)
 		return retval;
 
-	new_ruid = old_ruid = current->uid;
-	new_euid = old_euid = current->euid;
-	old_suid = current->suid;
+	new_ruid = old_ruid = cred->uid;
+	new_euid = old_euid = cred->euid;
+	old_suid = cred->suid;
 
 	if (ruid != (uid_t) -1) {
 		new_ruid = ruid;
 		if ((old_ruid != ruid) &&
-		    (current->euid != ruid) &&
+		    (cred->euid != ruid) &&
 		    !capable(CAP_SETUID))
 			return -EPERM;
 	}
@@ -613,8 +618,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 	if (euid != (uid_t) -1) {
 		new_euid = euid;
 		if ((old_ruid != euid) &&
-		    (current->euid != euid) &&
-		    (current->suid != euid) &&
+		    (cred->euid != euid) &&
+		    (cred->suid != euid) &&
 		    !capable(CAP_SETUID))
 			return -EPERM;
 	}
@@ -626,11 +631,11 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 		set_dumpable(current->mm, suid_dumpable);
 		smp_wmb();
 	}
-	current->fsuid = current->euid = new_euid;
+	cred->fsuid = cred->euid = new_euid;
 	if (ruid != (uid_t) -1 ||
 	    (euid != (uid_t) -1 && euid != old_ruid))
-		current->suid = current->euid;
-	current->fsuid = current->euid;
+		cred->suid = cred->euid;
+	cred->fsuid = cred->euid;
 
 	key_fsuid_changed(current);
 	proc_id_connector(current, PROC_EVENT_UID);
@@ -653,7 +658,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
  */
 asmlinkage long sys_setuid(uid_t uid)
 {
-	int old_euid = current->euid;
+	struct cred *cred = current->cred;
+	int old_euid = cred->euid;
 	int old_ruid, old_suid, new_suid;
 	int retval;
 
@@ -661,23 +667,23 @@ asmlinkage long sys_setuid(uid_t uid)
 	if (retval)
 		return retval;
 
-	old_ruid = current->uid;
-	old_suid = current->suid;
+	old_ruid = cred->uid;
+	old_suid = cred->suid;
 	new_suid = old_suid;
 	
 	if (capable(CAP_SETUID)) {
 		if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
 			return -EAGAIN;
 		new_suid = uid;
-	} else if ((uid != current->uid) && (uid != new_suid))
+	} else if ((uid != cred->uid) && (uid != new_suid))
 		return -EPERM;
 
 	if (old_euid != uid) {
 		set_dumpable(current->mm, suid_dumpable);
 		smp_wmb();
 	}
-	current->fsuid = current->euid = uid;
-	current->suid = new_suid;
+	cred->fsuid = cred->euid = uid;
+	cred->suid = new_suid;
 
 	key_fsuid_changed(current);
 	proc_id_connector(current, PROC_EVENT_UID);
@@ -692,9 +698,10 @@ asmlinkage long sys_setuid(uid_t uid)
  */
 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 {
-	int old_ruid = current->uid;
-	int old_euid = current->euid;
-	int old_suid = current->suid;
+	struct cred *cred = current->cred;
+	int old_ruid = cred->uid;
+	int old_euid = cred->euid;
+	int old_suid = cred->suid;
 	int retval;
 
 	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
@@ -702,30 +709,31 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 		return retval;
 
 	if (!capable(CAP_SETUID)) {
-		if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
-		    (ruid != current->euid) && (ruid != current->suid))
+		if ((ruid != (uid_t) -1) && (ruid != cred->uid) &&
+		    (ruid != cred->euid) && (ruid != cred->suid))
 			return -EPERM;
-		if ((euid != (uid_t) -1) && (euid != current->uid) &&
-		    (euid != current->euid) && (euid != current->suid))
+		if ((euid != (uid_t) -1) && (euid != cred->uid) &&
+		    (euid != cred->euid) && (euid != cred->suid))
 			return -EPERM;
-		if ((suid != (uid_t) -1) && (suid != current->uid) &&
-		    (suid != current->euid) && (suid != current->suid))
+		if ((suid != (uid_t) -1) && (suid != cred->uid) &&
+		    (suid != cred->euid) && (suid != cred->suid))
 			return -EPERM;
 	}
 	if (ruid != (uid_t) -1) {
-		if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
+		if (ruid != cred->uid &&
+		    set_user(ruid, euid != cred->euid) < 0)
 			return -EAGAIN;
 	}
 	if (euid != (uid_t) -1) {
-		if (euid != current->euid) {
+		if (euid != cred->euid) {
 			set_dumpable(current->mm, suid_dumpable);
 			smp_wmb();
 		}
-		current->euid = euid;
+		cred->euid = euid;
 	}
-	current->fsuid = current->euid;
+	cred->fsuid = cred->euid;
 	if (suid != (uid_t) -1)
-		current->suid = suid;
+		cred->suid = suid;
 
 	key_fsuid_changed(current);
 	proc_id_connector(current, PROC_EVENT_UID);
@@ -735,11 +743,12 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 
 asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
 {
+	struct cred *cred = current->cred;
 	int retval;
 
-	if (!(retval = put_user(current->uid, ruid)) &&
-	    !(retval = put_user(current->euid, euid)))
-		retval = put_user(current->suid, suid);
+	if (!(retval = put_user(cred->uid, ruid)) &&
+	    !(retval = put_user(cred->euid, euid)))
+		retval = put_user(cred->suid, suid);
 
 	return retval;
 }
@@ -749,6 +758,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us
  */
 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 {
+	struct cred *cred = current->cred;
 	int retval;
 
 	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
@@ -756,28 +766,28 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 		return retval;
 
 	if (!capable(CAP_SETGID)) {
-		if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
-		    (rgid != current->egid) && (rgid != current->sgid))
+		if ((rgid != (gid_t) -1) && (rgid != cred->gid) &&
+		    (rgid != cred->egid) && (rgid != cred->sgid))
 			return -EPERM;
-		if ((egid != (gid_t) -1) && (egid != current->gid) &&
-		    (egid != current->egid) && (egid != current->sgid))
+		if ((egid != (gid_t) -1) && (egid != cred->gid) &&
+		    (egid != cred->egid) && (egid != cred->sgid))
 			return -EPERM;
-		if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
-		    (sgid != current->egid) && (sgid != current->sgid))
+		if ((sgid != (gid_t) -1) && (sgid != cred->gid) &&
+		    (sgid != cred->egid) && (sgid != cred->sgid))
 			return -EPERM;
 	}
 	if (egid != (gid_t) -1) {
-		if (egid != current->egid) {
+		if (egid != cred->egid) {
 			set_dumpable(current->mm, suid_dumpable);
 			smp_wmb();
 		}
-		current->egid = egid;
+		cred->egid = egid;
 	}
-	current->fsgid = current->egid;
+	cred->fsgid = cred->egid;
 	if (rgid != (gid_t) -1)
-		current->gid = rgid;
+		cred->gid = rgid;
 	if (sgid != (gid_t) -1)
-		current->sgid = sgid;
+		cred->sgid = sgid;
 
 	key_fsgid_changed(current);
 	proc_id_connector(current, PROC_EVENT_GID);
@@ -786,11 +796,12 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 
 asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
 {
+	struct cred *cred = current->cred;
 	int retval;
 
-	if (!(retval = put_user(current->gid, rgid)) &&
-	    !(retval = put_user(current->egid, egid)))
-		retval = put_user(current->sgid, sgid);
+	if (!(retval = put_user(cred->gid, rgid)) &&
+	    !(retval = put_user(cred->egid, egid)))
+		retval = put_user(cred->sgid, sgid);
 
 	return retval;
 }
@@ -804,20 +815,21 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us
  */
 asmlinkage long sys_setfsuid(uid_t uid)
 {
+	struct cred *cred = current->cred;
 	int old_fsuid;
 
-	old_fsuid = current->fsuid;
+	old_fsuid = cred->fsuid;
 	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
 		return old_fsuid;
 
-	if (uid == current->uid || uid == current->euid ||
-	    uid == current->suid || uid == current->fsuid || 
+	if (uid == cred->uid || uid == cred->euid ||
+	    uid == cred->suid || uid == cred->fsuid ||
 	    capable(CAP_SETUID)) {
 		if (uid != old_fsuid) {
 			set_dumpable(current->mm, suid_dumpable);
 			smp_wmb();
 		}
-		current->fsuid = uid;
+		cred->fsuid = uid;
 	}
 
 	key_fsuid_changed(current);
@@ -833,20 +845,21 @@ asmlinkage long sys_setfsuid(uid_t uid)
  */
 asmlinkage long sys_setfsgid(gid_t gid)
 {
+	struct cred *cred = current->cred;
 	int old_fsgid;
 
-	old_fsgid = current->fsgid;
+	old_fsgid = cred->fsgid;
 	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
 		return old_fsgid;
 
-	if (gid == current->gid || gid == current->egid ||
-	    gid == current->sgid || gid == current->fsgid || 
+	if (gid == cred->gid || gid == cred->egid ||
+	    gid == cred->sgid || gid == cred->fsgid ||
 	    capable(CAP_SETGID)) {
 		if (gid != old_fsgid) {
 			set_dumpable(current->mm, suid_dumpable);
 			smp_wmb();
 		}
-		current->fsgid = gid;
+		cred->fsgid = gid;
 		key_fsgid_changed(current);
 		proc_id_connector(current, PROC_EVENT_GID);
 	}
@@ -1208,8 +1221,15 @@ int groups_search(struct group_info *group_info, gid_t grp)
 	return 0;
 }
 
-/* validate and set current->group_info */
-int set_current_groups(struct group_info *group_info)
+/**
+ * set_groups - Change a group subscription in a security record
+ * @sec: The security record to alter
+ * @group_info: The group list to impose
+ *
+ * Validate a group subscription and, if valid, impose it upon a task security
+ * record.
+ */
+int set_groups(struct cred *cred, struct group_info *group_info)
 {
 	int retval;
 	struct group_info *old_info;
@@ -1221,20 +1241,34 @@ int set_current_groups(struct group_info *group_info)
 	groups_sort(group_info);
 	get_group_info(group_info);
 
-	task_lock(current);
-	old_info = current->group_info;
-	current->group_info = group_info;
-	task_unlock(current);
+	spin_lock(&cred->lock);
+	old_info = cred->group_info;
+	cred->group_info = group_info;
+	spin_unlock(&cred->lock);
 
 	put_group_info(old_info);
-
 	return 0;
 }
 
+EXPORT_SYMBOL(set_groups);
+
+/**
+ * set_current_groups - Change current's group subscription
+ * @group_info: The group list to impose
+ *
+ * Validate a group subscription and, if valid, impose it upon current's task
+ * security record.
+ */
+int set_current_groups(struct group_info *group_info)
+{
+	return set_groups(current->cred, group_info);
+}
+
 EXPORT_SYMBOL(set_current_groups);
 
 asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
 {
+	struct cred *cred = current->cred;
 	int i = 0;
 
 	/*
@@ -1246,13 +1280,13 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
 		return -EINVAL;
 
 	/* no need to grab task_lock here; it cannot change */
-	i = current->group_info->ngroups;
+	i = cred->group_info->ngroups;
 	if (gidsetsize) {
 		if (i > gidsetsize) {
 			i = -EINVAL;
 			goto out;
 		}
-		if (groups_to_user(grouplist, current->group_info)) {
+		if (groups_to_user(grouplist, cred->group_info)) {
 			i = -EFAULT;
 			goto out;
 		}
@@ -1296,9 +1330,10 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
  */
 int in_group_p(gid_t grp)
 {
+	struct cred *cred = current->cred;
 	int retval = 1;
-	if (grp != current->fsgid)
-		retval = groups_search(current->group_info, grp);
+	if (grp != cred->fsgid)
+		retval = groups_search(cred->group_info, grp);
 	return retval;
 }
 
@@ -1306,9 +1341,10 @@ EXPORT_SYMBOL(in_group_p);
 
 int in_egroup_p(gid_t grp)
 {
+	struct cred *cred = current->cred;
 	int retval = 1;
-	if (grp != current->egid)
-		retval = groups_search(current->group_info, grp);
+	if (grp != cred->egid)
+		retval = groups_search(cred->group_info, grp);
 	return retval;
 }
 
@@ -1624,7 +1660,9 @@ asmlinkage long sys_umask(int mask)
 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5)
 {
-	long error = 0;
+	struct task_struct *me = current;
+	unsigned char comm[sizeof(me->comm)];
+	long error;
 
 	if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error))
 		return error;
@@ -1635,39 +1673,41 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 				error = -EINVAL;
 				break;
 			}
-			current->pdeath_signal = arg2;
+			me->pdeath_signal = arg2;
+			error = 0;
 			break;
 		case PR_GET_PDEATHSIG:
-			error = put_user(current->pdeath_signal, (int __user *)arg2);
+			error = put_user(me->pdeath_signal, (int __user *)arg2);
 			break;
 		case PR_GET_DUMPABLE:
-			error = get_dumpable(current->mm);
+			error = get_dumpable(me->mm);
 			break;
 		case PR_SET_DUMPABLE:
 			if (arg2 < 0 || arg2 > 1) {
 				error = -EINVAL;
 				break;
 			}
-			set_dumpable(current->mm, arg2);
+			set_dumpable(me->mm, arg2);
+			error = 0;
 			break;
 
 		case PR_SET_UNALIGN:
-			error = SET_UNALIGN_CTL(current, arg2);
+			error = SET_UNALIGN_CTL(me, arg2);
 			break;
 		case PR_GET_UNALIGN:
-			error = GET_UNALIGN_CTL(current, arg2);
+			error = GET_UNALIGN_CTL(me, arg2);
 			break;
 		case PR_SET_FPEMU:
-			error = SET_FPEMU_CTL(current, arg2);
+			error = SET_FPEMU_CTL(me, arg2);
 			break;
 		case PR_GET_FPEMU:
-			error = GET_FPEMU_CTL(current, arg2);
+			error = GET_FPEMU_CTL(me, arg2);
 			break;
 		case PR_SET_FPEXC:
-			error = SET_FPEXC_CTL(current, arg2);
+			error = SET_FPEXC_CTL(me, arg2);
 			break;
 		case PR_GET_FPEXC:
-			error = GET_FPEXC_CTL(current, arg2);
+			error = GET_FPEXC_CTL(me, arg2);
 			break;
 		case PR_GET_TIMING:
 			error = PR_TIMING_STATISTICAL;
@@ -1675,33 +1715,28 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 		case PR_SET_TIMING:
 			if (arg2 != PR_TIMING_STATISTICAL)
 				error = -EINVAL;
+			else
+				error = 0;
 			break;
 
-		case PR_SET_NAME: {
-			struct task_struct *me = current;
-			unsigned char ncomm[sizeof(me->comm)];
-
-			ncomm[sizeof(me->comm)-1] = 0;
-			if (strncpy_from_user(ncomm, (char __user *)arg2,
-						sizeof(me->comm)-1) < 0)
+		case PR_SET_NAME:
+			comm[sizeof(me->comm)-1] = 0;
+			if (strncpy_from_user(comm, (char __user *)arg2,
+					      sizeof(me->comm) - 1) < 0)
 				return -EFAULT;
-			set_task_comm(me, ncomm);
+			set_task_comm(me, comm);
 			return 0;
-		}
-		case PR_GET_NAME: {
-			struct task_struct *me = current;
-			unsigned char tcomm[sizeof(me->comm)];
-
-			get_task_comm(tcomm, me);
-			if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
+		case PR_GET_NAME:
+			get_task_comm(comm, me);
+			if (copy_to_user((char __user *)arg2, comm,
+					 sizeof(comm)))
 				return -EFAULT;
 			return 0;
-		}
 		case PR_GET_ENDIAN:
-			error = GET_ENDIAN(current, arg2);
+			error = GET_ENDIAN(me, arg2);
 			break;
 		case PR_SET_ENDIAN:
-			error = SET_ENDIAN(current, arg2);
+			error = SET_ENDIAN(me, arg2);
 			break;
 
 		case PR_GET_SECCOMP:
@@ -1725,6 +1760,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 					current->default_timer_slack_ns;
 			else
 				current->timer_slack_ns = arg2;
+			error = 0;
 			break;
 		default:
 			error = -EINVAL;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9f3b478f9171..5c97c5b4ea8f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -246,7 +246,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
 	data->pid = tsk->pid;
-	data->uid = tsk->uid;
+	data->uid = task_uid(tsk);
 	data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
 	data->policy = tsk->policy;
 	data->rt_priority = tsk->rt_priority;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 8ebcd8532dfb..6d1ed07bf312 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -53,8 +53,8 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 		stats->ac_flag |= AXSIG;
 	stats->ac_nice	 = task_nice(tsk);
 	stats->ac_sched	 = tsk->policy;
-	stats->ac_uid	 = tsk->uid;
-	stats->ac_gid	 = tsk->gid;
+	stats->ac_uid	 = tsk->cred->uid;
+	stats->ac_gid	 = tsk->cred->gid;
 	stats->ac_pid	 = tsk->pid;
 	rcu_read_lock();
 	stats->ac_ppid	 = pid_alive(tsk) ?
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 3e41c1673e2f..71f07fc39fea 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -86,9 +86,9 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid,
 {
 	int retval;
 
-	if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
-	    !(retval = put_user(high2lowuid(current->euid), euid)))
-		retval = put_user(high2lowuid(current->suid), suid);
+	if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) &&
+	    !(retval = put_user(high2lowuid(current->cred->euid), euid)))
+		retval = put_user(high2lowuid(current->cred->suid), suid);
 
 	return retval;
 }
@@ -106,9 +106,9 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid,
 {
 	int retval;
 
-	if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
-	    !(retval = put_user(high2lowgid(current->egid), egid)))
-		retval = put_user(high2lowgid(current->sgid), sgid);
+	if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) &&
+	    !(retval = put_user(high2lowgid(current->cred->egid), egid)))
+		retval = put_user(high2lowgid(current->cred->sgid), sgid);
 
 	return retval;
 }
@@ -166,20 +166,20 @@ asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist)
 	if (gidsetsize < 0)
 		return -EINVAL;
 
-	get_group_info(current->group_info);
-	i = current->group_info->ngroups;
+	get_group_info(current->cred->group_info);
+	i = current->cred->group_info->ngroups;
 	if (gidsetsize) {
 		if (i > gidsetsize) {
 			i = -EINVAL;
 			goto out;
 		}
-		if (groups16_to_user(grouplist, current->group_info)) {
+		if (groups16_to_user(grouplist, current->cred->group_info)) {
 			i = -EFAULT;
 			goto out;
 		}
 	}
 out:
-	put_group_info(current->group_info);
+	put_group_info(current->cred->group_info);
 	return i;
 }
 
@@ -210,20 +210,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
 
 asmlinkage long sys_getuid16(void)
 {
-	return high2lowuid(current->uid);
+	return high2lowuid(current->cred->uid);
 }
 
 asmlinkage long sys_geteuid16(void)
 {
-	return high2lowuid(current->euid);
+	return high2lowuid(current->cred->euid);
 }
 
 asmlinkage long sys_getgid16(void)
 {
-	return high2lowgid(current->gid);
+	return high2lowgid(current->cred->gid);
 }
 
 asmlinkage long sys_getegid16(void)
 {
-	return high2lowgid(current->egid);
+	return high2lowgid(current->cred->egid);
 }
diff --git a/kernel/user.c b/kernel/user.c
index 39d6159fae43..104d22ac84d5 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -457,11 +457,11 @@ void switch_uid(struct user_struct *new_user)
 	 * cheaply with the new uid cache, so if it matters
 	 * we should be checking for it.  -DaveM
 	 */
-	old_user = current->user;
+	old_user = current->cred->user;
 	atomic_inc(&new_user->processes);
 	atomic_dec(&old_user->processes);
 	switch_uid_keyring(new_user);
-	current->user = new_user;
+	current->cred->user = new_user;
 	sched_switch_user(current);
 
 	/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 07a96474077d..b23492ee3e50 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1110,12 +1110,12 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 		const unsigned long __user *old_nodes,
 		const unsigned long __user *new_nodes)
 {
+	struct cred *cred, *tcred;
 	struct mm_struct *mm;
 	struct task_struct *task;
 	nodemask_t old;
 	nodemask_t new;
 	nodemask_t task_nodes;
-	uid_t uid, euid;
 	int err;
 
 	err = get_nodes(&old, old_nodes, maxnode);
@@ -1145,10 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
-	uid = current_uid();
-	euid = current_euid();
-	if (euid != task->suid && euid != task->uid &&
-	    uid  != task->suid && uid  != task->uid &&
+	cred = current->cred;
+	tcred = task->cred;
+	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
 	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
 		goto out;
diff --git a/mm/migrate.c b/mm/migrate.c
index 6263c24c4afe..794443da1b4f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1045,10 +1045,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 			const int __user *nodes,
 			int __user *status, int flags)
 {
+	struct cred *cred, *tcred;
 	struct task_struct *task;
 	struct mm_struct *mm;
 	int err;
-	uid_t uid, euid;
 
 	/* Check flags */
 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1076,10 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
-	uid = current_uid();
-	euid = current_euid();
-	if (euid != task->suid && euid != task->uid &&
-	    uid  != task->suid && uid  != task->uid &&
+	cred = current->cred;
+	tcred = task->cred;
+	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
 	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
 		goto out;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 34a458aa7997..3af787ba2077 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -298,7 +298,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
 
 		task_lock(p);
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
-		       p->pid, p->uid, p->tgid, p->mm->total_vm,
+		       p->pid, p->cred->uid, p->tgid, p->mm->total_vm,
 		       get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj,
 		       p->comm);
 		task_unlock(p);
diff --git a/net/core/scm.c b/net/core/scm.c
index 4681d8f9b45b..c28ca32a7d93 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -44,11 +44,13 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
+	struct cred *cred = current->cred;
+
 	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
-	    ((creds->uid == current_uid()   || creds->uid == current_euid() ||
-	      creds->uid == current_suid()) || capable(CAP_SETUID)) &&
-	    ((creds->gid == current_gid()   || creds->gid == current_egid() ||
-	      creds->gid == current_sgid()) || capable(CAP_SETGID))) {
+	    ((creds->uid == cred->uid   || creds->uid == cred->euid ||
+	      creds->uid == cred->suid) || capable(CAP_SETUID)) &&
+	    ((creds->gid == cred->gid   || creds->gid == cred->egid ||
+	      creds->gid == cred->sgid) || capable(CAP_SETGID))) {
 	       return 0;
 	}
 	return -EPERM;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8fc380578807..c79543212602 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -353,7 +353,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
 	struct auth_cred acred = {
 		.uid = current_fsuid(),
 		.gid = current_fsgid(),
-		.group_info = current->group_info,
+		.group_info = current->cred->group_info,
 	};
 	struct rpc_cred *ret;
 
diff --git a/security/commoncap.c b/security/commoncap.c
index fb4e240720d8..fa61679f8c73 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -30,7 +30,7 @@
 
 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
 {
-	NETLINK_CB(skb).eff_cap = current->cap_effective;
+	NETLINK_CB(skb).eff_cap = current_cap();
 	return 0;
 }
 
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(cap_netlink_recv);
 int cap_capable(struct task_struct *tsk, int cap, int audit)
 {
 	/* Derived from include/linux/sched.h:capable. */
-	if (cap_raised(tsk->cap_effective, cap))
+	if (cap_raised(tsk->cred->cap_effective, cap))
 		return 0;
 	return -EPERM;
 }
@@ -67,7 +67,8 @@ int cap_settime(struct timespec *ts, struct timezone *tz)
 int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
 {
 	/* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */
-	if (cap_issubset(child->cap_permitted, current->cap_permitted))
+	if (cap_issubset(child->cred->cap_permitted,
+			 current->cred->cap_permitted))
 		return 0;
 	if (capable(CAP_SYS_PTRACE))
 		return 0;
@@ -76,8 +77,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
 
 int cap_ptrace_traceme(struct task_struct *parent)
 {
-	/* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */
-	if (cap_issubset(current->cap_permitted, parent->cap_permitted))
+	if (cap_issubset(current->cred->cap_permitted,
+			 parent->cred->cap_permitted))
 		return 0;
 	if (has_capability(parent, CAP_SYS_PTRACE))
 		return 0;
@@ -87,10 +88,12 @@ int cap_ptrace_traceme(struct task_struct *parent)
 int cap_capget (struct task_struct *target, kernel_cap_t *effective,
 		kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
+	struct cred *cred = target->cred;
+
 	/* Derived from kernel/capability.c:sys_capget. */
-	*effective = target->cap_effective;
-	*inheritable = target->cap_inheritable;
-	*permitted = target->cap_permitted;
+	*effective   = cred->cap_effective;
+	*inheritable = cred->cap_inheritable;
+	*permitted   = cred->cap_permitted;
 	return 0;
 }
 
@@ -122,24 +125,26 @@ int cap_capset_check(const kernel_cap_t *effective,
 		     const kernel_cap_t *inheritable,
 		     const kernel_cap_t *permitted)
 {
+	const struct cred *cred = current->cred;
+
 	if (cap_inh_is_capped()
 	    && !cap_issubset(*inheritable,
-			     cap_combine(current->cap_inheritable,
-					 current->cap_permitted))) {
+			     cap_combine(cred->cap_inheritable,
+					 cred->cap_permitted))) {
 		/* incapable of using this inheritable set */
 		return -EPERM;
 	}
 	if (!cap_issubset(*inheritable,
-			   cap_combine(current->cap_inheritable,
-				       current->cap_bset))) {
+			   cap_combine(cred->cap_inheritable,
+				       cred->cap_bset))) {
 		/* no new pI capabilities outside bounding set */
 		return -EPERM;
 	}
 
 	/* verify restrictions on target's new Permitted set */
 	if (!cap_issubset (*permitted,
-			   cap_combine (current->cap_permitted,
-					current->cap_permitted))) {
+			   cap_combine (cred->cap_permitted,
+					cred->cap_permitted))) {
 		return -EPERM;
 	}
 
@@ -155,9 +160,11 @@ void cap_capset_set(const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted)
 {
-	current->cap_effective = *effective;
-	current->cap_inheritable = *inheritable;
-	current->cap_permitted = *permitted;
+	struct cred *cred = current->cred;
+
+	cred->cap_effective   = *effective;
+	cred->cap_inheritable = *inheritable;
+	cred->cap_permitted   = *permitted;
 }
 
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
@@ -211,8 +218,8 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
 		 * pP' = (X & fP) | (pI & fI)
 		 */
 		bprm->cap_post_exec_permitted.cap[i] =
-			(current->cap_bset.cap[i] & permitted) |
-			(current->cap_inheritable.cap[i] & inheritable);
+			(current->cred->cap_bset.cap[i] & permitted) |
+			(current->cred->cap_inheritable.cap[i] & inheritable);
 
 		if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) {
 			/*
@@ -354,8 +361,8 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 		if (bprm->e_uid == 0 || current_uid() == 0) {
 			/* pP' = (cap_bset & ~0) | (pI & ~0) */
 			bprm->cap_post_exec_permitted = cap_combine(
-				current->cap_bset, current->cap_inheritable
-				);
+				current->cred->cap_bset,
+				current->cred->cap_inheritable);
 			bprm->cap_effective = (bprm->e_uid == 0);
 			ret = 0;
 		}
@@ -366,44 +373,39 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 
 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
-	kernel_cap_t pP = current->cap_permitted;
-	kernel_cap_t pE = current->cap_effective;
-	uid_t uid;
-	gid_t gid;
+	struct cred *cred = current->cred;
 
-	current_uid_gid(&uid, &gid);
-
-	if (bprm->e_uid != uid || bprm->e_gid != gid ||
+	if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid ||
 	    !cap_issubset(bprm->cap_post_exec_permitted,
-			  current->cap_permitted)) {
+			  cred->cap_permitted)) {
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
 
 		if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
 			if (!capable(CAP_SETUID)) {
-				bprm->e_uid = uid;
-				bprm->e_gid = gid;
+				bprm->e_uid = cred->uid;
+				bprm->e_gid = cred->gid;
 			}
 			if (cap_limit_ptraced_target()) {
 				bprm->cap_post_exec_permitted = cap_intersect(
 					bprm->cap_post_exec_permitted,
-					current->cap_permitted);
+					cred->cap_permitted);
 			}
 		}
 	}
 
-	current->suid = current->euid = current->fsuid = bprm->e_uid;
-	current->sgid = current->egid = current->fsgid = bprm->e_gid;
+	cred->suid = cred->euid = cred->fsuid = bprm->e_uid;
+	cred->sgid = cred->egid = cred->fsgid = bprm->e_gid;
 
 	/* For init, we want to retain the capabilities set
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
 	if (!is_global_init(current)) {
-		current->cap_permitted = bprm->cap_post_exec_permitted;
+		cred->cap_permitted = bprm->cap_post_exec_permitted;
 		if (bprm->cap_effective)
-			current->cap_effective = bprm->cap_post_exec_permitted;
+			cred->cap_effective = bprm->cap_post_exec_permitted;
 		else
-			cap_clear(current->cap_effective);
+			cap_clear(cred->cap_effective);
 	}
 
 	/*
@@ -418,27 +420,30 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	 * Number 1 above might fail if you don't have a full bset, but I think
 	 * that is interesting information to audit.
 	 */
-	if (!cap_isclear(current->cap_effective)) {
-		if (!cap_issubset(CAP_FULL_SET, current->cap_effective) ||
-		    (bprm->e_uid != 0) || (current->uid != 0) ||
+	if (!cap_isclear(cred->cap_effective)) {
+		if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) ||
+		    (bprm->e_uid != 0) || (cred->uid != 0) ||
 		    issecure(SECURE_NOROOT))
-			audit_log_bprm_fcaps(bprm, &pP, &pE);
+			audit_log_bprm_fcaps(bprm, &cred->cap_permitted,
+					     &cred->cap_effective);
 	}
 
-	current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
+	cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
 }
 
 int cap_bprm_secureexec (struct linux_binprm *bprm)
 {
-	if (current_uid() != 0) {
+	const struct cred *cred = current->cred;
+
+	if (cred->uid != 0) {
 		if (bprm->cap_effective)
 			return 1;
 		if (!cap_isclear(bprm->cap_post_exec_permitted))
 			return 1;
 	}
 
-	return (current_euid() != current_uid() ||
-		current_egid() != current_gid());
+	return (cred->euid != cred->uid ||
+		cred->egid != cred->gid);
 }
 
 int cap_inode_setxattr(struct dentry *dentry, const char *name,
@@ -501,25 +506,27 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
 static inline void cap_emulate_setxuid (int old_ruid, int old_euid,
 					int old_suid)
 {
-	uid_t euid = current_euid();
+	struct cred *cred = current->cred;
 
 	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
-	    (current_uid()  != 0 && euid != 0 && current_suid() != 0) &&
+	    (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) &&
 	    !issecure(SECURE_KEEP_CAPS)) {
-		cap_clear (current->cap_permitted);
-		cap_clear (current->cap_effective);
+		cap_clear (cred->cap_permitted);
+		cap_clear (cred->cap_effective);
 	}
-	if (old_euid == 0 && euid != 0) {
-		cap_clear (current->cap_effective);
+	if (old_euid == 0 && cred->euid != 0) {
+		cap_clear (cred->cap_effective);
 	}
-	if (old_euid != 0 && euid == 0) {
-		current->cap_effective = current->cap_permitted;
+	if (old_euid != 0 && cred->euid == 0) {
+		cred->cap_effective = cred->cap_permitted;
 	}
 }
 
 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
 			  int flags)
 {
+	struct cred *cred = current->cred;
+
 	switch (flags) {
 	case LSM_SETID_RE:
 	case LSM_SETID_ID:
@@ -541,16 +548,16 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
 			 */
 
 			if (!issecure (SECURE_NO_SETUID_FIXUP)) {
-				if (old_fsuid == 0 && current_fsuid() != 0) {
-					current->cap_effective =
+				if (old_fsuid == 0 && cred->fsuid != 0) {
+					cred->cap_effective =
 						cap_drop_fs_set(
-						    current->cap_effective);
+							cred->cap_effective);
 				}
-				if (old_fsuid != 0 && current_fsuid() == 0) {
-					current->cap_effective =
+				if (old_fsuid != 0 && cred->fsuid == 0) {
+					cred->cap_effective =
 						cap_raise_fs_set(
-						    current->cap_effective,
-						    current->cap_permitted);
+						    cred->cap_effective,
+						    cred->cap_permitted);
 				}
 			}
 			break;
@@ -575,7 +582,8 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
  */
 static int cap_safe_nice(struct task_struct *p)
 {
-	if (!cap_issubset(p->cap_permitted, current->cap_permitted) &&
+	if (!cap_issubset(p->cred->cap_permitted,
+			  current->cred->cap_permitted) &&
 	    !capable(CAP_SYS_NICE))
 		return -EPERM;
 	return 0;
@@ -610,7 +618,7 @@ static long cap_prctl_drop(unsigned long cap)
 		return -EPERM;
 	if (!cap_valid(cap))
 		return -EINVAL;
-	cap_lower(current->cap_bset, cap);
+	cap_lower(current->cred->cap_bset, cap);
 	return 0;
 }
 
@@ -633,6 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice)
 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		   unsigned long arg4, unsigned long arg5, long *rc_p)
 {
+	struct cred *cred = current->cred;
 	long error = 0;
 
 	switch (option) {
@@ -640,7 +649,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		if (!cap_valid(arg2))
 			error = -EINVAL;
 		else
-			error = !!cap_raised(current->cap_bset, arg2);
+			error = !!cap_raised(cred->cap_bset, arg2);
 		break;
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 	case PR_CAPBSET_DROP:
@@ -667,9 +676,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 	 * capability-based-privilege environment.
 	 */
 	case PR_SET_SECUREBITS:
-		if ((((current->securebits & SECURE_ALL_LOCKS) >> 1)
-		     & (current->securebits ^ arg2))                  /*[1]*/
-		    || ((current->securebits & SECURE_ALL_LOCKS
+		if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1)
+		     & (cred->securebits ^ arg2))                  /*[1]*/
+		    || ((cred->securebits & SECURE_ALL_LOCKS
 			 & ~arg2))                                    /*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
 		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/
@@ -682,11 +691,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			 */
 			error = -EPERM;  /* cannot change a locked bit */
 		} else {
-			current->securebits = arg2;
+			cred->securebits = arg2;
 		}
 		break;
 	case PR_GET_SECUREBITS:
-		error = current->securebits;
+		error = cred->securebits;
 		break;
 
 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
@@ -701,10 +710,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		else if (issecure(SECURE_KEEP_CAPS_LOCKED))
 			error = -EPERM;
 		else if (arg2)
-			current->securebits |= issecure_mask(SECURE_KEEP_CAPS);
+			cred->securebits |= issecure_mask(SECURE_KEEP_CAPS);
 		else
-			current->securebits &=
-				~issecure_mask(SECURE_KEEP_CAPS);
+			cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
 		break;
 
 	default:
@@ -719,11 +727,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 
 void cap_task_reparent_to_init (struct task_struct *p)
 {
-	cap_set_init_eff(p->cap_effective);
-	cap_clear(p->cap_inheritable);
-	cap_set_full(p->cap_permitted);
-	p->securebits = SECUREBITS_DEFAULT;
-	return;
+	struct cred *cred = p->cred;
+
+	cap_set_init_eff(cred->cap_effective);
+	cap_clear(cred->cap_inheritable);
+	cap_set_full(cred->cap_permitted);
+	p->cred->securebits = SECUREBITS_DEFAULT;
 }
 
 int cap_syslog (int type)
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index fcce331eca72..8833b447adef 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -889,7 +889,7 @@ long keyctl_instantiate_key(key_serial_t id,
 	/* the appropriate instantiation authorisation key must have been
 	 * assumed before calling this */
 	ret = -EPERM;
-	instkey = current->request_key_auth;
+	instkey = current->cred->request_key_auth;
 	if (!instkey)
 		goto error;
 
@@ -932,8 +932,8 @@ long keyctl_instantiate_key(key_serial_t id,
 	/* discard the assumed authority if it's just been disabled by
 	 * instantiation of the key */
 	if (ret == 0) {
-		key_put(current->request_key_auth);
-		current->request_key_auth = NULL;
+		key_put(current->cred->request_key_auth);
+		current->cred->request_key_auth = NULL;
 	}
 
 error2:
@@ -960,7 +960,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
 	/* the appropriate instantiation authorisation key must have been
 	 * assumed before calling this */
 	ret = -EPERM;
-	instkey = current->request_key_auth;
+	instkey = current->cred->request_key_auth;
 	if (!instkey)
 		goto error;
 
@@ -983,8 +983,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
 	/* discard the assumed authority if it's just been disabled by
 	 * instantiation of the key */
 	if (ret == 0) {
-		key_put(current->request_key_auth);
-		current->request_key_auth = NULL;
+		key_put(current->cred->request_key_auth);
+		current->cred->request_key_auth = NULL;
 	}
 
 error:
@@ -999,6 +999,7 @@ error:
  */
 long keyctl_set_reqkey_keyring(int reqkey_defl)
 {
+	struct cred *cred = current->cred;
 	int ret;
 
 	switch (reqkey_defl) {
@@ -1018,10 +1019,10 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
 	case KEY_REQKEY_DEFL_USER_KEYRING:
 	case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
 	set:
-		current->jit_keyring = reqkey_defl;
+		cred->jit_keyring = reqkey_defl;
 
 	case KEY_REQKEY_DEFL_NO_CHANGE:
-		return current->jit_keyring;
+		return cred->jit_keyring;
 
 	case KEY_REQKEY_DEFL_GROUP_KEYRING:
 	default:
@@ -1086,8 +1087,8 @@ long keyctl_assume_authority(key_serial_t id)
 
 	/* we divest ourselves of authority if given an ID of 0 */
 	if (id == 0) {
-		key_put(current->request_key_auth);
-		current->request_key_auth = NULL;
+		key_put(current->cred->request_key_auth);
+		current->cred->request_key_auth = NULL;
 		ret = 0;
 		goto error;
 	}
@@ -1103,8 +1104,8 @@ long keyctl_assume_authority(key_serial_t id)
 		goto error;
 	}
 
-	key_put(current->request_key_auth);
-	current->request_key_auth = authkey;
+	key_put(current->cred->request_key_auth);
+	current->cred->request_key_auth = authkey;
 	ret = authkey->serial;
 
 error:
diff --git a/security/keys/permission.c b/security/keys/permission.c
index 3b41f9b52537..baf3d5f31e71 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -22,6 +22,7 @@ int key_task_permission(const key_ref_t key_ref,
 			struct task_struct *context,
 			key_perm_t perm)
 {
+	struct cred *cred = context->cred;
 	struct key *key;
 	key_perm_t kperm;
 	int ret;
@@ -29,7 +30,7 @@ int key_task_permission(const key_ref_t key_ref,
 	key = key_ref_to_ptr(key_ref);
 
 	/* use the second 8-bits of permissions for keys the caller owns */
-	if (key->uid == context->fsuid) {
+	if (key->uid == cred->fsuid) {
 		kperm = key->perm >> 16;
 		goto use_these_perms;
 	}
@@ -37,14 +38,14 @@ int key_task_permission(const key_ref_t key_ref,
 	/* use the third 8-bits of permissions for keys the caller has a group
 	 * membership in common with */
 	if (key->gid != -1 && key->perm & KEY_GRP_ALL) {
-		if (key->gid == context->fsgid) {
+		if (key->gid == cred->fsgid) {
 			kperm = key->perm >> 8;
 			goto use_these_perms;
 		}
 
-		task_lock(context);
-		ret = groups_search(context->group_info, key->gid);
-		task_unlock(context);
+		spin_lock(&cred->lock);
+		ret = groups_search(cred->group_info, key->gid);
+		spin_unlock(&cred->lock);
 
 		if (ret) {
 			kperm = key->perm >> 8;
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 1c793b7090a7..b0904cdda2e7 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -42,7 +42,7 @@ struct key_user root_key_user = {
  */
 int install_user_keyrings(void)
 {
-	struct user_struct *user = current->user;
+	struct user_struct *user = current->cred->user;
 	struct key *uid_keyring, *session_keyring;
 	char buf[20];
 	int ret;
@@ -156,7 +156,7 @@ int install_thread_keyring(void)
 
 	sprintf(buf, "_tid.%u", tsk->pid);
 
-	keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk,
+	keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
@@ -164,8 +164,8 @@ int install_thread_keyring(void)
 	}
 
 	task_lock(tsk);
-	old = tsk->thread_keyring;
-	tsk->thread_keyring = keyring;
+	old = tsk->cred->thread_keyring;
+	tsk->cred->thread_keyring = keyring;
 	task_unlock(tsk);
 
 	ret = 0;
@@ -192,7 +192,7 @@ int install_process_keyring(void)
 	if (!tsk->signal->process_keyring) {
 		sprintf(buf, "_pid.%u", tsk->tgid);
 
-		keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk,
+		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
 					KEY_ALLOC_QUOTA_OVERRUN, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
@@ -238,7 +238,7 @@ static int install_session_keyring(struct key *keyring)
 		if (tsk->signal->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
-		keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk,
+		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
 					flags, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
@@ -292,14 +292,14 @@ int copy_thread_group_keys(struct task_struct *tsk)
  */
 int copy_keys(unsigned long clone_flags, struct task_struct *tsk)
 {
-	key_check(tsk->thread_keyring);
-	key_check(tsk->request_key_auth);
+	key_check(tsk->cred->thread_keyring);
+	key_check(tsk->cred->request_key_auth);
 
 	/* no thread keyring yet */
-	tsk->thread_keyring = NULL;
+	tsk->cred->thread_keyring = NULL;
 
 	/* copy the request_key() authorisation for this thread */
-	key_get(tsk->request_key_auth);
+	key_get(tsk->cred->request_key_auth);
 
 	return 0;
 
@@ -322,8 +322,8 @@ void exit_thread_group_keys(struct signal_struct *tg)
  */
 void exit_keys(struct task_struct *tsk)
 {
-	key_put(tsk->thread_keyring);
-	key_put(tsk->request_key_auth);
+	key_put(tsk->cred->thread_keyring);
+	key_put(tsk->cred->request_key_auth);
 
 } /* end exit_keys() */
 
@@ -337,8 +337,8 @@ int exec_keys(struct task_struct *tsk)
 
 	/* newly exec'd tasks don't get a thread keyring */
 	task_lock(tsk);
-	old = tsk->thread_keyring;
-	tsk->thread_keyring = NULL;
+	old = tsk->cred->thread_keyring;
+	tsk->cred->thread_keyring = NULL;
 	task_unlock(tsk);
 
 	key_put(old);
@@ -373,10 +373,11 @@ int suid_keys(struct task_struct *tsk)
 void key_fsuid_changed(struct task_struct *tsk)
 {
 	/* update the ownership of the thread keyring */
-	if (tsk->thread_keyring) {
-		down_write(&tsk->thread_keyring->sem);
-		tsk->thread_keyring->uid = tsk->fsuid;
-		up_write(&tsk->thread_keyring->sem);
+	BUG_ON(!tsk->cred);
+	if (tsk->cred->thread_keyring) {
+		down_write(&tsk->cred->thread_keyring->sem);
+		tsk->cred->thread_keyring->uid = tsk->cred->fsuid;
+		up_write(&tsk->cred->thread_keyring->sem);
 	}
 
 } /* end key_fsuid_changed() */
@@ -388,10 +389,11 @@ void key_fsuid_changed(struct task_struct *tsk)
 void key_fsgid_changed(struct task_struct *tsk)
 {
 	/* update the ownership of the thread keyring */
-	if (tsk->thread_keyring) {
-		down_write(&tsk->thread_keyring->sem);
-		tsk->thread_keyring->gid = tsk->fsgid;
-		up_write(&tsk->thread_keyring->sem);
+	BUG_ON(!tsk->cred);
+	if (tsk->cred->thread_keyring) {
+		down_write(&tsk->cred->thread_keyring->sem);
+		tsk->cred->thread_keyring->gid = tsk->cred->fsgid;
+		up_write(&tsk->cred->thread_keyring->sem);
 	}
 
 } /* end key_fsgid_changed() */
@@ -426,9 +428,9 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	err = ERR_PTR(-EAGAIN);
 
 	/* search the thread keyring first */
-	if (context->thread_keyring) {
+	if (context->cred->thread_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(context->thread_keyring, 1),
+			make_key_ref(context->cred->thread_keyring, 1),
 			context, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -493,9 +495,9 @@ key_ref_t search_process_keyrings(struct key_type *type,
 		}
 	}
 	/* or search the user-session keyring */
-	else if (context->user->session_keyring) {
+	else if (context->cred->user->session_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(context->user->session_keyring, 1),
+			make_key_ref(context->cred->user->session_keyring, 1),
 			context, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -517,20 +519,20 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	 * search the keyrings of the process mentioned there
 	 * - we don't permit access to request_key auth keys via this method
 	 */
-	if (context->request_key_auth &&
+	if (context->cred->request_key_auth &&
 	    context == current &&
 	    type != &key_type_request_key_auth
 	    ) {
 		/* defend against the auth key being revoked */
-		down_read(&context->request_key_auth->sem);
+		down_read(&context->cred->request_key_auth->sem);
 
-		if (key_validate(context->request_key_auth) == 0) {
-			rka = context->request_key_auth->payload.data;
+		if (key_validate(context->cred->request_key_auth) == 0) {
+			rka = context->cred->request_key_auth->payload.data;
 
 			key_ref = search_process_keyrings(type, description,
 							  match, rka->context);
 
-			up_read(&context->request_key_auth->sem);
+			up_read(&context->cred->request_key_auth->sem);
 
 			if (!IS_ERR(key_ref))
 				goto found;
@@ -547,7 +549,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 				break;
 			}
 		} else {
-			up_read(&context->request_key_auth->sem);
+			up_read(&context->cred->request_key_auth->sem);
 		}
 	}
 
@@ -580,15 +582,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 {
 	struct request_key_auth *rka;
 	struct task_struct *t = current;
-	key_ref_t key_ref, skey_ref;
+	struct cred *cred = t->cred;
 	struct key *key;
+	key_ref_t key_ref, skey_ref;
 	int ret;
 
 	key_ref = ERR_PTR(-ENOKEY);
 
 	switch (id) {
 	case KEY_SPEC_THREAD_KEYRING:
-		if (!t->thread_keyring) {
+		if (!cred->thread_keyring) {
 			if (!create)
 				goto error;
 
@@ -599,7 +602,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 			}
 		}
 
-		key = t->thread_keyring;
+		key = cred->thread_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
@@ -628,7 +631,8 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
-			ret = install_session_keyring(t->user->session_keyring);
+			ret = install_session_keyring(
+				cred->user->session_keyring);
 			if (ret < 0)
 				goto error;
 		}
@@ -641,25 +645,25 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 		break;
 
 	case KEY_SPEC_USER_KEYRING:
-		if (!t->user->uid_keyring) {
+		if (!cred->user->uid_keyring) {
 			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
 		}
 
-		key = t->user->uid_keyring;
+		key = cred->user->uid_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_USER_SESSION_KEYRING:
-		if (!t->user->session_keyring) {
+		if (!cred->user->session_keyring) {
 			ret = install_user_keyrings();
 			if (ret < 0)
 				goto error;
 		}
 
-		key = t->user->session_keyring;
+		key = cred->user->session_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
@@ -670,7 +674,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 		goto error;
 
 	case KEY_SPEC_REQKEY_AUTH_KEY:
-		key = t->request_key_auth;
+		key = cred->request_key_auth;
 		if (!key)
 			goto error;
 
@@ -679,19 +683,19 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 		break;
 
 	case KEY_SPEC_REQUESTOR_KEYRING:
-		if (!t->request_key_auth)
+		if (!cred->request_key_auth)
 			goto error;
 
-		down_read(&t->request_key_auth->sem);
-		if (t->request_key_auth->flags & KEY_FLAG_REVOKED) {
+		down_read(&cred->request_key_auth->sem);
+		if (cred->request_key_auth->flags & KEY_FLAG_REVOKED) {
 			key_ref = ERR_PTR(-EKEYREVOKED);
 			key = NULL;
 		} else {
-			rka = t->request_key_auth->payload.data;
+			rka = cred->request_key_auth->payload.data;
 			key = rka->dest_keyring;
 			atomic_inc(&key->usage);
 		}
-		up_read(&t->request_key_auth->sem);
+		up_read(&cred->request_key_auth->sem);
 		if (!key)
 			goto error;
 		key_ref = make_key_ref(key, 1);
@@ -791,7 +795,7 @@ long join_session_keyring(const char *name)
 	keyring = find_keyring_by_name(name, false);
 	if (PTR_ERR(keyring) == -ENOKEY) {
 		/* not found - try and create a new one */
-		keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk,
+		keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk,
 					KEY_ALLOC_IN_QUOTA, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 8e9d93b4a402..3e9b9eb1dd28 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -104,7 +104,8 @@ static int call_sbin_request_key(struct key_construction *cons,
 
 	/* we specify the process's default keyrings */
 	sprintf(keyring_str[0], "%d",
-		tsk->thread_keyring ? tsk->thread_keyring->serial : 0);
+		tsk->cred->thread_keyring ?
+		tsk->cred->thread_keyring->serial : 0);
 
 	prkey = 0;
 	if (tsk->signal->process_keyring)
@@ -117,7 +118,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 		sskey = rcu_dereference(tsk->signal->session_keyring)->serial;
 		rcu_read_unlock();
 	} else {
-		sskey = tsk->user->session_keyring->serial;
+		sskey = tsk->cred->user->session_keyring->serial;
 	}
 
 	sprintf(keyring_str[2], "%d", sskey);
@@ -232,11 +233,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 	} else {
 		/* use a default keyring; falling through the cases until we
 		 * find one that we actually have */
-		switch (tsk->jit_keyring) {
+		switch (tsk->cred->jit_keyring) {
 		case KEY_REQKEY_DEFL_DEFAULT:
 		case KEY_REQKEY_DEFL_REQUESTOR_KEYRING:
-			if (tsk->request_key_auth) {
-				authkey = tsk->request_key_auth;
+			if (tsk->cred->request_key_auth) {
+				authkey = tsk->cred->request_key_auth;
 				down_read(&authkey->sem);
 				rka = authkey->payload.data;
 				if (!test_bit(KEY_FLAG_REVOKED,
@@ -249,7 +250,7 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 			}
 
 		case KEY_REQKEY_DEFL_THREAD_KEYRING:
-			dest_keyring = key_get(tsk->thread_keyring);
+			dest_keyring = key_get(tsk->cred->thread_keyring);
 			if (dest_keyring)
 				break;
 
@@ -268,11 +269,12 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
-			dest_keyring = key_get(tsk->user->session_keyring);
+			dest_keyring =
+				key_get(tsk->cred->user->session_keyring);
 			break;
 
 		case KEY_REQKEY_DEFL_USER_KEYRING:
-			dest_keyring = key_get(tsk->user->uid_keyring);
+			dest_keyring = key_get(tsk->cred->user->uid_keyring);
 			break;
 
 		case KEY_REQKEY_DEFL_GROUP_KEYRING:
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 1762d44711d5..2125579d5d73 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -164,22 +164,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 
 	/* see if the calling process is already servicing the key request of
 	 * another process */
-	if (current->request_key_auth) {
+	if (current->cred->request_key_auth) {
 		/* it is - use that instantiation context here too */
-		down_read(&current->request_key_auth->sem);
+		down_read(&current->cred->request_key_auth->sem);
 
 		/* if the auth key has been revoked, then the key we're
 		 * servicing is already instantiated */
 		if (test_bit(KEY_FLAG_REVOKED,
-			     &current->request_key_auth->flags))
+			     &current->cred->request_key_auth->flags))
 			goto auth_key_revoked;
 
-		irka = current->request_key_auth->payload.data;
+		irka = current->cred->request_key_auth->payload.data;
 		rka->context = irka->context;
 		rka->pid = irka->pid;
 		get_task_struct(rka->context);
 
-		up_read(&current->request_key_auth->sem);
+		up_read(&current->cred->request_key_auth->sem);
 	}
 	else {
 		/* it isn't - use this process as the context */
@@ -214,7 +214,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	return authkey;
 
 auth_key_revoked:
-	up_read(&current->request_key_auth->sem);
+	up_read(&current->cred->request_key_auth->sem);
 	kfree(rka->callout_info);
 	kfree(rka);
 	kleave("= -EKEYREVOKED");
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 64af2d3409ef..cf02490cd1eb 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid);
 int selinux_secmark_relabel_packet_permission(u32 sid)
 {
 	if (selinux_enabled) {
-		struct task_security_struct *tsec = current->security;
+		struct task_security_struct *tsec = current->cred->security;
 
 		return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET,
 				    PACKET__RELABELTO, NULL);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 9f6da154cc82..328308f2882a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -167,21 +167,21 @@ static int task_alloc_security(struct task_struct *task)
 		return -ENOMEM;
 
 	tsec->osid = tsec->sid = SECINITSID_UNLABELED;
-	task->security = tsec;
+	task->cred->security = tsec;
 
 	return 0;
 }
 
 static void task_free_security(struct task_struct *task)
 {
-	struct task_security_struct *tsec = task->security;
-	task->security = NULL;
+	struct task_security_struct *tsec = task->cred->security;
+	task->cred->security = NULL;
 	kfree(tsec);
 }
 
 static int inode_alloc_security(struct inode *inode)
 {
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct inode_security_struct *isec;
 
 	isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS);
@@ -215,7 +215,7 @@ static void inode_free_security(struct inode *inode)
 
 static int file_alloc_security(struct file *file)
 {
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct file_security_struct *fsec;
 
 	fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL);
@@ -554,7 +554,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 				struct security_mnt_opts *opts)
 {
 	int rc = 0, i;
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct superblock_security_struct *sbsec = sb->s_security;
 	const char *name = sb->s_type->name;
 	struct inode *inode = sbsec->sb->s_root->d_inode;
@@ -1353,8 +1353,8 @@ static int task_has_perm(struct task_struct *tsk1,
 {
 	struct task_security_struct *tsec1, *tsec2;
 
-	tsec1 = tsk1->security;
-	tsec2 = tsk2->security;
+	tsec1 = tsk1->cred->security;
+	tsec2 = tsk2->cred->security;
 	return avc_has_perm(tsec1->sid, tsec2->sid,
 			    SECCLASS_PROCESS, perms, NULL);
 }
@@ -1374,7 +1374,7 @@ static int task_has_capability(struct task_struct *tsk,
 	u32 av = CAP_TO_MASK(cap);
 	int rc;
 
-	tsec = tsk->security;
+	tsec = tsk->cred->security;
 
 	AVC_AUDIT_DATA_INIT(&ad, CAP);
 	ad.tsk = tsk;
@@ -1405,7 +1405,7 @@ static int task_has_system(struct task_struct *tsk,
 {
 	struct task_security_struct *tsec;
 
-	tsec = tsk->security;
+	tsec = tsk->cred->security;
 
 	return avc_has_perm(tsec->sid, SECINITSID_KERNEL,
 			    SECCLASS_SYSTEM, perms, NULL);
@@ -1426,7 +1426,7 @@ static int inode_has_perm(struct task_struct *tsk,
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
 
-	tsec = tsk->security;
+	tsec = tsk->cred->security;
 	isec = inode->i_security;
 
 	if (!adp) {
@@ -1466,7 +1466,7 @@ static int file_has_perm(struct task_struct *tsk,
 				struct file *file,
 				u32 av)
 {
-	struct task_security_struct *tsec = tsk->security;
+	struct task_security_struct *tsec = tsk->cred->security;
 	struct file_security_struct *fsec = file->f_security;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct avc_audit_data ad;
@@ -1503,7 +1503,7 @@ static int may_create(struct inode *dir,
 	struct avc_audit_data ad;
 	int rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	dsec = dir->i_security;
 	sbsec = dir->i_sb->s_security;
 
@@ -1540,7 +1540,7 @@ static int may_create_key(u32 ksid,
 {
 	struct task_security_struct *tsec;
 
-	tsec = ctx->security;
+	tsec = ctx->cred->security;
 
 	return avc_has_perm(tsec->sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL);
 }
@@ -1561,7 +1561,7 @@ static int may_link(struct inode *dir,
 	u32 av;
 	int rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	dsec = dir->i_security;
 	isec = dentry->d_inode->i_security;
 
@@ -1606,7 +1606,7 @@ static inline int may_rename(struct inode *old_dir,
 	int old_is_dir, new_is_dir;
 	int rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	old_dsec = old_dir->i_security;
 	old_isec = old_dentry->d_inode->i_security;
 	old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
@@ -1659,7 +1659,7 @@ static int superblock_has_perm(struct task_struct *tsk,
 	struct task_security_struct *tsec;
 	struct superblock_security_struct *sbsec;
 
-	tsec = tsk->security;
+	tsec = tsk->cred->security;
 	sbsec = sb->s_security;
 	return avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM,
 			    perms, ad);
@@ -1758,8 +1758,8 @@ static int selinux_ptrace_may_access(struct task_struct *child,
 		return rc;
 
 	if (mode == PTRACE_MODE_READ) {
-		struct task_security_struct *tsec = current->security;
-		struct task_security_struct *csec = child->security;
+		struct task_security_struct *tsec = current->cred->security;
+		struct task_security_struct *csec = child->cred->security;
 		return avc_has_perm(tsec->sid, csec->sid,
 				    SECCLASS_FILE, FILE__READ, NULL);
 	}
@@ -1874,7 +1874,7 @@ static int selinux_sysctl(ctl_table *table, int op)
 	if (rc)
 		return rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 
 	rc = selinux_sysctl_get_sid(table, (op == 0001) ?
 				    SECCLASS_DIR : SECCLASS_FILE, &tsid);
@@ -2025,7 +2025,7 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm)
 	if (bsec->set)
 		return 0;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = inode->i_security;
 
 	/* Default to the current task SID. */
@@ -2090,7 +2090,7 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm)
 
 static int selinux_bprm_secureexec(struct linux_binprm *bprm)
 {
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	int atsecure = 0;
 
 	if (tsec->osid != tsec->sid) {
@@ -2214,7 +2214,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 
 	secondary_ops->bprm_apply_creds(bprm, unsafe);
 
-	tsec = current->security;
+	tsec = current->cred->security;
 
 	bsec = bprm->security;
 	sid = bsec->sid;
@@ -2243,7 +2243,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 			rcu_read_lock();
 			tracer = tracehook_tracer_task(current);
 			if (likely(tracer != NULL)) {
-				sec = tracer->security;
+				sec = tracer->cred->security;
 				ptsid = sec->sid;
 			}
 			rcu_read_unlock();
@@ -2274,7 +2274,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 	int rc, i;
 	unsigned long flags;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	bsec = bprm->security;
 
 	if (bsec->unsafe) {
@@ -2521,7 +2521,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
 	int rc;
 	char *namep = NULL, *context;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	dsec = dir->i_security;
 	sbsec = dir->i_sb->s_security;
 
@@ -2706,7 +2706,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
 static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
 				  const void *value, size_t size, int flags)
 {
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct inode *inode = dentry->d_inode;
 	struct inode_security_struct *isec = inode->i_security;
 	struct superblock_security_struct *sbsec;
@@ -2918,7 +2918,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask)
 static int selinux_file_permission(struct file *file, int mask)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct file_security_struct *fsec = file->f_security;
 	struct inode_security_struct *isec = inode->i_security;
 
@@ -2995,7 +2995,8 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot,
 			     unsigned long addr, unsigned long addr_only)
 {
 	int rc = 0;
-	u32 sid = ((struct task_security_struct *)(current->security))->sid;
+	u32 sid = ((struct task_security_struct *)
+		   (current->cred->security))->sid;
 
 	if (addr < mmap_min_addr)
 		rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT,
@@ -3107,7 +3108,7 @@ static int selinux_file_set_fowner(struct file *file)
 	struct task_security_struct *tsec;
 	struct file_security_struct *fsec;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	fsec = file->f_security;
 	fsec->fown_sid = tsec->sid;
 
@@ -3125,7 +3126,7 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk,
 	/* struct fown_struct is never outside the context of a struct file */
 	file = container_of(fown, struct file, f_owner);
 
-	tsec = tsk->security;
+	tsec = tsk->cred->security;
 	fsec = file->f_security;
 
 	if (!signum)
@@ -3188,12 +3189,12 @@ static int selinux_task_alloc_security(struct task_struct *tsk)
 	struct task_security_struct *tsec1, *tsec2;
 	int rc;
 
-	tsec1 = current->security;
+	tsec1 = current->cred->security;
 
 	rc = task_alloc_security(tsk);
 	if (rc)
 		return rc;
-	tsec2 = tsk->security;
+	tsec2 = tsk->cred->security;
 
 	tsec2->osid = tsec1->osid;
 	tsec2->sid = tsec1->sid;
@@ -3251,7 +3252,7 @@ static int selinux_task_getsid(struct task_struct *p)
 
 static void selinux_task_getsecid(struct task_struct *p, u32 *secid)
 {
-	struct task_security_struct *tsec = p->security;
+	struct task_security_struct *tsec = p->cred->security;
 	*secid = tsec->sid;
 }
 
@@ -3343,7 +3344,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
 		perm = PROCESS__SIGNULL; /* null signal; existence test */
 	else
 		perm = signal_to_av(sig);
-	tsec = p->security;
+	tsec = p->cred->security;
 	if (secid)
 		rc = avc_has_perm(secid, tsec->sid, SECCLASS_PROCESS, perm, NULL);
 	else
@@ -3375,7 +3376,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p)
 
 	secondary_ops->task_reparent_to_init(p);
 
-	tsec = p->security;
+	tsec = p->cred->security;
 	tsec->osid = tsec->sid;
 	tsec->sid = SECINITSID_KERNEL;
 	return;
@@ -3384,7 +3385,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p)
 static void selinux_task_to_inode(struct task_struct *p,
 				  struct inode *inode)
 {
-	struct task_security_struct *tsec = p->security;
+	struct task_security_struct *tsec = p->cred->security;
 	struct inode_security_struct *isec = inode->i_security;
 
 	isec->sid = tsec->sid;
@@ -3632,7 +3633,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock,
 	struct avc_audit_data ad;
 	int err = 0;
 
-	tsec = task->security;
+	tsec = task->cred->security;
 	isec = SOCK_INODE(sock)->i_security;
 
 	if (isec->sid == SECINITSID_KERNEL)
@@ -3656,7 +3657,7 @@ static int selinux_socket_create(int family, int type,
 	if (kern)
 		goto out;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	newsid = tsec->sockcreate_sid ? : tsec->sid;
 	err = avc_has_perm(tsec->sid, newsid,
 			   socket_type_to_security_class(family, type,
@@ -3677,7 +3678,7 @@ static int selinux_socket_post_create(struct socket *sock, int family,
 
 	isec = SOCK_INODE(sock)->i_security;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	newsid = tsec->sockcreate_sid ? : tsec->sid;
 	isec->sclass = socket_type_to_security_class(family, type, protocol);
 	isec->sid = kern ? SECINITSID_KERNEL : newsid;
@@ -3723,7 +3724,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
 		struct sock *sk = sock->sk;
 		u32 sid, node_perm;
 
-		tsec = current->security;
+		tsec = current->cred->security;
 		isec = SOCK_INODE(sock)->i_security;
 
 		if (family == PF_INET) {
@@ -4764,7 +4765,7 @@ static int ipc_alloc_security(struct task_struct *task,
 			      struct kern_ipc_perm *perm,
 			      u16 sclass)
 {
-	struct task_security_struct *tsec = task->security;
+	struct task_security_struct *tsec = task->cred->security;
 	struct ipc_security_struct *isec;
 
 	isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL);
@@ -4814,7 +4815,7 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms,
 	struct ipc_security_struct *isec;
 	struct avc_audit_data ad;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = ipc_perms->security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -4845,7 +4846,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
 	if (rc)
 		return rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = msq->q_perm.security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -4871,7 +4872,7 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg)
 	struct ipc_security_struct *isec;
 	struct avc_audit_data ad;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = msq->q_perm.security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -4917,7 +4918,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
 	struct avc_audit_data ad;
 	int rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = msq->q_perm.security;
 	msec = msg->security;
 
@@ -4965,7 +4966,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 	struct avc_audit_data ad;
 	int rc;
 
-	tsec = target->security;
+	tsec = target->cred->security;
 	isec = msq->q_perm.security;
 	msec = msg->security;
 
@@ -4992,7 +4993,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp)
 	if (rc)
 		return rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = shp->shm_perm.security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -5018,7 +5019,7 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg)
 	struct ipc_security_struct *isec;
 	struct avc_audit_data ad;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = shp->shm_perm.security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -5091,7 +5092,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma)
 	if (rc)
 		return rc;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = sma->sem_perm.security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -5117,7 +5118,7 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg)
 	struct ipc_security_struct *isec;
 	struct avc_audit_data ad;
 
-	tsec = current->security;
+	tsec = current->cred->security;
 	isec = sma->sem_perm.security;
 
 	AVC_AUDIT_DATA_INIT(&ad, IPC);
@@ -5224,7 +5225,7 @@ static int selinux_getprocattr(struct task_struct *p,
 			return error;
 	}
 
-	tsec = p->security;
+	tsec = p->cred->security;
 
 	if (!strcmp(name, "current"))
 		sid = tsec->sid;
@@ -5308,7 +5309,7 @@ static int selinux_setprocattr(struct task_struct *p,
 	   operation.  See selinux_bprm_set_security for the execve
 	   checks and may_create for the file creation checks. The
 	   operation will then fail if the context is not permitted. */
-	tsec = p->security;
+	tsec = p->cred->security;
 	if (!strcmp(name, "exec"))
 		tsec->exec_sid = sid;
 	else if (!strcmp(name, "fscreate"))
@@ -5361,7 +5362,8 @@ boundary_ok:
 		rcu_read_lock();
 		tracer = tracehook_tracer_task(p);
 		if (tracer != NULL) {
-			struct task_security_struct *ptsec = tracer->security;
+			struct task_security_struct *ptsec =
+				tracer->cred->security;
 			u32 ptsid = ptsec->sid;
 			rcu_read_unlock();
 			error = avc_has_perm_noaudit(ptsid, sid,
@@ -5405,7 +5407,7 @@ static void selinux_release_secctx(char *secdata, u32 seclen)
 static int selinux_key_alloc(struct key *k, struct task_struct *tsk,
 			     unsigned long flags)
 {
-	struct task_security_struct *tsec = tsk->security;
+	struct task_security_struct *tsec = tsk->cred->security;
 	struct key_security_struct *ksec;
 
 	ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL);
@@ -5439,7 +5441,7 @@ static int selinux_key_permission(key_ref_t key_ref,
 
 	key = key_ref_to_ptr(key_ref);
 
-	tsec = ctx->security;
+	tsec = ctx->cred->security;
 	ksec = key->security;
 
 	/* if no specific permissions are requested, we skip the
@@ -5683,7 +5685,7 @@ static __init int selinux_init(void)
 	/* Set the security state for the initial task. */
 	if (task_alloc_security(current))
 		panic("SELinux:  Failed to initialize initial task.\n");
-	tsec = current->security;
+	tsec = current->cred->security;
 	tsec->osid = tsec->sid = SECINITSID_KERNEL;
 
 	sel_inode_cache = kmem_cache_create("selinux_inode_security",
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 69c9dccc8cf0..10715d1330b9 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -97,7 +97,7 @@ static int task_has_security(struct task_struct *tsk,
 {
 	struct task_security_struct *tsec;
 
-	tsec = tsk->security;
+	tsec = tsk->cred->security;
 	if (!tsec)
 		return -EACCES;
 
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 8f17f542a116..d7db76617b0e 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
 	struct xfrm_user_sec_ctx *uctx, u32 sid)
 {
 	int rc = 0;
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct xfrm_sec_ctx *ctx = NULL;
 	char *ctx_str = NULL;
 	u32 str_len;
@@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
  */
 int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
 {
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	int rc = 0;
 
 	if (ctx) {
@@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x)
   */
 int selinux_xfrm_state_delete(struct xfrm_state *x)
 {
-	struct task_security_struct *tsec = current->security;
+	struct task_security_struct *tsec = current->cred->security;
 	struct xfrm_sec_ctx *ctx = x->security;
 	int rc = 0;
 
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 79ff21ed4c3b..b6dd4fc0fb0b 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode)
 {
 	int rc;
 
-	rc = smk_access(current->security, obj_label, mode);
+	rc = smk_access(current->cred->security, obj_label, mode);
 	if (rc == 0)
 		return 0;
 
@@ -173,7 +173,7 @@ int smk_curacc(char *obj_label, u32 mode)
 	 * only one that gets privilege and current does not
 	 * have that label.
 	 */
-	if (smack_onlycap != NULL && smack_onlycap != current->security)
+	if (smack_onlycap != NULL && smack_onlycap != current->cred->security)
 		return rc;
 
 	if (capable(CAP_MAC_OVERRIDE))
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 6e2dc0bab70d..791da238d049 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -102,7 +102,8 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(current->security, ctp->security, MAY_READWRITE);
+	rc = smk_access(current->cred->security, ctp->cred->security,
+			MAY_READWRITE);
 	if (rc != 0 && capable(CAP_MAC_OVERRIDE))
 		return 0;
 	return rc;
@@ -124,7 +125,8 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(ptp->security, current->security, MAY_READWRITE);
+	rc = smk_access(ptp->cred->security, current->cred->security,
+			MAY_READWRITE);
 	if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
 		return 0;
 	return rc;
@@ -141,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 static int smack_syslog(int type)
 {
 	int rc;
-	char *sp = current->security;
+	char *sp = current->cred->security;
 
 	rc = cap_syslog(type);
 	if (rc != 0)
@@ -373,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags)
  */
 static int smack_inode_alloc_security(struct inode *inode)
 {
-	inode->i_security = new_inode_smack(current->security);
+	inode->i_security = new_inode_smack(current->cred->security);
 	if (inode->i_security == NULL)
 		return -ENOMEM;
 	return 0;
@@ -818,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask)
  */
 static int smack_file_alloc_security(struct file *file)
 {
-	file->f_security = current->security;
+	file->f_security = current->cred->security;
 	return 0;
 }
 
@@ -916,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
  */
 static int smack_file_set_fowner(struct file *file)
 {
-	file->f_security = current->security;
+	file->f_security = current->cred->security;
 	return 0;
 }
 
@@ -941,7 +943,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
 	 * struct fown_struct is never outside the context of a struct file
 	 */
 	file = container_of(fown, struct file, f_owner);
-	rc = smk_access(file->f_security, tsk->security, MAY_WRITE);
+	rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE);
 	if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
 		return 0;
 	return rc;
@@ -984,7 +986,7 @@ static int smack_file_receive(struct file *file)
  */
 static int smack_task_alloc_security(struct task_struct *tsk)
 {
-	tsk->security = current->security;
+	tsk->cred->security = current->cred->security;
 
 	return 0;
 }
@@ -999,7 +1001,7 @@ static int smack_task_alloc_security(struct task_struct *tsk)
  */
 static void smack_task_free_security(struct task_struct *task)
 {
-	task->security = NULL;
+	task->cred->security = NULL;
 }
 
 /**
@@ -1011,7 +1013,7 @@ static void smack_task_free_security(struct task_struct *task)
  */
 static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
 {
-	return smk_curacc(p->security, MAY_WRITE);
+	return smk_curacc(p->cred->security, MAY_WRITE);
 }
 
 /**
@@ -1022,7 +1024,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
  */
 static int smack_task_getpgid(struct task_struct *p)
 {
-	return smk_curacc(p->security, MAY_READ);
+	return smk_curacc(p->cred->security, MAY_READ);
 }
 
 /**
@@ -1033,7 +1035,7 @@ static int smack_task_getpgid(struct task_struct *p)
  */
 static int smack_task_getsid(struct task_struct *p)
 {
-	return smk_curacc(p->security, MAY_READ);
+	return smk_curacc(p->cred->security, MAY_READ);
 }
 
 /**
@@ -1045,7 +1047,7 @@ static int smack_task_getsid(struct task_struct *p)
  */
 static void smack_task_getsecid(struct task_struct *p, u32 *secid)
 {
-	*secid = smack_to_secid(p->security);
+	*secid = smack_to_secid(p->cred->security);
 }
 
 /**
@@ -1061,7 +1063,7 @@ static int smack_task_setnice(struct task_struct *p, int nice)
 
 	rc = cap_task_setnice(p, nice);
 	if (rc == 0)
-		rc = smk_curacc(p->security, MAY_WRITE);
+		rc = smk_curacc(p->cred->security, MAY_WRITE);
 	return rc;
 }
 
@@ -1078,7 +1080,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
 
 	rc = cap_task_setioprio(p, ioprio);
 	if (rc == 0)
-		rc = smk_curacc(p->security, MAY_WRITE);
+		rc = smk_curacc(p->cred->security, MAY_WRITE);
 	return rc;
 }
 
@@ -1090,7 +1092,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
  */
 static int smack_task_getioprio(struct task_struct *p)
 {
-	return smk_curacc(p->security, MAY_READ);
+	return smk_curacc(p->cred->security, MAY_READ);
 }
 
 /**
@@ -1108,7 +1110,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
 
 	rc = cap_task_setscheduler(p, policy, lp);
 	if (rc == 0)
-		rc = smk_curacc(p->security, MAY_WRITE);
+		rc = smk_curacc(p->cred->security, MAY_WRITE);
 	return rc;
 }
 
@@ -1120,7 +1122,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
  */
 static int smack_task_getscheduler(struct task_struct *p)
 {
-	return smk_curacc(p->security, MAY_READ);
+	return smk_curacc(p->cred->security, MAY_READ);
 }
 
 /**
@@ -1131,7 +1133,7 @@ static int smack_task_getscheduler(struct task_struct *p)
  */
 static int smack_task_movememory(struct task_struct *p)
 {
-	return smk_curacc(p->security, MAY_WRITE);
+	return smk_curacc(p->cred->security, MAY_WRITE);
 }
 
 /**
@@ -1154,13 +1156,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 	 * can write the receiver.
 	 */
 	if (secid == 0)
-		return smk_curacc(p->security, MAY_WRITE);
+		return smk_curacc(p->cred->security, MAY_WRITE);
 	/*
 	 * If the secid isn't 0 we're dealing with some USB IO
 	 * specific behavior. This is not clean. For one thing
 	 * we can't take privilege into account.
 	 */
-	return smk_access(smack_from_secid(secid), p->security, MAY_WRITE);
+	return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE);
 }
 
 /**
@@ -1173,7 +1175,7 @@ static int smack_task_wait(struct task_struct *p)
 {
 	int rc;
 
-	rc = smk_access(current->security, p->security, MAY_WRITE);
+	rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE);
 	if (rc == 0)
 		return 0;
 
@@ -1204,7 +1206,7 @@ static int smack_task_wait(struct task_struct *p)
 static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
 {
 	struct inode_smack *isp = inode->i_security;
-	isp->smk_inode = p->security;
+	isp->smk_inode = p->cred->security;
 }
 
 /*
@@ -1223,7 +1225,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
  */
 static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
 {
-	char *csp = current->security;
+	char *csp = current->cred->security;
 	struct socket_smack *ssp;
 
 	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
@@ -1448,7 +1450,7 @@ static int smack_flags_to_may(int flags)
  */
 static int smack_msg_msg_alloc_security(struct msg_msg *msg)
 {
-	msg->security = current->security;
+	msg->security = current->cred->security;
 	return 0;
 }
 
@@ -1484,7 +1486,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
 {
 	struct kern_ipc_perm *isp = &shp->shm_perm;
 
-	isp->security = current->security;
+	isp->security = current->cred->security;
 	return 0;
 }
 
@@ -1593,7 +1595,7 @@ static int smack_sem_alloc_security(struct sem_array *sma)
 {
 	struct kern_ipc_perm *isp = &sma->sem_perm;
 
-	isp->security = current->security;
+	isp->security = current->cred->security;
 	return 0;
 }
 
@@ -1697,7 +1699,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
 {
 	struct kern_ipc_perm *kisp = &msq->q_perm;
 
-	kisp->security = current->security;
+	kisp->security = current->cred->security;
 	return 0;
 }
 
@@ -1852,7 +1854,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	struct super_block *sbp;
 	struct superblock_smack *sbsp;
 	struct inode_smack *isp;
-	char *csp = current->security;
+	char *csp = current->cred->security;
 	char *fetched;
 	char *final;
 	struct dentry *dp;
@@ -2009,7 +2011,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
 	if (strcmp(name, "current") != 0)
 		return -EINVAL;
 
-	cp = kstrdup(p->security, GFP_KERNEL);
+	cp = kstrdup(p->cred->security, GFP_KERNEL);
 	if (cp == NULL)
 		return -ENOMEM;
 
@@ -2055,7 +2057,7 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 	if (newsmack == NULL)
 		return -EINVAL;
 
-	p->security = newsmack;
+	p->cred->security = newsmack;
 	return size;
 }
 
@@ -2288,8 +2290,8 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent)
 		return;
 
 	ssp = sk->sk_security;
-	ssp->smk_in = current->security;
-	ssp->smk_out = current->security;
+	ssp->smk_in = current->cred->security;
+	ssp->smk_out = current->cred->security;
 	ssp->smk_packet[0] = '\0';
 
 	rc = smack_netlabel(sk);
@@ -2362,7 +2364,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 static int smack_key_alloc(struct key *key, struct task_struct *tsk,
 			   unsigned long flags)
 {
-	key->security = tsk->security;
+	key->security = tsk->cred->security;
 	return 0;
 }
 
@@ -2403,10 +2405,11 @@ static int smack_key_permission(key_ref_t key_ref,
 	/*
 	 * This should not occur
 	 */
-	if (context->security == NULL)
+	if (context->cred->security == NULL)
 		return -EACCES;
 
-	return smk_access(context->security, keyp->security, MAY_READWRITE);
+	return smk_access(context->cred->security, keyp->security,
+			  MAY_READWRITE);
 }
 #endif /* CONFIG_KEYS */
 
@@ -2726,7 +2729,7 @@ static __init int smack_init(void)
 	/*
 	 * Set the security state for the initial task.
 	 */
-	current->security = &smack_known_floor.smk_known;
+	current->cred->security = &smack_known_floor.smk_known;
 
 	/*
 	 * Initialize locks
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index c21d8c8bf0c7..c5ca279e0506 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -336,7 +336,7 @@ static void smk_cipso_doi(void)
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = smack_to_secid(current->security);
+	audit_info.secid = smack_to_secid(current->cred->security);
 
 	rc = netlbl_cfg_map_del(NULL, &audit_info);
 	if (rc != 0)
@@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient)
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = smack_to_secid(current->security);
+	audit_info.secid = smack_to_secid(current->cred->security);
 
 	if (oldambient != NULL) {
 		rc = netlbl_cfg_map_del(oldambient, &audit_info);
@@ -843,7 +843,7 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	char in[SMK_LABELLEN];
-	char *sp = current->security;
+	char *sp = current->cred->security;
 
 	if (!capable(CAP_MAC_ADMIN))
 		return -EPERM;
-- 
cgit v1.2.3


From f1752eec6145c97163dbce62d17cf5d928e28a27 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:17 +1100
Subject: CRED: Detach the credentials from task_struct

Detach the credentials from task_struct, duplicating them in copy_process()
and releasing them in __put_task_struct().

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/cred.h       | 29 ++++++++++++++
 include/linux/init_task.h  | 16 +-------
 include/linux/sched.h      |  1 -
 include/linux/security.h   | 26 ++++++-------
 kernel/Makefile            |  2 +-
 kernel/cred.c              | 96 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/fork.c              | 24 +++---------
 security/capability.c      |  8 ++--
 security/security.c        |  8 ++--
 security/selinux/hooks.c   | 32 ++++++++--------
 security/smack/smack_lsm.c | 20 +++++-----
 11 files changed, 179 insertions(+), 83 deletions(-)
 create mode 100644 kernel/cred.c

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 3e65587a72e5..a7a686074cb0 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -158,4 +158,33 @@ do {						\
 	*(_gid) = current->cred->fsgid;		\
 } while(0)
 
+extern void __put_cred(struct cred *);
+extern int copy_creds(struct task_struct *, unsigned long);
+
+/**
+ * get_cred - Get a reference on a set of credentials
+ * @cred: The credentials to reference
+ *
+ * Get a reference on the specified set of credentials.  The caller must
+ * release the reference.
+ */
+static inline struct cred *get_cred(struct cred *cred)
+{
+	atomic_inc(&cred->usage);
+	return cred;
+}
+
+/**
+ * put_cred - Release a reference to a set of credentials
+ * @cred: The credentials to release
+ *
+ * Release a reference to a set of credentials, deleting them when the last ref
+ * is released.
+ */
+static inline void put_cred(struct cred *cred)
+{
+	if (atomic_dec_and_test(&(cred)->usage))
+		__put_cred(cred);
+}
+
 #endif /* _LINUX_CRED_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9de41ccd67b5..5e24c54b6dfd 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -115,19 +115,6 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
-#define INIT_CRED(p)						\
-{								\
-	.usage			= ATOMIC_INIT(3),		\
-	.securebits		= SECUREBITS_DEFAULT,		\
-	.cap_inheritable	= CAP_INIT_INH_SET,		\
-	.cap_permitted		= CAP_FULL_SET,			\
-	.cap_effective		= CAP_INIT_EFF_SET,		\
-	.cap_bset		= CAP_INIT_BSET,		\
-	.user			= INIT_USER,			\
-	.group_info		= &init_groups,			\
-	.lock			= __SPIN_LOCK_UNLOCKED(p.lock),	\
-}
-
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -162,8 +149,7 @@ extern struct cred init_cred;
 	.children	= LIST_HEAD_INIT(tsk.children),			\
 	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
 	.group_leader	= &tsk,						\
-	.__temp_cred	= INIT_CRED(tsk.__temp_cred),			\
-	.cred		= &tsk.__temp_cred,				\
+	.cred		= &init_cred,					\
 	.comm		= "swapper",					\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c8b92502354d..740cf946c8cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1151,7 +1151,6 @@ struct task_struct {
 	struct list_head cpu_timers[3];
 
 /* process credentials */
-	struct cred __temp_cred __deprecated; /* temporary credentials to be removed */
 	struct cred *cred;	/* actual/objective task credentials */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
diff --git a/include/linux/security.h b/include/linux/security.h
index 9f305d4a31a7..9239cc11eb9c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -593,15 +593,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	manual page for definitions of the @clone_flags.
  *	@clone_flags contains the flags indicating what should be shared.
  *	Return 0 if permission is granted.
- * @task_alloc_security:
- *	@p contains the task_struct for child process.
- *	Allocate and attach a security structure to the p->security field. The
- *	security field is initialized to NULL when the task structure is
+ * @cred_alloc_security:
+ *	@cred contains the cred struct for child process.
+ *	Allocate and attach a security structure to the cred->security field.
+ *	The security field is initialized to NULL when the task structure is
  *	allocated.
  *	Return 0 if operation was successful.
- * @task_free_security:
- *	@p contains the task_struct for process.
- *	Deallocate and clear the p->security field.
+ * @cred_free:
+ *	@cred points to the credentials.
+ *	Deallocate and clear the cred->security field in a set of credentials.
  * @task_setuid:
  *	Check permission before setting one or more of the user identity
  *	attributes of the current process.  The @flags parameter indicates
@@ -1405,8 +1405,8 @@ struct security_operations {
 	int (*dentry_open) (struct file *file);
 
 	int (*task_create) (unsigned long clone_flags);
-	int (*task_alloc_security) (struct task_struct *p);
-	void (*task_free_security) (struct task_struct *p);
+	int (*cred_alloc_security) (struct cred *cred);
+	void (*cred_free) (struct cred *cred);
 	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
 	int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ ,
 				 uid_t old_euid, uid_t old_suid, int flags);
@@ -1660,8 +1660,8 @@ int security_file_send_sigiotask(struct task_struct *tsk,
 int security_file_receive(struct file *file);
 int security_dentry_open(struct file *file);
 int security_task_create(unsigned long clone_flags);
-int security_task_alloc(struct task_struct *p);
-void security_task_free(struct task_struct *p);
+int security_cred_alloc(struct cred *cred);
+void security_cred_free(struct cred *cred);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_post_setuid(uid_t old_ruid, uid_t old_euid,
 			      uid_t old_suid, int flags);
@@ -2181,12 +2181,12 @@ static inline int security_task_create(unsigned long clone_flags)
 	return 0;
 }
 
-static inline int security_task_alloc(struct task_struct *p)
+static inline int security_cred_alloc(struct cred *cred)
 {
 	return 0;
 }
 
-static inline void security_task_free(struct task_struct *p)
+static inline void security_cred_free(struct cred *cred)
 { }
 
 static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d84..5a6a612c302d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
+	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
 
 CFLAGS_REMOVE_sched.o = -mno-spe
 
diff --git a/kernel/cred.c b/kernel/cred.c
new file mode 100644
index 000000000000..833244a7cb05
--- /dev/null
+++ b/kernel/cred.c
@@ -0,0 +1,96 @@
+/* Task credentials management
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/cred.h>
+#include <linux/sched.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/init_task.h>
+#include <linux/security.h>
+
+/*
+ * The initial credentials for the initial task
+ */
+struct cred init_cred = {
+	.usage			= ATOMIC_INIT(3),
+	.securebits		= SECUREBITS_DEFAULT,
+	.cap_inheritable	= CAP_INIT_INH_SET,
+	.cap_permitted		= CAP_FULL_SET,
+	.cap_effective		= CAP_INIT_EFF_SET,
+	.cap_bset		= CAP_INIT_BSET,
+	.user			= INIT_USER,
+	.group_info		= &init_groups,
+};
+
+/*
+ * The RCU callback to actually dispose of a set of credentials
+ */
+static void put_cred_rcu(struct rcu_head *rcu)
+{
+	struct cred *cred = container_of(rcu, struct cred, rcu);
+
+	BUG_ON(atomic_read(&cred->usage) != 0);
+
+	key_put(cred->thread_keyring);
+	key_put(cred->request_key_auth);
+	put_group_info(cred->group_info);
+	free_uid(cred->user);
+	security_cred_free(cred);
+	kfree(cred);
+}
+
+/**
+ * __put_cred - Destroy a set of credentials
+ * @sec: The record to release
+ *
+ * Destroy a set of credentials on which no references remain.
+ */
+void __put_cred(struct cred *cred)
+{
+	call_rcu(&cred->rcu, put_cred_rcu);
+}
+EXPORT_SYMBOL(__put_cred);
+
+/*
+ * Copy credentials for the new process created by fork()
+ */
+int copy_creds(struct task_struct *p, unsigned long clone_flags)
+{
+	struct cred *pcred;
+	int ret;
+
+	pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL);
+	if (!pcred)
+		return -ENOMEM;
+
+#ifdef CONFIG_SECURITY
+	pcred->security = NULL;
+#endif
+
+	ret = security_cred_alloc(pcred);
+	if (ret < 0) {
+		kfree(pcred);
+		return ret;
+	}
+
+	atomic_set(&pcred->usage, 1);
+	get_group_info(pcred->group_info);
+	get_uid(pcred->user);
+	key_get(pcred->thread_keyring);
+	key_get(pcred->request_key_auth);
+
+	atomic_inc(&pcred->user->processes);
+
+	/* RCU assignment is unneeded here as no-one can have accessed this
+	 * pointer yet, barring us */
+	p->cred = pcred;
+	return 0;
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index 81fdc7733908..c932e283ddfc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,9 +146,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
-	security_task_free(tsk);
-	free_uid(tsk->__temp_cred.user);
-	put_group_info(tsk->__temp_cred.group_info);
+	put_cred(tsk->cred);
 	delayacct_tsk_free(tsk);
 
 	if (!profile_handoff_task(tsk))
@@ -969,7 +967,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
-	p->cred = &p->__temp_cred;
 	retval = -EAGAIN;
 	if (atomic_read(&p->cred->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -978,9 +975,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			goto bad_fork_free;
 	}
 
-	atomic_inc(&p->cred->user->__count);
-	atomic_inc(&p->cred->user->processes);
-	get_group_info(p->cred->group_info);
+	retval = copy_creds(p, clone_flags);
+	if (retval < 0)
+		goto bad_fork_free;
 
 	/*
 	 * If multiple threads are within copy_process(), then this check
@@ -1035,9 +1032,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
 	monotonic_to_bootbased(&p->real_start_time);
-#ifdef CONFIG_SECURITY
-	p->cred->security = NULL;
-#endif
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	cgroup_fork(p);
@@ -1082,10 +1076,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
-	if ((retval = security_task_alloc(p)))
-		goto bad_fork_cleanup_policy;
 	if ((retval = audit_alloc(p)))
-		goto bad_fork_cleanup_security;
+		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
 	if ((retval = copy_semundo(clone_flags, p)))
 		goto bad_fork_cleanup_audit;
@@ -1284,8 +1276,6 @@ bad_fork_cleanup_semundo:
 	exit_sem(p);
 bad_fork_cleanup_audit:
 	audit_free(p);
-bad_fork_cleanup_security:
-	security_task_free(p);
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
@@ -1298,9 +1288,7 @@ bad_fork_cleanup_cgroup:
 bad_fork_cleanup_put_domain:
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
-	put_group_info(p->cred->group_info);
-	atomic_dec(&p->cred->user->processes);
-	free_uid(p->cred->user);
+	put_cred(p->cred);
 bad_fork_free:
 	free_task(p);
 fork_out:
diff --git a/security/capability.c b/security/capability.c
index 245874819036..6c4b5137ca7b 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -340,12 +340,12 @@ static int cap_task_create(unsigned long clone_flags)
 	return 0;
 }
 
-static int cap_task_alloc_security(struct task_struct *p)
+static int cap_cred_alloc_security(struct cred *cred)
 {
 	return 0;
 }
 
-static void cap_task_free_security(struct task_struct *p)
+static void cap_cred_free(struct cred *cred)
 {
 }
 
@@ -890,8 +890,8 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, file_receive);
 	set_to_cap_if_null(ops, dentry_open);
 	set_to_cap_if_null(ops, task_create);
-	set_to_cap_if_null(ops, task_alloc_security);
-	set_to_cap_if_null(ops, task_free_security);
+	set_to_cap_if_null(ops, cred_alloc_security);
+	set_to_cap_if_null(ops, cred_free);
 	set_to_cap_if_null(ops, task_setuid);
 	set_to_cap_if_null(ops, task_post_setuid);
 	set_to_cap_if_null(ops, task_setgid);
diff --git a/security/security.c b/security/security.c
index 81c956a12300..d058f7d5b10a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -616,14 +616,14 @@ int security_task_create(unsigned long clone_flags)
 	return security_ops->task_create(clone_flags);
 }
 
-int security_task_alloc(struct task_struct *p)
+int security_cred_alloc(struct cred *cred)
 {
-	return security_ops->task_alloc_security(p);
+	return security_ops->cred_alloc_security(cred);
 }
 
-void security_task_free(struct task_struct *p)
+void security_cred_free(struct cred *cred)
 {
-	security_ops->task_free_security(p);
+	security_ops->cred_free(cred);
 }
 
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 328308f2882a..658435dce37c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -158,7 +158,7 @@ static int selinux_secmark_enabled(void)
 
 /* Allocate and free functions for each kind of security blob. */
 
-static int task_alloc_security(struct task_struct *task)
+static int cred_alloc_security(struct cred *cred)
 {
 	struct task_security_struct *tsec;
 
@@ -167,18 +167,11 @@ static int task_alloc_security(struct task_struct *task)
 		return -ENOMEM;
 
 	tsec->osid = tsec->sid = SECINITSID_UNLABELED;
-	task->cred->security = tsec;
+	cred->security = tsec;
 
 	return 0;
 }
 
-static void task_free_security(struct task_struct *task)
-{
-	struct task_security_struct *tsec = task->cred->security;
-	task->cred->security = NULL;
-	kfree(tsec);
-}
-
 static int inode_alloc_security(struct inode *inode)
 {
 	struct task_security_struct *tsec = current->cred->security;
@@ -3184,17 +3177,17 @@ static int selinux_task_create(unsigned long clone_flags)
 	return task_has_perm(current, current, PROCESS__FORK);
 }
 
-static int selinux_task_alloc_security(struct task_struct *tsk)
+static int selinux_cred_alloc_security(struct cred *cred)
 {
 	struct task_security_struct *tsec1, *tsec2;
 	int rc;
 
 	tsec1 = current->cred->security;
 
-	rc = task_alloc_security(tsk);
+	rc = cred_alloc_security(cred);
 	if (rc)
 		return rc;
-	tsec2 = tsk->cred->security;
+	tsec2 = cred->security;
 
 	tsec2->osid = tsec1->osid;
 	tsec2->sid = tsec1->sid;
@@ -3208,9 +3201,14 @@ static int selinux_task_alloc_security(struct task_struct *tsk)
 	return 0;
 }
 
-static void selinux_task_free_security(struct task_struct *tsk)
+/*
+ * detach and free the LSM part of a set of credentials
+ */
+static void selinux_cred_free(struct cred *cred)
 {
-	task_free_security(tsk);
+	struct task_security_struct *tsec = cred->security;
+	cred->security = NULL;
+	kfree(tsec);
 }
 
 static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
@@ -5552,8 +5550,8 @@ static struct security_operations selinux_ops = {
 	.dentry_open =			selinux_dentry_open,
 
 	.task_create =			selinux_task_create,
-	.task_alloc_security =		selinux_task_alloc_security,
-	.task_free_security =		selinux_task_free_security,
+	.cred_alloc_security =		selinux_cred_alloc_security,
+	.cred_free =			selinux_cred_free,
 	.task_setuid =			selinux_task_setuid,
 	.task_post_setuid =		selinux_task_post_setuid,
 	.task_setgid =			selinux_task_setgid,
@@ -5683,7 +5681,7 @@ static __init int selinux_init(void)
 	printk(KERN_INFO "SELinux:  Initializing.\n");
 
 	/* Set the security state for the initial task. */
-	if (task_alloc_security(current))
+	if (cred_alloc_security(current->cred))
 		panic("SELinux:  Failed to initialize initial task.\n");
 	tsec = current->cred->security;
 	tsec->osid = tsec->sid = SECINITSID_KERNEL;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 791da238d049..cc837314fb0e 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -975,8 +975,8 @@ static int smack_file_receive(struct file *file)
  */
 
 /**
- * smack_task_alloc_security - "allocate" a task blob
- * @tsk: the task in need of a blob
+ * smack_cred_alloc_security - "allocate" a task cred blob
+ * @cred: the task creds in need of a blob
  *
  * Smack isn't using copies of blobs. Everyone
  * points to an immutable list. No alloc required.
@@ -984,24 +984,24 @@ static int smack_file_receive(struct file *file)
  *
  * Always returns 0
  */
-static int smack_task_alloc_security(struct task_struct *tsk)
+static int smack_cred_alloc_security(struct cred *cred)
 {
-	tsk->cred->security = current->cred->security;
+	cred->security = current->cred->security;
 
 	return 0;
 }
 
 /**
- * smack_task_free_security - "free" a task blob
- * @task: the task with the blob
+ * smack_cred_free - "free" task-level security credentials
+ * @cred: the credentials in question
  *
  * Smack isn't using copies of blobs. Everyone
  * points to an immutable list. The blobs never go away.
  * There is no leak here.
  */
-static void smack_task_free_security(struct task_struct *task)
+static void smack_cred_free(struct cred *cred)
 {
-	task->cred->security = NULL;
+	cred->security = NULL;
 }
 
 /**
@@ -2630,8 +2630,8 @@ struct security_operations smack_ops = {
 	.file_send_sigiotask = 		smack_file_send_sigiotask,
 	.file_receive = 		smack_file_receive,
 
-	.task_alloc_security = 		smack_task_alloc_security,
-	.task_free_security = 		smack_task_free_security,
+	.cred_alloc_security = 		smack_cred_alloc_security,
+	.cred_free =			smack_cred_free,
 	.task_post_setuid =		cap_task_post_setuid,
 	.task_setpgid = 		smack_task_setpgid,
 	.task_getpgid = 		smack_task_getpgid,
-- 
cgit v1.2.3


From 86a264abe542cfececb4df129bc45a0338d8cdb9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:18 +1100
Subject: CRED: Wrap current->cred and a few other accessors

Wrap current->cred and a few other accessors to hide their actual
implementation.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/ia64/ia32/sys_ia32.c     |   7 +-
 drivers/net/tun.c             |   8 +-
 drivers/usb/core/devio.c      |  10 ++-
 fs/binfmt_elf.c               |  10 +--
 fs/binfmt_elf_fdpic.c         |   9 +-
 fs/exec.c                     |   5 +-
 fs/fcntl.c                    |   3 +-
 fs/file_table.c               |   7 +-
 fs/hugetlbfs/inode.c          |   5 +-
 fs/ioprio.c                   |   4 +-
 fs/smbfs/dir.c                |   3 +-
 include/linux/cred.h          | 187 ++++++++++++++++++++++++++++++++----------
 include/linux/securebits.h    |   2 +-
 ipc/mqueue.c                  |   2 +-
 ipc/shm.c                     |   4 +-
 kernel/sys.c                  |  59 +++++++------
 kernel/uid16.c                |  31 +++----
 net/core/scm.c                |   2 +-
 net/sunrpc/auth.c             |  14 ++--
 security/commoncap.c          |   2 +-
 security/keys/process_keys.c  |   2 +-
 security/keys/request_key.c   |  11 +--
 security/selinux/exports.c    |   8 +-
 security/selinux/xfrm.c       |   6 +-
 security/smack/smack_access.c |   2 +-
 security/smack/smack_lsm.c    |  26 +++---
 security/smack/smackfs.c      |   4 +-
 27 files changed, 271 insertions(+), 162 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 2445a9d3488e..16ef61a91d95 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1767,25 +1767,24 @@ groups16_from_user(struct group_info *group_info, short __user *grouplist)
 asmlinkage long
 sys32_getgroups16 (int gidsetsize, short __user *grouplist)
 {
+	const struct cred *cred = current_cred();
 	int i;
 
 	if (gidsetsize < 0)
 		return -EINVAL;
 
-	get_group_info(current->cred->group_info);
-	i = current->cred->group_info->ngroups;
+	i = cred->group_info->ngroups;
 	if (gidsetsize) {
 		if (i > gidsetsize) {
 			i = -EINVAL;
 			goto out;
 		}
-		if (groups16_to_user(grouplist, current->cred->group_info)) {
+		if (groups16_to_user(grouplist, cred->group_info)) {
 			i = -EFAULT;
 			goto out;
 		}
 	}
 out:
-	put_group_info(current->cred->group_info);
 	return i;
 }
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b14e2025e221..55dc70c6b4db 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -702,6 +702,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	struct tun_net *tn;
 	struct tun_struct *tun;
 	struct net_device *dev;
+	const struct cred *cred = current_cred();
 	int err;
 
 	tn = net_generic(net, tun_net_id);
@@ -712,11 +713,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		/* Check permissions */
 		if (((tun->owner != -1 &&
-		      current_euid() != tun->owner) ||
+		      cred->euid != tun->owner) ||
 		     (tun->group != -1 &&
-		      current_egid() != tun->group)) &&
-		     !capable(CAP_NET_ADMIN))
+		      cred->egid != tun->group)) &&
+		    !capable(CAP_NET_ADMIN)) {
 			return -EPERM;
+		}
 	}
 	else if (__dev_get_by_name(net, ifr->ifr_name))
 		return -EINVAL;
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 1aadb9387027..aa79280df15d 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -574,6 +574,7 @@ static int usbdev_open(struct inode *inode, struct file *file)
 {
 	struct usb_device *dev = NULL;
 	struct dev_state *ps;
+	const struct cred *cred = current_cred();
 	int ret;
 
 	lock_kernel();
@@ -617,8 +618,8 @@ static int usbdev_open(struct inode *inode, struct file *file)
 	init_waitqueue_head(&ps->wait);
 	ps->discsignr = 0;
 	ps->disc_pid = get_pid(task_pid(current));
-	ps->disc_uid = current_uid();
-	ps->disc_euid = current_euid();
+	ps->disc_uid = cred->uid;
+	ps->disc_euid = cred->euid;
 	ps->disccontext = NULL;
 	ps->ifclaimed = 0;
 	security_task_getsecid(current, &ps->secid);
@@ -967,6 +968,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 	struct usb_host_endpoint *ep;
 	struct async *as;
 	struct usb_ctrlrequest *dr = NULL;
+	const struct cred *cred = current_cred();
 	unsigned int u, totlen, isofrmlen;
 	int ret, ifnum = -1;
 	int is_in;
@@ -1174,8 +1176,8 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 	as->signr = uurb->signr;
 	as->ifnum = ifnum;
 	as->pid = get_pid(task_pid(current));
-	as->uid = current_uid();
-	as->euid = current_euid();
+	as->uid = cred->uid;
+	as->euid = cred->euid;
 	security_task_getsecid(current, &as->secid);
 	if (!is_in) {
 		if (copy_from_user(as->urb->transfer_buffer, uurb->buffer,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7a52477ce493..0e6655613169 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -157,7 +157,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	int items;
 	elf_addr_t *elf_info;
 	int ei_index = 0;
-	struct task_struct *tsk = current;
+	const struct cred *cred = current_cred();
 	struct vm_area_struct *vma;
 
 	/*
@@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
 	NEW_AUX_ENT(AT_FLAGS, 0);
 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
-	NEW_AUX_ENT(AT_UID, tsk->cred->uid);
-	NEW_AUX_ENT(AT_EUID, tsk->cred->euid);
-	NEW_AUX_ENT(AT_GID, tsk->cred->gid);
-	NEW_AUX_ENT(AT_EGID, tsk->cred->egid);
+	NEW_AUX_ENT(AT_UID, cred->uid);
+	NEW_AUX_ENT(AT_EUID, cred->euid);
+	NEW_AUX_ENT(AT_GID, cred->gid);
+	NEW_AUX_ENT(AT_EGID, cred->egid);
  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
 	if (k_platform) {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9f67054c2c4e..1f6e8c023b4c 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -475,6 +475,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 				   struct elf_fdpic_params *exec_params,
 				   struct elf_fdpic_params *interp_params)
 {
+	const struct cred *cred = current_cred();
 	unsigned long sp, csp, nitems;
 	elf_caddr_t __user *argv, *envp;
 	size_t platform_len = 0, len;
@@ -623,10 +624,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	NEW_AUX_ENT(AT_BASE,	interp_params->elfhdr_addr);
 	NEW_AUX_ENT(AT_FLAGS,	0);
 	NEW_AUX_ENT(AT_ENTRY,	exec_params->entry_addr);
-	NEW_AUX_ENT(AT_UID,	(elf_addr_t) current->cred->uid);
-	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) current->cred->euid);
-	NEW_AUX_ENT(AT_GID,	(elf_addr_t) current->cred->gid);
-	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) current->cred->egid);
+	NEW_AUX_ENT(AT_UID,	(elf_addr_t) cred->uid);
+	NEW_AUX_ENT(AT_EUID,	(elf_addr_t) cred->euid);
+	NEW_AUX_ENT(AT_GID,	(elf_addr_t) cred->gid);
+	NEW_AUX_ENT(AT_EGID,	(elf_addr_t) cred->egid);
 	NEW_AUX_ENT(AT_SECURE,	security_bprm_secureexec(bprm));
 	NEW_AUX_ENT(AT_EXECFN,	bprm->exec);
 
diff --git a/fs/exec.c b/fs/exec.c
index 31149e430a89..a5330e1a2216 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1388,6 +1388,7 @@ EXPORT_SYMBOL(set_binfmt);
  */
 static int format_corename(char *corename, long signr)
 {
+	const struct cred *cred = current_cred();
 	const char *pat_ptr = core_pattern;
 	int ispipe = (*pat_ptr == '|');
 	char *out_ptr = corename;
@@ -1424,7 +1425,7 @@ static int format_corename(char *corename, long signr)
 			/* uid */
 			case 'u':
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current_uid());
+					      "%d", cred->uid);
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1432,7 +1433,7 @@ static int format_corename(char *corename, long signr)
 			/* gid */
 			case 'g':
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current_gid());
+					      "%d", cred->gid);
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 63964d863ad6..c594cc0e40fb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -205,13 +205,14 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 		int force)
 {
+	const struct cred *cred = current_cred();
 	int err;
 	
 	err = security_file_set_fowner(filp);
 	if (err)
 		return err;
 
-	f_modown(filp, pid, type, current_uid(), current_euid(), force);
+	f_modown(filp, pid, type, cred->uid, cred->euid, force);
 	return 0;
 }
 EXPORT_SYMBOL(__f_setown);
diff --git a/fs/file_table.c b/fs/file_table.c
index 3152b53cfab0..bc4563fe791d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -94,7 +94,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp,
  */
 struct file *get_empty_filp(void)
 {
-	struct task_struct *tsk;
+	const struct cred *cred = current_cred();
 	static int old_max;
 	struct file * f;
 
@@ -118,12 +118,11 @@ struct file *get_empty_filp(void)
 	if (security_file_alloc(f))
 		goto fail_sec;
 
-	tsk = current;
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
-	f->f_uid = tsk->cred->fsuid;
-	f->f_gid = tsk->cred->fsgid;
+	f->f_uid = cred->fsuid;
+	f->f_gid = cred->fsgid;
 	eventpoll_init_file(f);
 	/* f->f_version: 0 */
 	return f;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 870a721b8bd2..7d479ce3aceb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -951,6 +951,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 	struct inode *inode;
 	struct dentry *dentry, *root;
 	struct qstr quick_string;
+	struct user_struct *user = current_user();
 
 	if (!hugetlbfs_vfsmount)
 		return ERR_PTR(-ENOENT);
@@ -958,7 +959,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
 	if (!can_do_hugetlb_shm())
 		return ERR_PTR(-EPERM);
 
-	if (!user_shm_lock(size, current->cred->user))
+	if (!user_shm_lock(size, user))
 		return ERR_PTR(-ENOMEM);
 
 	root = hugetlbfs_vfsmount->mnt_root;
@@ -998,7 +999,7 @@ out_inode:
 out_dentry:
 	dput(dentry);
 out_shm_unlock:
-	user_shm_unlock(size, current->cred->user);
+	user_shm_unlock(size, user);
 	return ERR_PTR(error);
 }
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index bb5210af77c2..5112554fd210 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
-				user = current->cred->user;
+				user = current_user();
 			else
 				user = find_user(who);
 
@@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 			break;
 		case IOPRIO_WHO_USER:
 			if (!who)
-				user = current->cred->user;
+				user = current_user();
 			else
 				user = find_user(who);
 
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 9e9bb0db4f6d..e7ddd0328ddc 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -667,8 +667,7 @@ smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 
 	attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
 	attr.ia_mode = mode;
-	attr.ia_uid = current_euid();
-	attr.ia_gid = current_egid();
+	current_euid_egid(&attr.ia_uid, &attr.ia_gid);
 
 	if (!new_valid_dev(dev))
 		return -EINVAL;
diff --git a/include/linux/cred.h b/include/linux/cred.h
index a7a686074cb0..4221ec6000c1 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -37,15 +37,16 @@ struct group_info {
  * get_group_info - Get a reference to a group info structure
  * @group_info: The group info to reference
  *
- * This must be called with the owning task locked (via task_lock()) when task
- * != current.  The reason being that the vast majority of callers are looking
- * at current->group_info, which can not be changed except by the current task.
- * Changing current->group_info requires the task lock, too.
+ * This gets a reference to a set of supplementary groups.
+ *
+ * If the caller is accessing a task's credentials, they must hold the RCU read
+ * lock when reading.
  */
-#define get_group_info(group_info)		\
-do {						\
-	atomic_inc(&(group_info)->usage);	\
-} while (0)
+static inline struct group_info *get_group_info(struct group_info *gi)
+{
+	atomic_inc(&gi->usage);
+	return gi;
+}
 
 /**
  * put_group_info - Release a reference to a group info structure
@@ -61,7 +62,7 @@ extern struct group_info *groups_alloc(int);
 extern void groups_free(struct group_info *);
 extern int set_current_groups(struct group_info *);
 extern int set_groups(struct cred *, struct group_info *);
-extern int groups_search(struct group_info *, gid_t);
+extern int groups_search(const struct group_info *, gid_t);
 
 /* access the groups "array" with this macro */
 #define GROUP_AT(gi, i) \
@@ -123,41 +124,6 @@ struct cred {
 	spinlock_t	lock;		/* lock for pointer changes */
 };
 
-#define get_current_user()	(get_uid(current->cred->user))
-
-#define task_uid(task)		((task)->cred->uid)
-#define task_gid(task)		((task)->cred->gid)
-#define task_euid(task)		((task)->cred->euid)
-#define task_egid(task)		((task)->cred->egid)
-
-#define current_uid()		(current->cred->uid)
-#define current_gid()		(current->cred->gid)
-#define current_euid()		(current->cred->euid)
-#define current_egid()		(current->cred->egid)
-#define current_suid()		(current->cred->suid)
-#define current_sgid()		(current->cred->sgid)
-#define current_fsuid()		(current->cred->fsuid)
-#define current_fsgid()		(current->cred->fsgid)
-#define current_cap()		(current->cred->cap_effective)
-
-#define current_uid_gid(_uid, _gid)		\
-do {						\
-	*(_uid) = current->cred->uid;		\
-	*(_gid) = current->cred->gid;		\
-} while(0)
-
-#define current_euid_egid(_uid, _gid)		\
-do {						\
-	*(_uid) = current->cred->euid;		\
-	*(_gid) = current->cred->egid;		\
-} while(0)
-
-#define current_fsuid_fsgid(_uid, _gid)		\
-do {						\
-	*(_uid) = current->cred->fsuid;		\
-	*(_gid) = current->cred->fsgid;		\
-} while(0)
-
 extern void __put_cred(struct cred *);
 extern int copy_creds(struct task_struct *, unsigned long);
 
@@ -187,4 +153,137 @@ static inline void put_cred(struct cred *cred)
 		__put_cred(cred);
 }
 
+/**
+ * current_cred - Access the current task's credentials
+ *
+ * Access the credentials of the current task.
+ */
+#define current_cred() \
+	(current->cred)
+
+/**
+ * __task_cred - Access another task's credentials
+ * @task: The task to query
+ *
+ * Access the credentials of another task.  The caller must hold the
+ * RCU readlock.
+ *
+ * The caller must make sure task doesn't go away, either by holding a ref on
+ * task or by holding tasklist_lock to prevent it from being unlinked.
+ */
+#define __task_cred(task) \
+	((const struct cred *)(rcu_dereference((task)->cred)))
+
+/**
+ * get_task_cred - Get another task's credentials
+ * @task: The task to query
+ *
+ * Get the credentials of a task, pinning them so that they can't go away.
+ * Accessing a task's credentials directly is not permitted.
+ *
+ * The caller must make sure task doesn't go away, either by holding a ref on
+ * task or by holding tasklist_lock to prevent it from being unlinked.
+ */
+#define get_task_cred(task)				\
+({							\
+	struct cred *__cred;				\
+	rcu_read_lock();				\
+	__cred = (struct cred *) __task_cred((task));	\
+	get_cred(__cred);				\
+	rcu_read_unlock();				\
+	__cred;						\
+})
+
+/**
+ * get_current_cred - Get the current task's credentials
+ *
+ * Get the credentials of the current task, pinning them so that they can't go
+ * away.  Accessing the current task's credentials directly is not permitted.
+ */
+#define get_current_cred()				\
+	(get_cred(current_cred()))
+
+/**
+ * get_current_user - Get the current task's user_struct
+ *
+ * Get the user record of the current task, pinning it so that it can't go
+ * away.
+ */
+#define get_current_user()				\
+({							\
+	struct user_struct *__u;			\
+	struct cred *__cred;				\
+	__cred = (struct cred *) current_cred();	\
+	__u = get_uid(__cred->user);			\
+	__u;						\
+})
+
+/**
+ * get_current_groups - Get the current task's supplementary group list
+ *
+ * Get the supplementary group list of the current task, pinning it so that it
+ * can't go away.
+ */
+#define get_current_groups()				\
+({							\
+	struct group_info *__groups;			\
+	struct cred *__cred;				\
+	__cred = (struct cred *) current_cred();	\
+	__groups = get_group_info(__cred->group_info);	\
+	__groups;					\
+})
+
+#define task_cred_xxx(task, xxx)		\
+({						\
+	__typeof__(task->cred->xxx) ___val;	\
+	rcu_read_lock();			\
+	___val = __task_cred((task))->xxx;	\
+	rcu_read_unlock();			\
+	___val;					\
+})
+
+#define task_uid(task)		(task_cred_xxx((task), uid))
+#define task_euid(task)		(task_cred_xxx((task), euid))
+
+#define current_cred_xxx(xxx)			\
+({						\
+	current->cred->xxx;			\
+})
+
+#define current_uid()		(current_cred_xxx(uid))
+#define current_gid()		(current_cred_xxx(gid))
+#define current_euid()		(current_cred_xxx(euid))
+#define current_egid()		(current_cred_xxx(egid))
+#define current_suid()		(current_cred_xxx(suid))
+#define current_sgid()		(current_cred_xxx(sgid))
+#define current_fsuid() 	(current_cred_xxx(fsuid))
+#define current_fsgid() 	(current_cred_xxx(fsgid))
+#define current_cap()		(current_cred_xxx(cap_effective))
+#define current_user()		(current_cred_xxx(user))
+#define current_security()	(current_cred_xxx(security))
+
+#define current_uid_gid(_uid, _gid)		\
+do {						\
+	const struct cred *__cred;		\
+	__cred = current_cred();		\
+	*(_uid) = __cred->uid;			\
+	*(_gid) = __cred->gid;			\
+} while(0)
+
+#define current_euid_egid(_euid, _egid)		\
+do {						\
+	const struct cred *__cred;		\
+	__cred = current_cred();		\
+	*(_euid) = __cred->euid;		\
+	*(_egid) = __cred->egid;		\
+} while(0)
+
+#define current_fsuid_fsgid(_fsuid, _fsgid)	\
+do {						\
+	const struct cred *__cred;		\
+	__cred = current_cred();		\
+	*(_fsuid) = __cred->fsuid;		\
+	*(_fsgid) = __cred->fsgid;		\
+} while(0)
+
 #endif /* _LINUX_CRED_H */
diff --git a/include/linux/securebits.h b/include/linux/securebits.h
index 6d389491bfa2..d2c5ed845bcc 100644
--- a/include/linux/securebits.h
+++ b/include/linux/securebits.h
@@ -32,7 +32,7 @@
    setting is locked or not. A setting which is locked cannot be
    changed from user-level. */
 #define issecure_mask(X)	(1 << (X))
-#define issecure(X)		(issecure_mask(X) & current->cred->securebits)
+#define issecure(X)		(issecure_mask(X) & current_cred_xxx(securebits))
 
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e1885b494bac..1151881ccb9a 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -112,6 +112,7 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
 static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 							struct mq_attr *attr)
 {
+	struct user_struct *u = current_user();
 	struct inode *inode;
 
 	inode = new_inode(sb);
@@ -126,7 +127,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
 		if (S_ISREG(mode)) {
 			struct mqueue_inode_info *info;
 			struct task_struct *p = current;
-			struct user_struct *u = p->cred->user;
 			unsigned long mq_bytes, mq_msg_tblsz;
 
 			inode->i_fop = &mqueue_file_operations;
diff --git a/ipc/shm.c b/ipc/shm.c
index 264a9d33c5dd..38a055758a9b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (shmflg & SHM_HUGETLB) {
 		/* hugetlb_file_setup takes care of mlock user accounting */
 		file = hugetlb_file_setup(name, size);
-		shp->mlock_user = current->cred->user;
+		shp->mlock_user = current_user();
 	} else {
 		int acctflag = VM_ACCOUNT;
 		/*
@@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 			goto out_unlock;
 		
 		if(cmd==SHM_LOCK) {
-			struct user_struct *user = current->cred->user;
+			struct user_struct *user = current_user();
 			if (!is_file_hugepages(shp->shm_file)) {
 				err = shmem_lock(shp->shm_file, 1, user);
 				if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
diff --git a/kernel/sys.c b/kernel/sys.c
index 5d81f07c0150..c4d6b59553e9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -143,6 +143,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 {
 	struct task_struct *g, *p;
 	struct user_struct *user;
+	const struct cred *cred = current_cred();
 	int error = -EINVAL;
 	struct pid *pgrp;
 
@@ -176,18 +177,18 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
-			user = current->cred->user;
+			user = cred->user;
 			if (!who)
-				who = current_uid();
-			else
-				if (who != current_uid() && !(user = find_user(who)))
-					goto out_unlock;	/* No processes for this user */
+				who = cred->uid;
+			else if ((who != cred->uid) &&
+				 !(user = find_user(who)))
+				goto out_unlock;	/* No processes for this user */
 
 			do_each_thread(g, p)
-				if (p->cred->uid == who)
+				if (__task_cred(p)->uid == who)
 					error = set_one_prio(p, niceval, error);
 			while_each_thread(g, p);
-			if (who != current_uid())
+			if (who != cred->uid)
 				free_uid(user);		/* For find_user() */
 			break;
 	}
@@ -207,6 +208,7 @@ asmlinkage long sys_getpriority(int which, int who)
 {
 	struct task_struct *g, *p;
 	struct user_struct *user;
+	const struct cred *cred = current_cred();
 	long niceval, retval = -ESRCH;
 	struct pid *pgrp;
 
@@ -238,21 +240,21 @@ asmlinkage long sys_getpriority(int which, int who)
 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
-			user = current->cred->user;
+			user = (struct user_struct *) cred->user;
 			if (!who)
-				who = current_uid();
-			else
-				if (who != current_uid() && !(user = find_user(who)))
-					goto out_unlock;	/* No processes for this user */
+				who = cred->uid;
+			else if ((who != cred->uid) &&
+				 !(user = find_user(who)))
+				goto out_unlock;	/* No processes for this user */
 
 			do_each_thread(g, p)
-				if (p->cred->uid == who) {
+				if (__task_cred(p)->uid == who) {
 					niceval = 20 - task_nice(p);
 					if (niceval > retval)
 						retval = niceval;
 				}
 			while_each_thread(g, p);
-			if (who != current_uid())
+			if (who != cred->uid)
 				free_uid(user);		/* for find_user() */
 			break;
 	}
@@ -743,11 +745,11 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 
 asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
 {
-	struct cred *cred = current->cred;
+	const struct cred *cred = current_cred();
 	int retval;
 
-	if (!(retval = put_user(cred->uid, ruid)) &&
-	    !(retval = put_user(cred->euid, euid)))
+	if (!(retval   = put_user(cred->uid,  ruid)) &&
+	    !(retval   = put_user(cred->euid, euid)))
 		retval = put_user(cred->suid, suid);
 
 	return retval;
@@ -796,11 +798,11 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 
 asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
 {
-	struct cred *cred = current->cred;
+	const struct cred *cred = current_cred();
 	int retval;
 
-	if (!(retval = put_user(cred->gid, rgid)) &&
-	    !(retval = put_user(cred->egid, egid)))
+	if (!(retval   = put_user(cred->gid,  rgid)) &&
+	    !(retval   = put_user(cred->egid, egid)))
 		retval = put_user(cred->sgid, sgid);
 
 	return retval;
@@ -1199,7 +1201,7 @@ static void groups_sort(struct group_info *group_info)
 }
 
 /* a simple bsearch */
-int groups_search(struct group_info *group_info, gid_t grp)
+int groups_search(const struct group_info *group_info, gid_t grp)
 {
 	unsigned int left, right;
 
@@ -1268,13 +1270,8 @@ EXPORT_SYMBOL(set_current_groups);
 
 asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
 {
-	struct cred *cred = current->cred;
-	int i = 0;
-
-	/*
-	 *	SMP: Nobody else can change our grouplist. Thus we are
-	 *	safe.
-	 */
+	const struct cred *cred = current_cred();
+	int i;
 
 	if (gidsetsize < 0)
 		return -EINVAL;
@@ -1330,8 +1327,9 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
  */
 int in_group_p(gid_t grp)
 {
-	struct cred *cred = current->cred;
+	const struct cred *cred = current_cred();
 	int retval = 1;
+
 	if (grp != cred->fsgid)
 		retval = groups_search(cred->group_info, grp);
 	return retval;
@@ -1341,8 +1339,9 @@ EXPORT_SYMBOL(in_group_p);
 
 int in_egroup_p(gid_t grp)
 {
-	struct cred *cred = current->cred;
+	const struct cred *cred = current_cred();
 	int retval = 1;
+
 	if (grp != cred->egid)
 		retval = groups_search(cred->group_info, grp);
 	return retval;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 71f07fc39fea..2460c3199b5a 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -84,11 +84,12 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid)
 
 asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid)
 {
+	const struct cred *cred = current_cred();
 	int retval;
 
-	if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) &&
-	    !(retval = put_user(high2lowuid(current->cred->euid), euid)))
-		retval = put_user(high2lowuid(current->cred->suid), suid);
+	if (!(retval   = put_user(high2lowuid(cred->uid),  ruid)) &&
+	    !(retval   = put_user(high2lowuid(cred->euid), euid)))
+		retval = put_user(high2lowuid(cred->suid), suid);
 
 	return retval;
 }
@@ -104,11 +105,12 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid)
 
 asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid)
 {
+	const struct cred *cred = current_cred();
 	int retval;
 
-	if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) &&
-	    !(retval = put_user(high2lowgid(current->cred->egid), egid)))
-		retval = put_user(high2lowgid(current->cred->sgid), sgid);
+	if (!(retval   = put_user(high2lowgid(cred->gid),  rgid)) &&
+	    !(retval   = put_user(high2lowgid(cred->egid), egid)))
+		retval = put_user(high2lowgid(cred->sgid), sgid);
 
 	return retval;
 }
@@ -161,25 +163,24 @@ static int groups16_from_user(struct group_info *group_info,
 
 asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist)
 {
-	int i = 0;
+	const struct cred *cred = current_cred();
+	int i;
 
 	if (gidsetsize < 0)
 		return -EINVAL;
 
-	get_group_info(current->cred->group_info);
-	i = current->cred->group_info->ngroups;
+	i = cred->group_info->ngroups;
 	if (gidsetsize) {
 		if (i > gidsetsize) {
 			i = -EINVAL;
 			goto out;
 		}
-		if (groups16_to_user(grouplist, current->cred->group_info)) {
+		if (groups16_to_user(grouplist, cred->group_info)) {
 			i = -EFAULT;
 			goto out;
 		}
 	}
 out:
-	put_group_info(current->cred->group_info);
 	return i;
 }
 
@@ -210,20 +211,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist)
 
 asmlinkage long sys_getuid16(void)
 {
-	return high2lowuid(current->cred->uid);
+	return high2lowuid(current_uid());
 }
 
 asmlinkage long sys_geteuid16(void)
 {
-	return high2lowuid(current->cred->euid);
+	return high2lowuid(current_euid());
 }
 
 asmlinkage long sys_getgid16(void)
 {
-	return high2lowgid(current->cred->gid);
+	return high2lowgid(current_gid());
 }
 
 asmlinkage long sys_getegid16(void)
 {
-	return high2lowgid(current->cred->egid);
+	return high2lowgid(current_egid());
 }
diff --git a/net/core/scm.c b/net/core/scm.c
index c28ca32a7d93..f73c44b17dda 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -44,7 +44,7 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	struct cred *cred = current->cred;
+	const struct cred *cred = current_cred();
 
 	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
 	    ((creds->uid == cred->uid   || creds->uid == cred->euid ||
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index c79543212602..0443f8349458 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -350,16 +350,18 @@ EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
 struct rpc_cred *
 rpcauth_lookupcred(struct rpc_auth *auth, int flags)
 {
-	struct auth_cred acred = {
-		.uid = current_fsuid(),
-		.gid = current_fsgid(),
-		.group_info = current->cred->group_info,
-	};
+	struct auth_cred acred;
 	struct rpc_cred *ret;
+	const struct cred *cred = current_cred();
 
 	dprintk("RPC:       looking up %s cred\n",
 		auth->au_ops->au_name);
-	get_group_info(acred.group_info);
+
+	memset(&acred, 0, sizeof(acred));
+	acred.uid = cred->fsuid;
+	acred.gid = cred->fsgid;
+	acred.group_info = get_group_info(((struct cred *)cred)->group_info);
+
 	ret = auth->au_ops->lookup_cred(auth, &acred, flags);
 	put_group_info(acred.group_info);
 	return ret;
diff --git a/security/commoncap.c b/security/commoncap.c
index fa61679f8c73..61307f590003 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -641,7 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice)
 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		   unsigned long arg4, unsigned long arg5, long *rc_p)
 {
-	struct cred *cred = current->cred;
+	struct cred *cred = current_cred();
 	long error = 0;
 
 	switch (option) {
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index b0904cdda2e7..ce8ac6073d57 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -582,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 {
 	struct request_key_auth *rka;
 	struct task_struct *t = current;
-	struct cred *cred = t->cred;
+	struct cred *cred = current_cred();
 	struct key *key;
 	key_ref_t key_ref, skey_ref;
 	int ret;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 3e9b9eb1dd28..0488b0af5bd6 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -67,6 +67,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 				 void *aux)
 {
 	struct task_struct *tsk = current;
+	const struct cred *cred = current_cred();
 	key_serial_t prkey, sskey;
 	struct key *key = cons->key, *authkey = cons->authkey, *keyring;
 	char *argv[9], *envp[3], uid_str[12], gid_str[12];
@@ -96,16 +97,16 @@ static int call_sbin_request_key(struct key_construction *cons,
 		goto error_link;
 
 	/* record the UID and GID */
-	sprintf(uid_str, "%d", current_fsuid());
-	sprintf(gid_str, "%d", current_fsgid());
+	sprintf(uid_str, "%d", cred->fsuid);
+	sprintf(gid_str, "%d", cred->fsgid);
 
 	/* we say which key is under construction */
 	sprintf(key_str, "%d", key->serial);
 
 	/* we specify the process's default keyrings */
 	sprintf(keyring_str[0], "%d",
-		tsk->cred->thread_keyring ?
-		tsk->cred->thread_keyring->serial : 0);
+		cred->thread_keyring ?
+		cred->thread_keyring->serial : 0);
 
 	prkey = 0;
 	if (tsk->signal->process_keyring)
@@ -118,7 +119,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 		sskey = rcu_dereference(tsk->signal->session_keyring)->serial;
 		rcu_read_unlock();
 	} else {
-		sskey = tsk->cred->user->session_keyring->serial;
+		sskey = cred->user->session_keyring->serial;
 	}
 
 	sprintf(keyring_str[2], "%d", sskey);
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index cf02490cd1eb..c73aeaa008e8 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -39,9 +39,13 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid);
 int selinux_secmark_relabel_packet_permission(u32 sid)
 {
 	if (selinux_enabled) {
-		struct task_security_struct *tsec = current->cred->security;
+		const struct task_security_struct *__tsec;
+		u32 tsid;
 
-		return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET,
+		__tsec = current_security();
+		tsid = __tsec->sid;
+
+		return avc_has_perm(tsid, sid, SECCLASS_PACKET,
 				    PACKET__RELABELTO, NULL);
 	}
 	return 0;
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index d7db76617b0e..c0eb72013d67 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
 	struct xfrm_user_sec_ctx *uctx, u32 sid)
 {
 	int rc = 0;
-	struct task_security_struct *tsec = current->cred->security;
+	const struct task_security_struct *tsec = current_security();
 	struct xfrm_sec_ctx *ctx = NULL;
 	char *ctx_str = NULL;
 	u32 str_len;
@@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
  */
 int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
 {
-	struct task_security_struct *tsec = current->cred->security;
+	const struct task_security_struct *tsec = current_security();
 	int rc = 0;
 
 	if (ctx) {
@@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x)
   */
 int selinux_xfrm_state_delete(struct xfrm_state *x)
 {
-	struct task_security_struct *tsec = current->cred->security;
+	const struct task_security_struct *tsec = current_security();
 	struct xfrm_sec_ctx *ctx = x->security;
 	int rc = 0;
 
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index b6dd4fc0fb0b..247cec3b5a43 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode)
 {
 	int rc;
 
-	rc = smk_access(current->cred->security, obj_label, mode);
+	rc = smk_access(current_security(), obj_label, mode);
 	if (rc == 0)
 		return 0;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index cc837314fb0e..e8a4fcb1ad04 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -143,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 static int smack_syslog(int type)
 {
 	int rc;
-	char *sp = current->cred->security;
+	char *sp = current_security();
 
 	rc = cap_syslog(type);
 	if (rc != 0)
@@ -375,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags)
  */
 static int smack_inode_alloc_security(struct inode *inode)
 {
-	inode->i_security = new_inode_smack(current->cred->security);
+	inode->i_security = new_inode_smack(current_security());
 	if (inode->i_security == NULL)
 		return -ENOMEM;
 	return 0;
@@ -820,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask)
  */
 static int smack_file_alloc_security(struct file *file)
 {
-	file->f_security = current->cred->security;
+	file->f_security = current_security();
 	return 0;
 }
 
@@ -918,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
  */
 static int smack_file_set_fowner(struct file *file)
 {
-	file->f_security = current->cred->security;
+	file->f_security = current_security();
 	return 0;
 }
 
@@ -986,8 +986,7 @@ static int smack_file_receive(struct file *file)
  */
 static int smack_cred_alloc_security(struct cred *cred)
 {
-	cred->security = current->cred->security;
-
+	cred->security = current_security();
 	return 0;
 }
 
@@ -1225,7 +1224,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
  */
 static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
 {
-	char *csp = current->cred->security;
+	char *csp = current_security();
 	struct socket_smack *ssp;
 
 	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
@@ -1450,7 +1449,7 @@ static int smack_flags_to_may(int flags)
  */
 static int smack_msg_msg_alloc_security(struct msg_msg *msg)
 {
-	msg->security = current->cred->security;
+	msg->security = current_security();
 	return 0;
 }
 
@@ -1486,7 +1485,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp)
 {
 	struct kern_ipc_perm *isp = &shp->shm_perm;
 
-	isp->security = current->cred->security;
+	isp->security = current_security();
 	return 0;
 }
 
@@ -1595,7 +1594,7 @@ static int smack_sem_alloc_security(struct sem_array *sma)
 {
 	struct kern_ipc_perm *isp = &sma->sem_perm;
 
-	isp->security = current->cred->security;
+	isp->security = current_security();
 	return 0;
 }
 
@@ -1699,7 +1698,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq)
 {
 	struct kern_ipc_perm *kisp = &msq->q_perm;
 
-	kisp->security = current->cred->security;
+	kisp->security = current_security();
 	return 0;
 }
 
@@ -1854,7 +1853,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	struct super_block *sbp;
 	struct superblock_smack *sbsp;
 	struct inode_smack *isp;
-	char *csp = current->cred->security;
+	char *csp = current_security();
 	char *fetched;
 	char *final;
 	struct dentry *dp;
@@ -2290,8 +2289,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent)
 		return;
 
 	ssp = sk->sk_security;
-	ssp->smk_in = current->cred->security;
-	ssp->smk_out = current->cred->security;
+	ssp->smk_in = ssp->smk_out = current_security();
 	ssp->smk_packet[0] = '\0';
 
 	rc = smack_netlabel(sk);
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index c5ca279e0506..ca257dfdc75d 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -336,7 +336,7 @@ static void smk_cipso_doi(void)
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = smack_to_secid(current->cred->security);
+	audit_info.secid = smack_to_secid(current_security());
 
 	rc = netlbl_cfg_map_del(NULL, &audit_info);
 	if (rc != 0)
@@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient)
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = smack_to_secid(current->cred->security);
+	audit_info.secid = smack_to_secid(current_security());
 
 	if (oldambient != NULL) {
 		rc = netlbl_cfg_map_del(oldambient, &audit_info);
-- 
cgit v1.2.3


From c69e8d9c01db2adc503464993c358901c9af9de4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:19 +1100
Subject: CRED: Use RCU to access another task's creds and to release a task's
 own creds

Use RCU to access another task's creds and to release a task's own creds.
This means that it will be possible for the credentials of a task to be
replaced without another task (a) requiring a full lock to read them, and (b)
seeing deallocated memory.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/ia64/kernel/perfmon.c   | 32 +++++++++++++---------
 drivers/connector/cn_proc.c  | 16 +++++++----
 fs/binfmt_elf.c              |  8 ++++--
 fs/binfmt_elf_fdpic.c        |  8 ++++--
 fs/fcntl.c                   | 15 ++++++++---
 fs/fuse/dir.c                | 23 ++++++++++------
 fs/ioprio.c                  | 14 +++++++---
 fs/proc/array.c              | 32 ++++++++++++++--------
 fs/proc/base.c               | 32 ++++++++++++++++------
 include/linux/cred.h         |  3 ++-
 kernel/auditsc.c             | 33 +++++++++++++----------
 kernel/cgroup.c              | 16 +++++------
 kernel/exit.c                | 14 ++++++----
 kernel/futex.c               | 22 +++++++++------
 kernel/futex_compat.c        |  7 ++---
 kernel/ptrace.c              | 22 ++++++++-------
 kernel/sched.c               | 31 ++++++++++++++-------
 kernel/signal.c              | 49 ++++++++++++++++++++-------------
 kernel/sys.c                 | 11 +++++---
 kernel/tsacct.c              |  6 +++--
 mm/mempolicy.c               |  8 +++---
 mm/migrate.c                 |  8 +++---
 mm/oom_kill.c                |  6 ++---
 security/commoncap.c         | 64 +++++++++++++++++++++++++++-----------------
 security/keys/permission.c   | 10 ++++---
 security/keys/process_keys.c | 24 ++++++++++-------
 security/selinux/selinuxfs.c | 13 ++++++---
 security/smack/smack_lsm.c   | 32 +++++++++++-----------
 28 files changed, 355 insertions(+), 204 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index dd38db46a77a..0e499757309b 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2399,25 +2399,33 @@ error_kmem:
 static int
 pfm_bad_permissions(struct task_struct *task)
 {
+	const struct cred *tcred;
 	uid_t uid = current_uid();
 	gid_t gid = current_gid();
+	int ret;
+
+	rcu_read_lock();
+	tcred = __task_cred(task);
 
 	/* inspired by ptrace_attach() */
 	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
 		uid,
 		gid,
-		task->euid,
-		task->suid,
-		task->uid,
-		task->egid,
-		task->sgid));
-
-	return (uid != task->euid)
-	    || (uid != task->suid)
-	    || (uid != task->uid)
-	    || (gid != task->egid)
-	    || (gid != task->sgid)
-	    || (gid != task->gid)) && !capable(CAP_SYS_PTRACE);
+		tcred->euid,
+		tcred->suid,
+		tcred->uid,
+		tcred->egid,
+		tcred->sgid));
+
+	ret = ((uid != tcred->euid)
+	       || (uid != tcred->suid)
+	       || (uid != tcred->uid)
+	       || (gid != tcred->egid)
+	       || (gid != tcred->sgid)
+	       || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE);
+
+	rcu_read_unlock();
+	return ret;
 }
 
 static int
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 354c1ff17159..c5afc98e2675 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -106,6 +106,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
 	struct proc_event *ev;
 	__u8 buffer[CN_PROC_MSG_SIZE];
 	struct timespec ts;
+	const struct cred *cred;
 
 	if (atomic_read(&proc_event_num_listeners) < 1)
 		return;
@@ -115,14 +116,19 @@ void proc_id_connector(struct task_struct *task, int which_id)
 	ev->what = which_id;
 	ev->event_data.id.process_pid = task->pid;
 	ev->event_data.id.process_tgid = task->tgid;
+	rcu_read_lock();
+	cred = __task_cred(task);
 	if (which_id == PROC_EVENT_UID) {
-		ev->event_data.id.r.ruid = task->cred->uid;
-		ev->event_data.id.e.euid = task->cred->euid;
+		ev->event_data.id.r.ruid = cred->uid;
+		ev->event_data.id.e.euid = cred->euid;
 	} else if (which_id == PROC_EVENT_GID) {
-		ev->event_data.id.r.rgid = task->cred->gid;
-		ev->event_data.id.e.egid = task->cred->egid;
-	} else
+		ev->event_data.id.r.rgid = cred->gid;
+		ev->event_data.id.e.egid = cred->egid;
+	} else {
+		rcu_read_unlock();
 	     	return;
+	}
+	rcu_read_unlock();
 	get_seq(&msg->seq, &ev->cpu);
 	ktime_get_ts(&ts); /* get high res monotonic timestamp */
 	put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0e6655613169..9142ff5dc8e6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1361,6 +1361,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		       struct mm_struct *mm)
 {
+	const struct cred *cred;
 	unsigned int i, len;
 	
 	/* first copy the parameters from user space */
@@ -1388,8 +1389,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
 	psinfo->pr_nice = task_nice(p);
 	psinfo->pr_flag = p->flags;
-	SET_UID(psinfo->pr_uid, p->cred->uid);
-	SET_GID(psinfo->pr_gid, p->cred->gid);
+	rcu_read_lock();
+	cred = __task_cred(p);
+	SET_UID(psinfo->pr_uid, cred->uid);
+	SET_GID(psinfo->pr_gid, cred->gid);
+	rcu_read_unlock();
 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
 	
 	return 0;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 1f6e8c023b4c..45dabd59936f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1414,6 +1414,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		       struct mm_struct *mm)
 {
+	const struct cred *cred;
 	unsigned int i, len;
 
 	/* first copy the parameters from user space */
@@ -1441,8 +1442,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
 	psinfo->pr_nice = task_nice(p);
 	psinfo->pr_flag = p->flags;
-	SET_UID(psinfo->pr_uid, p->cred->uid);
-	SET_GID(psinfo->pr_gid, p->cred->gid);
+	rcu_read_lock();
+	cred = __task_cred(p);
+	SET_UID(psinfo->pr_uid, cred->uid);
+	SET_GID(psinfo->pr_gid, cred->gid);
+	rcu_read_unlock();
 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
 
 	return 0;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c594cc0e40fb..87c39f1f0817 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -401,10 +401,17 @@ static const long band_table[NSIGPOLL] = {
 static inline int sigio_perm(struct task_struct *p,
                              struct fown_struct *fown, int sig)
 {
-	return (((fown->euid == 0) ||
-		 (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) ||
-		 (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) &&
-		!security_file_send_sigiotask(p, fown, sig));
+	const struct cred *cred;
+	int ret;
+
+	rcu_read_lock();
+	cred = __task_cred(p);
+	ret = ((fown->euid == 0 ||
+		fown->euid == cred->suid || fown->euid == cred->uid ||
+		fown->uid  == cred->suid || fown->uid  == cred->uid) &&
+	       !security_file_send_sigiotask(p, fown, sig));
+	rcu_read_unlock();
+	return ret;
 }
 
 static void send_sigio_to_task(struct task_struct *p,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e97a98981862..95bc22bdd060 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -869,18 +869,25 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
  */
 int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
 {
+	const struct cred *cred;
+	int ret;
+
 	if (fc->flags & FUSE_ALLOW_OTHER)
 		return 1;
 
-	if (task->cred->euid == fc->user_id &&
-	    task->cred->suid == fc->user_id &&
-	    task->cred->uid == fc->user_id &&
-	    task->cred->egid == fc->group_id &&
-	    task->cred->sgid == fc->group_id &&
-	    task->cred->gid == fc->group_id)
-		return 1;
+	rcu_read_lock();
+	ret = 0;
+	cred = __task_cred(task);
+	if (cred->euid == fc->user_id &&
+	    cred->suid == fc->user_id &&
+	    cred->uid  == fc->user_id &&
+	    cred->egid == fc->group_id &&
+	    cred->sgid == fc->group_id &&
+	    cred->gid  == fc->group_id)
+		ret = 1;
+	rcu_read_unlock();
 
-	return 0;
+	return ret;
 }
 
 static int fuse_access(struct inode *inode, int mask)
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 5112554fd210..3569e0ad86a2 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -31,10 +31,16 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 {
 	int err;
 	struct io_context *ioc;
+	const struct cred *cred = current_cred(), *tcred;
 
-	if (task->cred->uid != current_euid() &&
-	    task->cred->uid != current_uid() && !capable(CAP_SYS_NICE))
+	rcu_read_lock();
+	tcred = __task_cred(task);
+	if (tcred->uid != cred->euid &&
+	    tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 
 	err = security_task_setioprio(task, ioprio);
 	if (err)
@@ -131,7 +137,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 				break;
 
 			do_each_thread(g, p) {
-				if (p->cred->uid != who)
+				if (__task_cred(p)->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
@@ -224,7 +230,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 				break;
 
 			do_each_thread(g, p) {
-				if (p->cred->uid != user->uid)
+				if (__task_cred(p)->uid != user->uid)
 					continue;
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 62fe9b2009b6..7e4877d9dcb5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
+	const struct cred *cred;
 	pid_t ppid, tpid;
 
 	rcu_read_lock();
@@ -170,6 +171,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		if (tracer)
 			tpid = task_pid_nr_ns(tracer, ns);
 	}
+	cred = get_cred((struct cred *) __task_cred(p));
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
@@ -182,8 +184,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		task_tgid_nr_ns(p, ns),
 		pid_nr_ns(pid, ns),
 		ppid, tpid,
-		p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid,
-		p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid);
+		cred->uid, cred->euid, cred->suid, cred->fsuid,
+		cred->gid, cred->egid, cred->sgid, cred->fsgid);
 
 	task_lock(p);
 	if (p->files)
@@ -194,13 +196,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		fdt ? fdt->max_fds : 0);
 	rcu_read_unlock();
 
-	group_info = p->cred->group_info;
-	get_group_info(group_info);
+	group_info = cred->group_info;
 	task_unlock(p);
 
 	for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
 		seq_printf(m, "%d ", GROUP_AT(group_info, g));
-	put_group_info(group_info);
+	put_cred(cred);
 
 	seq_printf(m, "\n");
 }
@@ -262,7 +263,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		blocked = p->blocked;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
-		qsize = atomic_read(&p->cred->user->sigpending);
+		qsize = atomic_read(&__task_cred(p)->user->sigpending);
 		qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
 		unlock_task_sighand(p, &flags);
 	}
@@ -293,12 +294,21 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 static inline void task_cap(struct seq_file *m, struct task_struct *p)
 {
-	struct cred *cred = p->cred;
+	const struct cred *cred;
+	kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset;
 
-	render_cap_t(m, "CapInh:\t", &cred->cap_inheritable);
-	render_cap_t(m, "CapPrm:\t", &cred->cap_permitted);
-	render_cap_t(m, "CapEff:\t", &cred->cap_effective);
-	render_cap_t(m, "CapBnd:\t", &cred->cap_bset);
+	rcu_read_lock();
+	cred = __task_cred(p);
+	cap_inheritable	= cred->cap_inheritable;
+	cap_permitted	= cred->cap_permitted;
+	cap_effective	= cred->cap_effective;
+	cap_bset	= cred->cap_bset;
+	rcu_read_unlock();
+
+	render_cap_t(m, "CapInh:\t", &cap_inheritable);
+	render_cap_t(m, "CapPrm:\t", &cap_permitted);
+	render_cap_t(m, "CapEff:\t", &cap_effective);
+	render_cap_t(m, "CapBnd:\t", &cap_bset);
 }
 
 static inline void task_context_switch_counts(struct seq_file *m,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6862b360c36c..cf42c42cbfbb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1406,6 +1406,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
 {
 	struct inode * inode;
 	struct proc_inode *ei;
+	const struct cred *cred;
 
 	/* We need a new inode */
 
@@ -1428,8 +1429,11 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
 	inode->i_uid = 0;
 	inode->i_gid = 0;
 	if (task_dumpable(task)) {
-		inode->i_uid = task->cred->euid;
-		inode->i_gid = task->cred->egid;
+		rcu_read_lock();
+		cred = __task_cred(task);
+		inode->i_uid = cred->euid;
+		inode->i_gid = cred->egid;
+		rcu_read_unlock();
 	}
 	security_task_to_inode(task, inode);
 
@@ -1445,6 +1449,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
 {
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task;
+	const struct cred *cred;
+
 	generic_fillattr(inode, stat);
 
 	rcu_read_lock();
@@ -1454,8 +1460,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
 	if (task) {
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
-			stat->uid = task->cred->euid;
-			stat->gid = task->cred->egid;
+			cred = __task_cred(task);
+			stat->uid = cred->euid;
+			stat->gid = cred->egid;
 		}
 	}
 	rcu_read_unlock();
@@ -1483,11 +1490,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task = get_proc_task(inode);
+	const struct cred *cred;
+
 	if (task) {
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
-			inode->i_uid = task->cred->euid;
-			inode->i_gid = task->cred->egid;
+			rcu_read_lock();
+			cred = __task_cred(task);
+			inode->i_uid = cred->euid;
+			inode->i_gid = cred->egid;
+			rcu_read_unlock();
 		} else {
 			inode->i_uid = 0;
 			inode->i_gid = 0;
@@ -1649,6 +1661,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct task_struct *task = get_proc_task(inode);
 	int fd = proc_fd(inode);
 	struct files_struct *files;
+	const struct cred *cred;
 
 	if (task) {
 		files = get_files_struct(task);
@@ -1658,8 +1671,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 				rcu_read_unlock();
 				put_files_struct(files);
 				if (task_dumpable(task)) {
-					inode->i_uid = task->cred->euid;
-					inode->i_gid = task->cred->egid;
+					rcu_read_lock();
+					cred = __task_cred(task);
+					inode->i_uid = cred->euid;
+					inode->i_gid = cred->egid;
+					rcu_read_unlock();
 				} else {
 					inode->i_uid = 0;
 					inode->i_gid = 0;
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4221ec6000c1..166ce4ddba64 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -147,8 +147,9 @@ static inline struct cred *get_cred(struct cred *cred)
  * Release a reference to a set of credentials, deleting them when the last ref
  * is released.
  */
-static inline void put_cred(struct cred *cred)
+static inline void put_cred(const struct cred *_cred)
 {
+	struct cred *cred = (struct cred *) _cred;
 	if (atomic_dec_and_test(&(cred)->usage))
 		__put_cred(cred);
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2febf5165fad..ae8ef88ade3f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -447,7 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 			      struct audit_names *name,
 			      enum audit_state *state)
 {
-	struct cred *cred = tsk->cred;
+	const struct cred *cred = get_task_cred(tsk);
 	int i, j, need_sid = 1;
 	u32 sid;
 
@@ -642,8 +642,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		}
 
-		if (!result)
+		if (!result) {
+			put_cred(cred);
 			return 0;
+		}
 	}
 	if (rule->filterkey && ctx)
 		ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
@@ -651,6 +653,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 	case AUDIT_NEVER:    *state = AUDIT_DISABLED;	    break;
 	case AUDIT_ALWAYS:   *state = AUDIT_RECORD_CONTEXT; break;
 	}
+	put_cred(cred);
 	return 1;
 }
 
@@ -1229,7 +1232,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
-	struct cred *cred = tsk->cred;
+	const struct cred *cred;
 	int i, call_panic = 0;
 	struct audit_buffer *ab;
 	struct audit_aux_data *aux;
@@ -1239,13 +1242,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	context->pid = tsk->pid;
 	if (!context->ppid)
 		context->ppid = sys_getppid();
-	context->uid = cred->uid;
-	context->gid = cred->gid;
-	context->euid = cred->euid;
-	context->suid = cred->suid;
+	cred = current_cred();
+	context->uid   = cred->uid;
+	context->gid   = cred->gid;
+	context->euid  = cred->euid;
+	context->suid  = cred->suid;
 	context->fsuid = cred->fsuid;
-	context->egid = cred->egid;
-	context->sgid = cred->sgid;
+	context->egid  = cred->egid;
+	context->sgid  = cred->sgid;
 	context->fsgid = cred->fsgid;
 	context->personality = tsk->personality;
 
@@ -2088,7 +2092,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
 			audit_log_format(ab, "login pid=%d uid=%u "
 				"old auid=%u new auid=%u"
 				" old ses=%u new ses=%u",
-				task->pid, task->cred->uid,
+				task->pid, task_uid(task),
 				task->loginuid, loginuid,
 				task->sessionid, sessionid);
 			audit_log_end(ab);
@@ -2471,7 +2475,7 @@ void __audit_ptrace(struct task_struct *t)
 
 	context->target_pid = t->pid;
 	context->target_auid = audit_get_loginuid(t);
-	context->target_uid = t->cred->uid;
+	context->target_uid = task_uid(t);
 	context->target_sessionid = audit_get_sessionid(t);
 	security_task_getsecid(t, &context->target_sid);
 	memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
@@ -2490,6 +2494,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	struct audit_aux_data_pids *axp;
 	struct task_struct *tsk = current;
 	struct audit_context *ctx = tsk->audit_context;
+	uid_t uid = current_uid(), t_uid = task_uid(t);
 
 	if (audit_pid && t->tgid == audit_pid) {
 		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
@@ -2497,7 +2502,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 			if (tsk->loginuid != -1)
 				audit_sig_uid = tsk->loginuid;
 			else
-				audit_sig_uid = tsk->cred->uid;
+				audit_sig_uid = uid;
 			security_task_getsecid(tsk, &audit_sig_sid);
 		}
 		if (!audit_signals || audit_dummy_context())
@@ -2509,7 +2514,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 	if (!ctx->target_pid) {
 		ctx->target_pid = t->tgid;
 		ctx->target_auid = audit_get_loginuid(t);
-		ctx->target_uid = t->cred->uid;
+		ctx->target_uid = t_uid;
 		ctx->target_sessionid = audit_get_sessionid(t);
 		security_task_getsecid(t, &ctx->target_sid);
 		memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
@@ -2530,7 +2535,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 
 	axp->target_pid[axp->pid_count] = t->tgid;
 	axp->target_auid[axp->pid_count] = audit_get_loginuid(t);
-	axp->target_uid[axp->pid_count] = t->cred->uid;
+	axp->target_uid[axp->pid_count] = t_uid;
 	axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
 	security_task_getsecid(t, &axp->target_sid[axp->pid_count]);
 	memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e210526e6401..a512a75a5560 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1279,7 +1279,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
 {
 	struct task_struct *tsk;
-	uid_t euid;
+	const struct cred *cred = current_cred(), *tcred;
 	int ret;
 
 	if (pid) {
@@ -1289,16 +1289,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
 			rcu_read_unlock();
 			return -ESRCH;
 		}
-		get_task_struct(tsk);
-		rcu_read_unlock();
 
-		euid = current_euid();
-		if (euid &&
-		    euid != tsk->cred->uid &&
-		    euid != tsk->cred->suid) {
-			put_task_struct(tsk);
+		tcred = __task_cred(tsk);
+		if (cred->euid &&
+		    cred->euid != tcred->uid &&
+		    cred->euid != tcred->suid) {
+			rcu_read_unlock();
 			return -EACCES;
 		}
+		get_task_struct(tsk);
+		rcu_read_unlock();
 	} else {
 		tsk = current;
 		get_task_struct(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index e0f6e1892fb9..bbc22530f2c1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -160,7 +160,10 @@ void release_task(struct task_struct * p)
 	int zap_leader;
 repeat:
 	tracehook_prepare_release_task(p);
-	atomic_dec(&p->cred->user->processes);
+	/* don't need to get the RCU readlock here - the process is dead and
+	 * can't be modifying its own credentials */
+	atomic_dec(&__task_cred(p)->user->processes);
+
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
@@ -1267,12 +1270,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
+	uid_t uid = __task_cred(p)->uid;
 
 	if (!likely(options & WEXITED))
 		return 0;
 
 	if (unlikely(options & WNOWAIT)) {
-		uid_t uid = p->cred->uid;
 		int exit_code = p->exit_code;
 		int why, status;
 
@@ -1393,7 +1396,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
-		retval = put_user(p->cred->uid, &infop->si_uid);
+		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 
@@ -1458,7 +1461,8 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
 	if (!unlikely(options & WNOWAIT))
 		p->exit_code = 0;
 
-	uid = p->cred->uid;
+	/* don't need the RCU readlock here as we're holding a spinlock */
+	uid = __task_cred(p)->uid;
 unlock_sig:
 	spin_unlock_irq(&p->sighand->siglock);
 	if (!exit_code)
@@ -1532,10 +1536,10 @@ static int wait_task_continued(struct task_struct *p, int options,
 	}
 	if (!unlikely(options & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+	uid = __task_cred(p)->uid;
 	spin_unlock_irq(&p->sighand->siglock);
 
 	pid = task_pid_vnr(p);
-	uid = p->cred->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 28421d8210b8..4fe790e89d0f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -439,15 +439,20 @@ static void free_pi_state(struct futex_pi_state *pi_state)
 static struct task_struct * futex_find_get_task(pid_t pid)
 {
 	struct task_struct *p;
-	uid_t euid = current_euid();
+	const struct cred *cred = current_cred(), *pcred;
 
 	rcu_read_lock();
 	p = find_task_by_vpid(pid);
-	if (!p || (euid != p->cred->euid &&
-		   euid != p->cred->uid))
+	if (!p) {
 		p = ERR_PTR(-ESRCH);
-	else
-		get_task_struct(p);
+	} else {
+		pcred = __task_cred(p);
+		if (cred->euid != pcred->euid &&
+		    cred->euid != pcred->uid)
+			p = ERR_PTR(-ESRCH);
+		else
+			get_task_struct(p);
+	}
 
 	rcu_read_unlock();
 
@@ -1831,7 +1836,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
 {
 	struct robust_list_head __user *head;
 	unsigned long ret;
-	uid_t euid = current_euid();
+	const struct cred *cred = current_cred(), *pcred;
 
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
@@ -1847,8 +1852,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
-		if (euid != p->cred->euid &&
-		    euid != p->cred->uid &&
+		pcred = __task_cred(p);
+		if (cred->euid != pcred->euid &&
+		    cred->euid != pcred->uid &&
 		    !capable(CAP_SYS_PTRACE))
 			goto err_unlock;
 		head = p->robust_list;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 2c3fd5ed34f5..d607a5b9ee29 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -135,7 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 {
 	struct compat_robust_list_head __user *head;
 	unsigned long ret;
-	uid_t euid = current_euid();
+	const struct cred *cred = current_cred(), *pcred;
 
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
@@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
-		if (euid != p->cred->euid &&
-		    euid != p->cred->uid &&
+		pcred = __task_cred(p);
+		if (cred->euid != pcred->euid &&
+		    cred->euid != pcred->uid &&
 		    !capable(CAP_SYS_PTRACE))
 			goto err_unlock;
 		head = p->compat_robust_list;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 49849d12dd12..b9d5f4e4f6a4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -115,7 +115,7 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 
 int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
-	struct cred *cred = current->cred, *tcred = task->cred;
+	const struct cred *cred = current_cred(), *tcred;
 
 	/* May we inspect the given task?
 	 * This check is used both for attaching with ptrace
@@ -125,19 +125,23 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	 * because setting up the necessary parent/child relationship
 	 * or halting the specified task is impossible.
 	 */
-	uid_t uid = cred->uid;
-	gid_t gid = cred->gid;
 	int dumpable = 0;
 	/* Don't let security modules deny introspection */
 	if (task == current)
 		return 0;
-	if ((uid != tcred->euid ||
-	     uid != tcred->suid ||
-	     uid != tcred->uid  ||
-	     gid != tcred->egid ||
-	     gid != tcred->sgid ||
-	     gid != tcred->gid) && !capable(CAP_SYS_PTRACE))
+	rcu_read_lock();
+	tcred = __task_cred(task);
+	if ((cred->uid != tcred->euid ||
+	     cred->uid != tcred->suid ||
+	     cred->uid != tcred->uid  ||
+	     cred->gid != tcred->egid ||
+	     cred->gid != tcred->sgid ||
+	     cred->gid != tcred->gid) &&
+	    !capable(CAP_SYS_PTRACE)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
diff --git a/kernel/sched.c b/kernel/sched.c
index 733c59e645aa..92992e287b10 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -345,7 +345,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct task_group *tg;
 
 #ifdef CONFIG_USER_SCHED
-	tg = p->cred->user->tg;
+	rcu_read_lock();
+	tg = __task_cred(p)->user->tg;
+	rcu_read_unlock();
 #elif defined(CONFIG_CGROUP_SCHED)
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 				struct task_group, css);
@@ -5121,6 +5123,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	set_load_weight(p);
 }
 
+/*
+ * check the target process has a UID that matches the current process's
+ */
+static bool check_same_owner(struct task_struct *p)
+{
+	const struct cred *cred = current_cred(), *pcred;
+	bool match;
+
+	rcu_read_lock();
+	pcred = __task_cred(p);
+	match = (cred->euid == pcred->euid ||
+		 cred->euid == pcred->uid);
+	rcu_read_unlock();
+	return match;
+}
+
 static int __sched_setscheduler(struct task_struct *p, int policy,
 				struct sched_param *param, bool user)
 {
@@ -5128,7 +5146,6 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
 	unsigned long flags;
 	const struct sched_class *prev_class = p->sched_class;
 	struct rq *rq;
-	uid_t euid;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -5181,9 +5198,7 @@ recheck:
 			return -EPERM;
 
 		/* can't change other user's priorities */
-		euid = current_euid();
-		if (euid != p->cred->euid &&
-		    euid != p->cred->uid)
+		if (!check_same_owner(p))
 			return -EPERM;
 	}
 
@@ -5394,7 +5409,6 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 	cpumask_t cpus_allowed;
 	cpumask_t new_mask = *in_mask;
 	struct task_struct *p;
-	uid_t euid;
 	int retval;
 
 	get_online_cpus();
@@ -5415,11 +5429,8 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
-	euid = current_euid();
 	retval = -EPERM;
-	if (euid != p->cred->euid &&
-	    euid != p->cred->uid &&
-	    !capable(CAP_SYS_NICE))
+	if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
 		goto out_unlock;
 
 	retval = security_task_setscheduler(p, 0, NULL);
diff --git a/kernel/signal.c b/kernel/signal.c
index 80e8a6489f97..84989124bafb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -177,6 +177,11 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
 	return sig;
 }
 
+/*
+ * allocate a new signal queue record
+ * - this may be called without locks if and only if t == current, otherwise an
+ *   appopriate lock must be held to protect t's user_struct
+ */
 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 					 int override_rlimit)
 {
@@ -184,11 +189,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 	struct user_struct *user;
 
 	/*
-	 * In order to avoid problems with "switch_user()", we want to make
-	 * sure that the compiler doesn't re-load "t->user"
+	 * We won't get problems with the target's UID changing under us
+	 * because changing it requires RCU be used, and if t != current, the
+	 * caller must be holding the RCU readlock (by way of a spinlock) and
+	 * we use RCU protection here
 	 */
-	user = t->cred->user;
-	barrier();
+	user = __task_cred(t)->user;
 	atomic_inc(&user->sigpending);
 	if (override_rlimit ||
 	    atomic_read(&user->sigpending) <=
@@ -562,12 +568,13 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s)
 
 /*
  * Bad permissions for sending the signal
+ * - the caller must hold at least the RCU read lock
  */
 static int check_kill_permission(int sig, struct siginfo *info,
 				 struct task_struct *t)
 {
+	const struct cred *cred = current_cred(), *tcred;
 	struct pid *sid;
-	uid_t uid, euid;
 	int error;
 
 	if (!valid_signal(sig))
@@ -580,10 +587,11 @@ static int check_kill_permission(int sig, struct siginfo *info,
 	if (error)
 		return error;
 
-	uid = current_uid();
-	euid = current_euid();
-	if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) &&
-	    (uid  ^ t->cred->suid) && (uid  ^ t->cred->uid) &&
+	tcred = __task_cred(t);
+	if ((cred->euid ^ tcred->suid) &&
+	    (cred->euid ^ tcred->uid) &&
+	    (cred->uid  ^ tcred->suid) &&
+	    (cred->uid  ^ tcred->uid) &&
 	    !capable(CAP_KILL)) {
 		switch (sig) {
 		case SIGCONT:
@@ -1011,6 +1019,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
 	return sighand;
 }
 
+/*
+ * send signal info to all the members of a group
+ * - the caller must hold the RCU read lock at least
+ */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
 	unsigned long flags;
@@ -1032,8 +1044,8 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 /*
  * __kill_pgrp_info() sends a signal to a process group: this is what the tty
  * control characters do (^C, ^Z etc)
+ * - the caller must hold at least a readlock on tasklist_lock
  */
-
 int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 {
 	struct task_struct *p = NULL;
@@ -1089,6 +1101,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
 {
 	int ret = -EINVAL;
 	struct task_struct *p;
+	const struct cred *pcred;
 
 	if (!valid_signal(sig))
 		return ret;
@@ -1099,9 +1112,11 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
 		ret = -ESRCH;
 		goto out_unlock;
 	}
-	if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
-	    && (euid != p->cred->suid) && (euid != p->cred->uid)
-	    && (uid != p->cred->suid) && (uid != p->cred->uid)) {
+	pcred = __task_cred(p);
+	if ((info == SEND_SIG_NOINFO ||
+	     (!is_si_special(info) && SI_FROMUSER(info))) &&
+	    euid != pcred->suid && euid != pcred->uid &&
+	    uid  != pcred->suid && uid  != pcred->uid) {
 		ret = -EPERM;
 		goto out_unlock;
 	}
@@ -1372,10 +1387,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	 */
 	rcu_read_lock();
 	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	info.si_uid = __task_cred(tsk)->uid;
 	rcu_read_unlock();
 
-	info.si_uid = tsk->cred->uid;
-
 	thread_group_cputime(tsk, &cputime);
 	info.si_utime = cputime_to_jiffies(cputime.utime);
 	info.si_stime = cputime_to_jiffies(cputime.stime);
@@ -1443,10 +1457,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 	 */
 	rcu_read_lock();
 	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	info.si_uid = __task_cred(tsk)->uid;
 	rcu_read_unlock();
 
-	info.si_uid = tsk->cred->uid;
-
 	info.si_utime = cputime_to_clock_t(tsk->utime);
 	info.si_stime = cputime_to_clock_t(tsk->stime);
 
@@ -1713,7 +1726,7 @@ static int ptrace_signal(int signr, siginfo_t *info,
 		info->si_errno = 0;
 		info->si_code = SI_USER;
 		info->si_pid = task_pid_vnr(current->parent);
-		info->si_uid = current->parent->cred->uid;
+		info->si_uid = task_uid(current->parent);
 	}
 
 	/* If the (new) signal is now blocked, requeue it.  */
diff --git a/kernel/sys.c b/kernel/sys.c
index c4d6b59553e9..ccc9eb736d35 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -112,14 +112,17 @@ EXPORT_SYMBOL(cad_pid);
 
 void (*pm_power_off_prepare)(void);
 
+/*
+ * set the priority of a task
+ * - the caller must hold the RCU read lock
+ */
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
-	uid_t euid = current_euid();
+	const struct cred *cred = current_cred(), *pcred = __task_cred(p);
 	int no_nice;
 
-	if (p->cred->uid  != euid &&
-	    p->cred->euid != euid &&
-	    !capable(CAP_SYS_NICE)) {
+	if (pcred->uid  != cred->euid &&
+	    pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
 		error = -EPERM;
 		goto out;
 	}
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 6d1ed07bf312..2dc06ab35716 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -27,6 +27,7 @@
  */
 void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 {
+	const struct cred *tcred;
 	struct timespec uptime, ts;
 	u64 ac_etime;
 
@@ -53,10 +54,11 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
 		stats->ac_flag |= AXSIG;
 	stats->ac_nice	 = task_nice(tsk);
 	stats->ac_sched	 = tsk->policy;
-	stats->ac_uid	 = tsk->cred->uid;
-	stats->ac_gid	 = tsk->cred->gid;
 	stats->ac_pid	 = tsk->pid;
 	rcu_read_lock();
+	tcred = __task_cred(tsk);
+	stats->ac_uid	 = tcred->uid;
+	stats->ac_gid	 = tcred->gid;
 	stats->ac_ppid	 = pid_alive(tsk) ?
 				rcu_dereference(tsk->real_parent)->tgid : 0;
 	rcu_read_unlock();
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b23492ee3e50..7555219c535b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1110,7 +1110,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 		const unsigned long __user *old_nodes,
 		const unsigned long __user *new_nodes)
 {
-	struct cred *cred, *tcred;
+	const struct cred *cred = current_cred(), *tcred;
 	struct mm_struct *mm;
 	struct task_struct *task;
 	nodemask_t old;
@@ -1145,14 +1145,16 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
-	cred = current->cred;
-	tcred = task->cred;
+	rcu_read_lock();
+	tcred = __task_cred(task);
 	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
 	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
 	    !capable(CAP_SYS_NICE)) {
+		rcu_read_unlock();
 		err = -EPERM;
 		goto out;
 	}
+	rcu_read_unlock();
 
 	task_nodes = cpuset_mems_allowed(task);
 	/* Is the user allowed to access the target nodes? */
diff --git a/mm/migrate.c b/mm/migrate.c
index 794443da1b4f..142284229ce2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1045,7 +1045,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 			const int __user *nodes,
 			int __user *status, int flags)
 {
-	struct cred *cred, *tcred;
+	const struct cred *cred = current_cred(), *tcred;
 	struct task_struct *task;
 	struct mm_struct *mm;
 	int err;
@@ -1076,14 +1076,16 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
-	cred = current->cred;
-	tcred = task->cred;
+	rcu_read_lock();
+	tcred = __task_cred(task);
 	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
 	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
 	    !capable(CAP_SYS_NICE)) {
+		rcu_read_unlock();
 		err = -EPERM;
 		goto out;
 	}
+	rcu_read_unlock();
 
  	err = security_task_movememory(task);
  	if (err)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3af787ba2077..0e0b282a2073 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -298,9 +298,9 @@ static void dump_tasks(const struct mem_cgroup *mem)
 
 		task_lock(p);
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
-		       p->pid, p->cred->uid, p->tgid, p->mm->total_vm,
-		       get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj,
-		       p->comm);
+		       p->pid, __task_cred(p)->uid, p->tgid,
+		       p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
+		       p->oomkilladj, p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
 }
diff --git a/security/commoncap.c b/security/commoncap.c
index 61307f590003..0384bf95db68 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -51,10 +51,13 @@ EXPORT_SYMBOL(cap_netlink_recv);
  */
 int cap_capable(struct task_struct *tsk, int cap, int audit)
 {
+	__u32 cap_raised;
+
 	/* Derived from include/linux/sched.h:capable. */
-	if (cap_raised(tsk->cred->cap_effective, cap))
-		return 0;
-	return -EPERM;
+	rcu_read_lock();
+	cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap);
+	rcu_read_unlock();
+	return cap_raised ? 0 : -EPERM;
 }
 
 int cap_settime(struct timespec *ts, struct timezone *tz)
@@ -66,34 +69,42 @@ int cap_settime(struct timespec *ts, struct timezone *tz)
 
 int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
 {
-	/* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */
-	if (cap_issubset(child->cred->cap_permitted,
-			 current->cred->cap_permitted))
-		return 0;
-	if (capable(CAP_SYS_PTRACE))
-		return 0;
-	return -EPERM;
+	int ret = 0;
+
+	rcu_read_lock();
+	if (!cap_issubset(child->cred->cap_permitted,
+			  current->cred->cap_permitted) &&
+	    !capable(CAP_SYS_PTRACE))
+		ret = -EPERM;
+	rcu_read_unlock();
+	return ret;
 }
 
 int cap_ptrace_traceme(struct task_struct *parent)
 {
-	if (cap_issubset(current->cred->cap_permitted,
-			 parent->cred->cap_permitted))
-		return 0;
-	if (has_capability(parent, CAP_SYS_PTRACE))
-		return 0;
-	return -EPERM;
+	int ret = 0;
+
+	rcu_read_lock();
+	if (!cap_issubset(current->cred->cap_permitted,
+			 parent->cred->cap_permitted) &&
+	    !has_capability(parent, CAP_SYS_PTRACE))
+		ret = -EPERM;
+	rcu_read_unlock();
+	return ret;
 }
 
 int cap_capget (struct task_struct *target, kernel_cap_t *effective,
 		kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
-	struct cred *cred = target->cred;
+	const struct cred *cred;
 
 	/* Derived from kernel/capability.c:sys_capget. */
+	rcu_read_lock();
+	cred = __task_cred(target);
 	*effective   = cred->cap_effective;
 	*inheritable = cred->cap_inheritable;
 	*permitted   = cred->cap_permitted;
+	rcu_read_unlock();
 	return 0;
 }
 
@@ -433,7 +444,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 
 int cap_bprm_secureexec (struct linux_binprm *bprm)
 {
-	const struct cred *cred = current->cred;
+	const struct cred *cred = current_cred();
 
 	if (cred->uid != 0) {
 		if (bprm->cap_effective)
@@ -511,11 +522,11 @@ static inline void cap_emulate_setxuid (int old_ruid, int old_euid,
 	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
 	    (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) &&
 	    !issecure(SECURE_KEEP_CAPS)) {
-		cap_clear (cred->cap_permitted);
-		cap_clear (cred->cap_effective);
+		cap_clear(cred->cap_permitted);
+		cap_clear(cred->cap_effective);
 	}
 	if (old_euid == 0 && cred->euid != 0) {
-		cap_clear (cred->cap_effective);
+		cap_clear(cred->cap_effective);
 	}
 	if (old_euid != 0 && cred->euid == 0) {
 		cred->cap_effective = cred->cap_permitted;
@@ -582,9 +593,14 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
  */
 static int cap_safe_nice(struct task_struct *p)
 {
-	if (!cap_issubset(p->cred->cap_permitted,
-			  current->cred->cap_permitted) &&
-	    !capable(CAP_SYS_NICE))
+	int is_subset;
+
+	rcu_read_lock();
+	is_subset = cap_issubset(__task_cred(p)->cap_permitted,
+				 current_cred()->cap_permitted);
+	rcu_read_unlock();
+
+	if (!is_subset && !capable(CAP_SYS_NICE))
 		return -EPERM;
 	return 0;
 }
diff --git a/security/keys/permission.c b/security/keys/permission.c
index baf3d5f31e71..13c36164f284 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -22,13 +22,16 @@ int key_task_permission(const key_ref_t key_ref,
 			struct task_struct *context,
 			key_perm_t perm)
 {
-	struct cred *cred = context->cred;
+	const struct cred *cred;
 	struct key *key;
 	key_perm_t kperm;
 	int ret;
 
 	key = key_ref_to_ptr(key_ref);
 
+	rcu_read_lock();
+	cred = __task_cred(context);
+
 	/* use the second 8-bits of permissions for keys the caller owns */
 	if (key->uid == cred->fsuid) {
 		kperm = key->perm >> 16;
@@ -43,10 +46,7 @@ int key_task_permission(const key_ref_t key_ref,
 			goto use_these_perms;
 		}
 
-		spin_lock(&cred->lock);
 		ret = groups_search(cred->group_info, key->gid);
-		spin_unlock(&cred->lock);
-
 		if (ret) {
 			kperm = key->perm >> 8;
 			goto use_these_perms;
@@ -57,6 +57,8 @@ int key_task_permission(const key_ref_t key_ref,
 	kperm = key->perm;
 
 use_these_perms:
+	rcu_read_lock();
+
 	/* use the top 8-bits of permissions for keys the caller possesses
 	 * - possessor permissions are additive with other permissions
 	 */
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index ce8ac6073d57..212601ebaa46 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -412,10 +412,13 @@ key_ref_t search_process_keyrings(struct key_type *type,
 				  struct task_struct *context)
 {
 	struct request_key_auth *rka;
+	struct cred *cred;
 	key_ref_t key_ref, ret, err;
 
 	might_sleep();
 
+	cred = get_task_cred(context);
+
 	/* we want to return -EAGAIN or -ENOKEY if any of the keyrings were
 	 * searchable, but we failed to find a key or we found a negative key;
 	 * otherwise we want to return a sample error (probably -EACCES) if
@@ -428,9 +431,9 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	err = ERR_PTR(-EAGAIN);
 
 	/* search the thread keyring first */
-	if (context->cred->thread_keyring) {
+	if (cred->thread_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(context->cred->thread_keyring, 1),
+			make_key_ref(cred->thread_keyring, 1),
 			context, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -495,9 +498,9 @@ key_ref_t search_process_keyrings(struct key_type *type,
 		}
 	}
 	/* or search the user-session keyring */
-	else if (context->cred->user->session_keyring) {
+	else if (cred->user->session_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(context->cred->user->session_keyring, 1),
+			make_key_ref(cred->user->session_keyring, 1),
 			context, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -519,20 +522,20 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	 * search the keyrings of the process mentioned there
 	 * - we don't permit access to request_key auth keys via this method
 	 */
-	if (context->cred->request_key_auth &&
+	if (cred->request_key_auth &&
 	    context == current &&
 	    type != &key_type_request_key_auth
 	    ) {
 		/* defend against the auth key being revoked */
-		down_read(&context->cred->request_key_auth->sem);
+		down_read(&cred->request_key_auth->sem);
 
-		if (key_validate(context->cred->request_key_auth) == 0) {
-			rka = context->cred->request_key_auth->payload.data;
+		if (key_validate(cred->request_key_auth) == 0) {
+			rka = cred->request_key_auth->payload.data;
 
 			key_ref = search_process_keyrings(type, description,
 							  match, rka->context);
 
-			up_read(&context->cred->request_key_auth->sem);
+			up_read(&cred->request_key_auth->sem);
 
 			if (!IS_ERR(key_ref))
 				goto found;
@@ -549,7 +552,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 				break;
 			}
 		} else {
-			up_read(&context->cred->request_key_auth->sem);
+			up_read(&cred->request_key_auth->sem);
 		}
 	}
 
@@ -557,6 +560,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	key_ref = ret ? ret : err;
 
 found:
+	put_cred(cred);
 	return key_ref;
 
 } /* end search_process_keyrings() */
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 10715d1330b9..c86303638235 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -95,13 +95,18 @@ extern void selnl_notify_setenforce(int val);
 static int task_has_security(struct task_struct *tsk,
 			     u32 perms)
 {
-	struct task_security_struct *tsec;
-
-	tsec = tsk->cred->security;
+	const struct task_security_struct *tsec;
+	u32 sid = 0;
+
+	rcu_read_lock();
+	tsec = __task_cred(tsk)->security;
+	if (tsec)
+		sid = tsec->sid;
+	rcu_read_unlock();
 	if (!tsec)
 		return -EACCES;
 
-	return avc_has_perm(tsec->sid, SECINITSID_SECURITY,
+	return avc_has_perm(sid, SECINITSID_SECURITY,
 			    SECCLASS_SECURITY, perms, NULL);
 }
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index e8a4fcb1ad04..11167fd567b9 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -30,6 +30,8 @@
 
 #include "smack.h"
 
+#define task_security(task)	(task_cred_xxx((task), security))
+
 /*
  * I hope these are the hokeyist lines of code in the module. Casey.
  */
@@ -1012,7 +1014,7 @@ static void smack_cred_free(struct cred *cred)
  */
 static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
 {
-	return smk_curacc(p->cred->security, MAY_WRITE);
+	return smk_curacc(task_security(p), MAY_WRITE);
 }
 
 /**
@@ -1023,7 +1025,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid)
  */
 static int smack_task_getpgid(struct task_struct *p)
 {
-	return smk_curacc(p->cred->security, MAY_READ);
+	return smk_curacc(task_security(p), MAY_READ);
 }
 
 /**
@@ -1034,7 +1036,7 @@ static int smack_task_getpgid(struct task_struct *p)
  */
 static int smack_task_getsid(struct task_struct *p)
 {
-	return smk_curacc(p->cred->security, MAY_READ);
+	return smk_curacc(task_security(p), MAY_READ);
 }
 
 /**
@@ -1046,7 +1048,7 @@ static int smack_task_getsid(struct task_struct *p)
  */
 static void smack_task_getsecid(struct task_struct *p, u32 *secid)
 {
-	*secid = smack_to_secid(p->cred->security);
+	*secid = smack_to_secid(task_security(p));
 }
 
 /**
@@ -1062,7 +1064,7 @@ static int smack_task_setnice(struct task_struct *p, int nice)
 
 	rc = cap_task_setnice(p, nice);
 	if (rc == 0)
-		rc = smk_curacc(p->cred->security, MAY_WRITE);
+		rc = smk_curacc(task_security(p), MAY_WRITE);
 	return rc;
 }
 
@@ -1079,7 +1081,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
 
 	rc = cap_task_setioprio(p, ioprio);
 	if (rc == 0)
-		rc = smk_curacc(p->cred->security, MAY_WRITE);
+		rc = smk_curacc(task_security(p), MAY_WRITE);
 	return rc;
 }
 
@@ -1091,7 +1093,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio)
  */
 static int smack_task_getioprio(struct task_struct *p)
 {
-	return smk_curacc(p->cred->security, MAY_READ);
+	return smk_curacc(task_security(p), MAY_READ);
 }
 
 /**
@@ -1109,7 +1111,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
 
 	rc = cap_task_setscheduler(p, policy, lp);
 	if (rc == 0)
-		rc = smk_curacc(p->cred->security, MAY_WRITE);
+		rc = smk_curacc(task_security(p), MAY_WRITE);
 	return rc;
 }
 
@@ -1121,7 +1123,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy,
  */
 static int smack_task_getscheduler(struct task_struct *p)
 {
-	return smk_curacc(p->cred->security, MAY_READ);
+	return smk_curacc(task_security(p), MAY_READ);
 }
 
 /**
@@ -1132,7 +1134,7 @@ static int smack_task_getscheduler(struct task_struct *p)
  */
 static int smack_task_movememory(struct task_struct *p)
 {
-	return smk_curacc(p->cred->security, MAY_WRITE);
+	return smk_curacc(task_security(p), MAY_WRITE);
 }
 
 /**
@@ -1155,13 +1157,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 	 * can write the receiver.
 	 */
 	if (secid == 0)
-		return smk_curacc(p->cred->security, MAY_WRITE);
+		return smk_curacc(task_security(p), MAY_WRITE);
 	/*
 	 * If the secid isn't 0 we're dealing with some USB IO
 	 * specific behavior. This is not clean. For one thing
 	 * we can't take privilege into account.
 	 */
-	return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE);
+	return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE);
 }
 
 /**
@@ -1174,7 +1176,7 @@ static int smack_task_wait(struct task_struct *p)
 {
 	int rc;
 
-	rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE);
+	rc = smk_access(current_security(), task_security(p), MAY_WRITE);
 	if (rc == 0)
 		return 0;
 
@@ -1205,7 +1207,7 @@ static int smack_task_wait(struct task_struct *p)
 static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
 {
 	struct inode_smack *isp = inode->i_security;
-	isp->smk_inode = p->cred->security;
+	isp->smk_inode = task_security(p);
 }
 
 /*
@@ -2010,7 +2012,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
 	if (strcmp(name, "current") != 0)
 		return -EINVAL;
 
-	cp = kstrdup(p->cred->security, GFP_KERNEL);
+	cp = kstrdup(task_security(p), GFP_KERNEL);
 	if (cp == NULL)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From bb952bb98a7e479262c7eb25d5592545a3af147d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:20 +1100
Subject: CRED: Separate per-task-group keyrings from signal_struct

Separate per-task-group keyrings from signal_struct and dangle their anchor
from the cred struct rather than the signal_struct.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/cred.h         |  16 +++++++
 include/linux/key.h          |   8 +---
 include/linux/sched.h        |   6 ---
 kernel/cred.c                |  63 +++++++++++++++++++++++++++
 kernel/fork.c                |   7 ---
 security/keys/process_keys.c | 100 +++++++++++++++++--------------------------
 security/keys/request_key.c  |  34 ++++++---------
 7 files changed, 135 insertions(+), 99 deletions(-)

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 166ce4ddba64..62b9e532422d 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -71,6 +71,21 @@ extern int groups_search(const struct group_info *, gid_t);
 extern int in_group_p(gid_t);
 extern int in_egroup_p(gid_t);
 
+/*
+ * The common credentials for a thread group
+ * - shared by CLONE_THREAD
+ */
+#ifdef CONFIG_KEYS
+struct thread_group_cred {
+	atomic_t	usage;
+	pid_t		tgid;			/* thread group process ID */
+	spinlock_t	lock;
+	struct key	*session_keyring;	/* keyring inherited over fork */
+	struct key	*process_keyring;	/* keyring private to this process */
+	struct rcu_head	rcu;			/* RCU deletion hook */
+};
+#endif
+
 /*
  * The security context of a task
  *
@@ -114,6 +129,7 @@ struct cred {
 					 * keys to */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
+	struct thread_group_cred *tgcred; /* thread-group shared credentials */
 #endif
 #ifdef CONFIG_SECURITY
 	void		*security;	/* subjective LSM security */
diff --git a/include/linux/key.h b/include/linux/key.h
index df709e1af3cd..0836cc838b0c 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -278,9 +278,7 @@ extern ctl_table key_sysctls[];
  */
 extern void switch_uid_keyring(struct user_struct *new_user);
 extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk);
-extern int copy_thread_group_keys(struct task_struct *tsk);
 extern void exit_keys(struct task_struct *tsk);
-extern void exit_thread_group_keys(struct signal_struct *tg);
 extern int suid_keys(struct task_struct *tsk);
 extern int exec_keys(struct task_struct *tsk);
 extern void key_fsuid_changed(struct task_struct *tsk);
@@ -289,8 +287,8 @@ extern void key_init(void);
 
 #define __install_session_keyring(keyring)				\
 ({									\
-	struct key *old_session = current->signal->session_keyring;	\
-	current->signal->session_keyring = keyring;			\
+	struct key *old_session = current->cred->tgcred->session_keyring; \
+	current->cred->tgcred->session_keyring = keyring;		\
 	old_session;							\
 })
 
@@ -308,9 +306,7 @@ extern void key_init(void);
 #define switch_uid_keyring(u)		do { } while(0)
 #define __install_session_keyring(k)	({ NULL; })
 #define copy_keys(f,t)			0
-#define copy_thread_group_keys(t)	0
 #define exit_keys(t)			do { } while(0)
-#define exit_thread_group_keys(tg)	do { } while(0)
 #define suid_keys(t)			do { } while(0)
 #define exec_keys(t)			do { } while(0)
 #define key_fsuid_changed(t)		do { } while(0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 740cf946c8cc..2913252989b3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -571,12 +571,6 @@ struct signal_struct {
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
 
-	/* keep the process-shared keyrings here so that they do the right
-	 * thing in threads created with CLONE_THREAD */
-#ifdef CONFIG_KEYS
-	struct key *session_keyring;	/* keyring inherited over fork */
-	struct key *process_keyring;	/* keyring private to this process */
-#endif
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct pacct_struct pacct;	/* per-process accounting information */
 #endif
diff --git a/kernel/cred.c b/kernel/cred.c
index 833244a7cb05..ac73e3617684 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -16,6 +16,17 @@
 #include <linux/init_task.h>
 #include <linux/security.h>
 
+/*
+ * The common credentials for the initial task's thread group
+ */
+#ifdef CONFIG_KEYS
+static struct thread_group_cred init_tgcred = {
+	.usage	= ATOMIC_INIT(2),
+	.tgid	= 0,
+	.lock	= SPIN_LOCK_UNLOCKED,
+};
+#endif
+
 /*
  * The initial credentials for the initial task
  */
@@ -28,8 +39,41 @@ struct cred init_cred = {
 	.cap_bset		= CAP_INIT_BSET,
 	.user			= INIT_USER,
 	.group_info		= &init_groups,
+#ifdef CONFIG_KEYS
+	.tgcred			= &init_tgcred,
+#endif
 };
 
+/*
+ * Dispose of the shared task group credentials
+ */
+#ifdef CONFIG_KEYS
+static void release_tgcred_rcu(struct rcu_head *rcu)
+{
+	struct thread_group_cred *tgcred =
+		container_of(rcu, struct thread_group_cred, rcu);
+
+	BUG_ON(atomic_read(&tgcred->usage) != 0);
+
+	key_put(tgcred->session_keyring);
+	key_put(tgcred->process_keyring);
+	kfree(tgcred);
+}
+#endif
+
+/*
+ * Release a set of thread group credentials.
+ */
+static void release_tgcred(struct cred *cred)
+{
+#ifdef CONFIG_KEYS
+	struct thread_group_cred *tgcred = cred->tgcred;
+
+	if (atomic_dec_and_test(&tgcred->usage))
+		call_rcu(&tgcred->rcu, release_tgcred_rcu);
+#endif
+}
+
 /*
  * The RCU callback to actually dispose of a set of credentials
  */
@@ -41,6 +85,7 @@ static void put_cred_rcu(struct rcu_head *rcu)
 
 	key_put(cred->thread_keyring);
 	key_put(cred->request_key_auth);
+	release_tgcred(cred);
 	put_group_info(cred->group_info);
 	free_uid(cred->user);
 	security_cred_free(cred);
@@ -71,12 +116,30 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	if (!pcred)
 		return -ENOMEM;
 
+#ifdef CONFIG_KEYS
+	if (clone_flags & CLONE_THREAD) {
+		atomic_inc(&pcred->tgcred->usage);
+	} else {
+		pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL);
+		if (!pcred->tgcred) {
+			kfree(pcred);
+			return -ENOMEM;
+		}
+		atomic_set(&pcred->tgcred->usage, 1);
+		spin_lock_init(&pcred->tgcred->lock);
+		pcred->tgcred->process_keyring = NULL;
+		pcred->tgcred->session_keyring =
+			key_get(p->cred->tgcred->session_keyring);
+	}
+#endif
+
 #ifdef CONFIG_SECURITY
 	pcred->security = NULL;
 #endif
 
 	ret = security_cred_alloc(pcred);
 	if (ret < 0) {
+		release_tgcred(pcred);
 		kfree(pcred);
 		return ret;
 	}
diff --git a/kernel/fork.c b/kernel/fork.c
index c932e283ddfc..ded1972672a3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -802,12 +802,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	if (!sig)
 		return -ENOMEM;
 
-	ret = copy_thread_group_keys(tsk);
-	if (ret < 0) {
-		kmem_cache_free(signal_cachep, sig);
-		return ret;
-	}
-
 	atomic_set(&sig->count, 1);
 	atomic_set(&sig->live, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
@@ -852,7 +846,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 void __cleanup_signal(struct signal_struct *sig)
 {
 	thread_group_cputime_free(sig);
-	exit_thread_group_keys(sig);
 	tty_kref_put(sig->tty);
 	kmem_cache_free(signal_cachep, sig);
 }
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 212601ebaa46..70ee93406f30 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -189,7 +189,7 @@ int install_process_keyring(void)
 
 	might_sleep();
 
-	if (!tsk->signal->process_keyring) {
+	if (!tsk->cred->tgcred->process_keyring) {
 		sprintf(buf, "_pid.%u", tsk->tgid);
 
 		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
@@ -200,12 +200,12 @@ int install_process_keyring(void)
 		}
 
 		/* attach keyring */
-		spin_lock_irq(&tsk->sighand->siglock);
-		if (!tsk->signal->process_keyring) {
-			tsk->signal->process_keyring = keyring;
+		spin_lock_irq(&tsk->cred->tgcred->lock);
+		if (!tsk->cred->tgcred->process_keyring) {
+			tsk->cred->tgcred->process_keyring = keyring;
 			keyring = NULL;
 		}
-		spin_unlock_irq(&tsk->sighand->siglock);
+		spin_unlock_irq(&tsk->cred->tgcred->lock);
 
 		key_put(keyring);
 	}
@@ -235,11 +235,11 @@ static int install_session_keyring(struct key *keyring)
 		sprintf(buf, "_ses.%u", tsk->tgid);
 
 		flags = KEY_ALLOC_QUOTA_OVERRUN;
-		if (tsk->signal->session_keyring)
+		if (tsk->cred->tgcred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
-		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
-					flags, NULL);
+		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid,
+					tsk, flags, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
 	}
@@ -248,10 +248,10 @@ static int install_session_keyring(struct key *keyring)
 	}
 
 	/* install the keyring */
-	spin_lock_irq(&tsk->sighand->siglock);
-	old = tsk->signal->session_keyring;
-	rcu_assign_pointer(tsk->signal->session_keyring, keyring);
-	spin_unlock_irq(&tsk->sighand->siglock);
+	spin_lock_irq(&tsk->cred->tgcred->lock);
+	old = tsk->cred->tgcred->session_keyring;
+	rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring);
+	spin_unlock_irq(&tsk->cred->tgcred->lock);
 
 	/* we're using RCU on the pointer, but there's no point synchronising
 	 * on it if it didn't previously point to anything */
@@ -264,28 +264,6 @@ static int install_session_keyring(struct key *keyring)
 
 } /* end install_session_keyring() */
 
-/*****************************************************************************/
-/*
- * copy the keys in a thread group for fork without CLONE_THREAD
- */
-int copy_thread_group_keys(struct task_struct *tsk)
-{
-	key_check(current->thread_group->session_keyring);
-	key_check(current->thread_group->process_keyring);
-
-	/* no process keyring yet */
-	tsk->signal->process_keyring = NULL;
-
-	/* same session keyring */
-	rcu_read_lock();
-	tsk->signal->session_keyring =
-		key_get(rcu_dereference(current->signal->session_keyring));
-	rcu_read_unlock();
-
-	return 0;
-
-} /* end copy_thread_group_keys() */
-
 /*****************************************************************************/
 /*
  * copy the keys for fork
@@ -305,17 +283,6 @@ int copy_keys(unsigned long clone_flags, struct task_struct *tsk)
 
 } /* end copy_keys() */
 
-/*****************************************************************************/
-/*
- * dispose of thread group keys upon thread group destruction
- */
-void exit_thread_group_keys(struct signal_struct *tg)
-{
-	key_put(tg->session_keyring);
-	key_put(tg->process_keyring);
-
-} /* end exit_thread_group_keys() */
-
 /*****************************************************************************/
 /*
  * dispose of per-thread keys upon thread exit
@@ -344,10 +311,10 @@ int exec_keys(struct task_struct *tsk)
 	key_put(old);
 
 	/* discard the process keyring from a newly exec'd task */
-	spin_lock_irq(&tsk->sighand->siglock);
-	old = tsk->signal->process_keyring;
-	tsk->signal->process_keyring = NULL;
-	spin_unlock_irq(&tsk->sighand->siglock);
+	spin_lock_irq(&tsk->cred->tgcred->lock);
+	old = tsk->cred->tgcred->process_keyring;
+	tsk->cred->tgcred->process_keyring = NULL;
+	spin_unlock_irq(&tsk->cred->tgcred->lock);
 
 	key_put(old);
 
@@ -452,9 +419,9 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	}
 
 	/* search the process keyring second */
-	if (context->signal->process_keyring) {
+	if (cred->tgcred->process_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(context->signal->process_keyring, 1),
+			make_key_ref(cred->tgcred->process_keyring, 1),
 			context, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -473,11 +440,11 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	}
 
 	/* search the session keyring */
-	if (context->signal->session_keyring) {
+	if (cred->tgcred->session_keyring) {
 		rcu_read_lock();
 		key_ref = keyring_search_aux(
 			make_key_ref(rcu_dereference(
-					     context->signal->session_keyring),
+					     cred->tgcred->session_keyring),
 				     1),
 			context, type, description, match);
 		rcu_read_unlock();
@@ -586,11 +553,13 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 {
 	struct request_key_auth *rka;
 	struct task_struct *t = current;
-	struct cred *cred = current_cred();
+	struct cred *cred;
 	struct key *key;
 	key_ref_t key_ref, skey_ref;
 	int ret;
 
+try_again:
+	cred = get_current_cred();
 	key_ref = ERR_PTR(-ENOKEY);
 
 	switch (id) {
@@ -604,6 +573,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 				key = ERR_PTR(ret);
 				goto error;
 			}
+			goto reget_creds;
 		}
 
 		key = cred->thread_keyring;
@@ -612,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 		break;
 
 	case KEY_SPEC_PROCESS_KEYRING:
-		if (!t->signal->process_keyring) {
+		if (!cred->tgcred->process_keyring) {
 			if (!create)
 				goto error;
 
@@ -621,15 +591,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 				key = ERR_PTR(ret);
 				goto error;
 			}
+			goto reget_creds;
 		}
 
-		key = t->signal->process_keyring;
+		key = cred->tgcred->process_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_SESSION_KEYRING:
-		if (!t->signal->session_keyring) {
+		if (!cred->tgcred->session_keyring) {
 			/* always install a session keyring upon access if one
 			 * doesn't exist yet */
 			ret = install_user_keyrings();
@@ -639,10 +610,11 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 				cred->user->session_keyring);
 			if (ret < 0)
 				goto error;
+			goto reget_creds;
 		}
 
 		rcu_read_lock();
-		key = rcu_dereference(t->signal->session_keyring);
+		key = rcu_dereference(cred->tgcred->session_keyring);
 		atomic_inc(&key->usage);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
@@ -758,6 +730,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 		goto invalid_key;
 
 error:
+	put_cred(cred);
 	return key_ref;
 
 invalid_key:
@@ -765,6 +738,12 @@ invalid_key:
 	key_ref = ERR_PTR(ret);
 	goto error;
 
+	/* if we attempted to install a keyring, then it may have caused new
+	 * creds to be installed */
+reget_creds:
+	put_cred(cred);
+	goto try_again;
+
 } /* end lookup_user_key() */
 
 /*****************************************************************************/
@@ -777,6 +756,7 @@ invalid_key:
 long join_session_keyring(const char *name)
 {
 	struct task_struct *tsk = current;
+	struct cred *cred = current->cred;
 	struct key *keyring;
 	long ret;
 
@@ -787,7 +767,7 @@ long join_session_keyring(const char *name)
 			goto error;
 
 		rcu_read_lock();
-		ret = rcu_dereference(tsk->signal->session_keyring)->serial;
+		ret = rcu_dereference(cred->tgcred->session_keyring)->serial;
 		rcu_read_unlock();
 		goto error;
 	}
@@ -799,7 +779,7 @@ long join_session_keyring(const char *name)
 	keyring = find_keyring_by_name(name, false);
 	if (PTR_ERR(keyring) == -ENOKEY) {
 		/* not found - try and create a new one */
-		keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk,
+		keyring = keyring_alloc(name, cred->uid, cred->gid, tsk,
 					KEY_ALLOC_IN_QUOTA, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 0488b0af5bd6..3d12558362df 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -66,7 +66,6 @@ static int call_sbin_request_key(struct key_construction *cons,
 				 const char *op,
 				 void *aux)
 {
-	struct task_struct *tsk = current;
 	const struct cred *cred = current_cred();
 	key_serial_t prkey, sskey;
 	struct key *key = cons->key, *authkey = cons->authkey, *keyring;
@@ -109,18 +108,13 @@ static int call_sbin_request_key(struct key_construction *cons,
 		cred->thread_keyring->serial : 0);
 
 	prkey = 0;
-	if (tsk->signal->process_keyring)
-		prkey = tsk->signal->process_keyring->serial;
+	if (cred->tgcred->process_keyring)
+		prkey = cred->tgcred->process_keyring->serial;
 
-	sprintf(keyring_str[1], "%d", prkey);
-
-	if (tsk->signal->session_keyring) {
-		rcu_read_lock();
-		sskey = rcu_dereference(tsk->signal->session_keyring)->serial;
-		rcu_read_unlock();
-	} else {
+	if (cred->tgcred->session_keyring)
+		sskey = rcu_dereference(cred->tgcred->session_keyring)->serial;
+	else
 		sskey = cred->user->session_keyring->serial;
-	}
 
 	sprintf(keyring_str[2], "%d", sskey);
 
@@ -222,7 +216,7 @@ static int construct_key(struct key *key, const void *callout_info,
 static void construct_get_dest_keyring(struct key **_dest_keyring)
 {
 	struct request_key_auth *rka;
-	struct task_struct *tsk = current;
+	const struct cred *cred = current_cred();
 	struct key *dest_keyring = *_dest_keyring, *authkey;
 
 	kenter("%p", dest_keyring);
@@ -234,11 +228,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 	} else {
 		/* use a default keyring; falling through the cases until we
 		 * find one that we actually have */
-		switch (tsk->cred->jit_keyring) {
+		switch (cred->jit_keyring) {
 		case KEY_REQKEY_DEFL_DEFAULT:
 		case KEY_REQKEY_DEFL_REQUESTOR_KEYRING:
-			if (tsk->cred->request_key_auth) {
-				authkey = tsk->cred->request_key_auth;
+			if (cred->request_key_auth) {
+				authkey = cred->request_key_auth;
 				down_read(&authkey->sem);
 				rka = authkey->payload.data;
 				if (!test_bit(KEY_FLAG_REVOKED,
@@ -251,19 +245,19 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 			}
 
 		case KEY_REQKEY_DEFL_THREAD_KEYRING:
-			dest_keyring = key_get(tsk->cred->thread_keyring);
+			dest_keyring = key_get(cred->thread_keyring);
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-			dest_keyring = key_get(tsk->signal->process_keyring);
+			dest_keyring = key_get(cred->tgcred->process_keyring);
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_SESSION_KEYRING:
 			rcu_read_lock();
 			dest_keyring = key_get(
-				rcu_dereference(tsk->signal->session_keyring));
+				rcu_dereference(cred->tgcred->session_keyring));
 			rcu_read_unlock();
 
 			if (dest_keyring)
@@ -271,11 +265,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 
 		case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
 			dest_keyring =
-				key_get(tsk->cred->user->session_keyring);
+				key_get(cred->user->session_keyring);
 			break;
 
 		case KEY_REQKEY_DEFL_USER_KEYRING:
-			dest_keyring = key_get(tsk->cred->user->uid_keyring);
+			dest_keyring = key_get(cred->user->uid_keyring);
 			break;
 
 		case KEY_REQKEY_DEFL_GROUP_KEYRING:
-- 
cgit v1.2.3


From 745ca2475a6ac596e3d8d37c2759c0fbe2586227 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:22 +1100
Subject: CRED: Pass credentials through dentry_open()

Pass credentials through dentry_open() so that the COW creds patch can have
SELinux's flush_unauthorized_files() pass the appropriate creds back to itself
when it opens its null chardev.

The security_dentry_open() call also now takes a creds pointer, as does the
dentry_open hook in struct security_operations.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/powerpc/platforms/cell/spufs/inode.c |  4 ++--
 arch/um/drivers/mconsole_kern.c           |  3 ++-
 fs/autofs4/dev-ioctl.c                    |  3 ++-
 fs/ecryptfs/ecryptfs_kernel.h             |  3 ++-
 fs/ecryptfs/kthread.c                     |  9 +++++----
 fs/ecryptfs/main.c                        |  3 ++-
 fs/exportfs/expfs.c                       |  4 +++-
 fs/hppfs/hppfs.c                          |  6 ++++--
 fs/nfsctl.c                               |  3 ++-
 fs/nfsd/nfs4recover.c                     |  3 ++-
 fs/nfsd/vfs.c                             |  3 ++-
 fs/open.c                                 | 17 +++++++++++------
 fs/xfs/linux-2.6/xfs_ioctl.c              |  3 ++-
 include/linux/fs.h                        |  4 +++-
 include/linux/security.h                  |  7 ++++---
 ipc/mqueue.c                              | 11 +++++++----
 security/capability.c                     |  2 +-
 security/security.c                       |  4 ++--
 security/selinux/hooks.c                  | 15 +++++++++------
 19 files changed, 67 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index e128ce7f0993..6296bfd9cb0b 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -323,7 +323,7 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt)
 		goto out;
 	}
 
-	filp = dentry_open(dentry, mnt, O_RDONLY);
+	filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
 	if (IS_ERR(filp)) {
 		put_unused_fd(ret);
 		ret = PTR_ERR(filp);
@@ -562,7 +562,7 @@ static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt)
 		goto out;
 	}
 
-	filp = dentry_open(dentry, mnt, O_RDONLY);
+	filp = dentry_open(dentry, mnt, O_RDONLY, current_cred());
 	if (IS_ERR(filp)) {
 		put_unused_fd(ret);
 		ret = PTR_ERR(filp);
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 19d579d74d27..16d3b3789a50 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -159,7 +159,8 @@ void mconsole_proc(struct mc_request *req)
 		goto out_kill;
 	}
 
-	file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+	file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+			   current_cred());
 	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		goto out_kill;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 625abf5422e2..ec16255d27dd 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -307,7 +307,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
 			goto out;
 		}
 
-		filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+		filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+				   current_cred());
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
 			goto out;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 3504cf9df358..a75026d35d16 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -691,7 +691,8 @@ int ecryptfs_init_kthread(void);
 void ecryptfs_destroy_kthread(void);
 int ecryptfs_privileged_open(struct file **lower_file,
 			     struct dentry *lower_dentry,
-			     struct vfsmount *lower_mnt);
+			     struct vfsmount *lower_mnt,
+			     const struct cred *cred);
 int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index c440c6b58b2d..c6d7a4d748a0 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -73,7 +73,7 @@ static int ecryptfs_threadfn(void *ignored)
 				mntget(req->lower_mnt);
 				(*req->lower_file) = dentry_open(
 					req->lower_dentry, req->lower_mnt,
-					(O_RDWR | O_LARGEFILE));
+					(O_RDWR | O_LARGEFILE), current_cred());
 				req->flags |= ECRYPTFS_REQ_PROCESSED;
 			}
 			wake_up(&req->wait);
@@ -132,7 +132,8 @@ void ecryptfs_destroy_kthread(void)
  */
 int ecryptfs_privileged_open(struct file **lower_file,
 			     struct dentry *lower_dentry,
-			     struct vfsmount *lower_mnt)
+			     struct vfsmount *lower_mnt,
+			     const struct cred *cred)
 {
 	struct ecryptfs_open_req *req;
 	int rc = 0;
@@ -143,7 +144,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	dget(lower_dentry);
 	mntget(lower_mnt);
 	(*lower_file) = dentry_open(lower_dentry, lower_mnt,
-				    (O_RDWR | O_LARGEFILE));
+				    (O_RDWR | O_LARGEFILE), cred);
 	if (!IS_ERR(*lower_file))
 		goto out;
 	req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
@@ -184,7 +185,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
 		dget(lower_dentry);
 		mntget(lower_mnt);
 		(*lower_file) = dentry_open(lower_dentry, lower_mnt,
-					    (O_RDONLY | O_LARGEFILE));
+					    (O_RDONLY | O_LARGEFILE), cred);
 		if (IS_ERR(*lower_file)) {
 			rc = PTR_ERR(*req->lower_file);
 			(*lower_file) = NULL;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 64d2ba980df4..fd630713c5c7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -115,6 +115,7 @@ void __ecryptfs_printk(const char *fmt, ...)
  */
 int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
 {
+	const struct cred *cred = current_cred();
 	struct ecryptfs_inode_info *inode_info =
 		ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
 	int rc = 0;
@@ -127,7 +128,7 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
 
 		lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
 		rc = ecryptfs_privileged_open(&inode_info->lower_file,
-						     lower_dentry, lower_mnt);
+					      lower_dentry, lower_mnt, cred);
 		if (rc || IS_ERR(inode_info->lower_file)) {
 			printk(KERN_ERR "Error opening lower persistent file "
 			       "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 80246bad1b7f..ec1fb918200f 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/sched.h>
 
 #define dprintk(fmt, args...) do{}while(0)
 
@@ -249,6 +250,7 @@ static int filldir_one(void * __buf, const char * name, int len,
 static int get_name(struct vfsmount *mnt, struct dentry *dentry,
 		char *name, struct dentry *child)
 {
+	const struct cred *cred = current_cred();
 	struct inode *dir = dentry->d_inode;
 	int error;
 	struct file *file;
@@ -263,7 +265,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
 	/*
 	 * Open the directory ...
 	 */
-	file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY);
+	file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 2b3d1828db99..795e2c130ad7 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -426,6 +426,7 @@ static int file_mode(int fmode)
 
 static int hppfs_open(struct inode *inode, struct file *file)
 {
+	const struct cred *cred = current_cred();
 	struct hppfs_private *data;
 	struct vfsmount *proc_mnt;
 	struct dentry *proc_dentry;
@@ -446,7 +447,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
 
 	/* XXX This isn't closed anywhere */
 	data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
-				      file_mode(file->f_mode));
+				      file_mode(file->f_mode), cred);
 	err = PTR_ERR(data->proc_file);
 	if (IS_ERR(data->proc_file))
 		goto out_free1;
@@ -489,6 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
 
 static int hppfs_dir_open(struct inode *inode, struct file *file)
 {
+	const struct cred *cred = current_cred();
 	struct hppfs_private *data;
 	struct vfsmount *proc_mnt;
 	struct dentry *proc_dentry;
@@ -502,7 +504,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
 	proc_dentry = HPPFS_I(inode)->proc_dentry;
 	proc_mnt = inode->i_sb->s_fs_info;
 	data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
-				      file_mode(file->f_mode));
+				      file_mode(file->f_mode), cred);
 	err = PTR_ERR(data->proc_file);
 	if (IS_ERR(data->proc_file))
 		goto out_free;
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index aed8145d9087..cc4ef2642a51 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -41,7 +41,8 @@ static struct file *do_open(char *name, int flags)
 		error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
 
 	if (!error)
-		return dentry_open(nd.path.dentry, nd.path.mnt, flags);
+		return dentry_open(nd.path.dentry, nd.path.mnt, flags,
+				   current_cred());
 
 	path_put(&nd.path);
 	return ERR_PTR(error);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index a5e14e8695ea..632a50b4b371 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -226,7 +226,8 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
 
 	nfs4_save_user(&uid, &gid);
 
-	filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
+	filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY,
+			   current_cred());
 	status = PTR_ERR(filp);
 	if (IS_ERR(filp))
 		goto out;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 890d9a68c852..b59ec5a6ed24 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -671,6 +671,7 @@ __be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			int access, struct file **filp)
 {
+	const struct cred *cred = current_cred();
 	struct dentry	*dentry;
 	struct inode	*inode;
 	int		flags = O_RDONLY|O_LARGEFILE;
@@ -725,7 +726,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 		DQUOT_INIT(inode);
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
-				flags);
+			    flags, cred);
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
 out_nfserr:
diff --git a/fs/open.c b/fs/open.c
index b1238e195e7e..f96eaab280a3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -783,7 +783,8 @@ static inline int __get_file_write_access(struct inode *inode,
 
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 					int flags, struct file *f,
-					int (*open)(struct inode *, struct file *))
+					int (*open)(struct inode *, struct file *),
+					const struct cred *cred)
 {
 	struct inode *inode;
 	int error;
@@ -807,7 +808,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	f->f_op = fops_get(inode->i_fop);
 	file_move(f, &inode->i_sb->s_files);
 
-	error = security_dentry_open(f);
+	error = security_dentry_open(f, cred);
 	if (error)
 		goto cleanup_all;
 
@@ -882,6 +883,8 @@ cleanup_file:
 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *))
 {
+	const struct cred *cred = current_cred();
+
 	if (IS_ERR(nd->intent.open.file))
 		goto out;
 	if (IS_ERR(dentry))
@@ -889,7 +892,7 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
 	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
 					     nd->intent.open.flags - 1,
 					     nd->intent.open.file,
-					     open);
+					     open, cred);
 out:
 	return nd->intent.open.file;
 out_err:
@@ -908,6 +911,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
  */
 struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 {
+	const struct cred *cred = current_cred();
 	struct file *filp;
 
 	/* Pick up the filp from the open intent */
@@ -915,7 +919,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 	/* Has the filesystem initialised the file for us? */
 	if (filp->f_path.dentry == NULL)
 		filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
-				     NULL);
+				     NULL, cred);
 	else
 		path_put(&nd->path);
 	return filp;
@@ -925,7 +929,8 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
  * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
  * error.
  */
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
+			 const struct cred *cred)
 {
 	int error;
 	struct file *f;
@@ -950,7 +955,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 		return ERR_PTR(error);
 	}
 
-	return __dentry_open(dentry, mnt, flags, f, NULL);
+	return __dentry_open(dentry, mnt, flags, f, NULL, cred);
 }
 EXPORT_SYMBOL(dentry_open);
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 67c72aec97e6..281cbd5a25cf 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -256,6 +256,7 @@ xfs_open_by_handle(
 	struct file		*parfilp,
 	struct inode		*parinode)
 {
+	const struct cred	*cred = current_cred();
 	int			error;
 	int			new_fd;
 	int			permflag;
@@ -321,7 +322,7 @@ xfs_open_by_handle(
 	mntget(parfilp->f_path.mnt);
 
 	/* Create file pointer. */
-	filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags);
+	filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags, cred);
 	if (IS_ERR(filp)) {
 		put_unused_fd(new_fd);
 		return -XFS_ERROR(-PTR_ERR(filp));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3d404aaabed..3bfec1327b8d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -315,6 +315,7 @@ struct poll_table_struct;
 struct kstatfs;
 struct vm_area_struct;
 struct vfsmount;
+struct cred;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1673,7 +1674,8 @@ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
 			int mode);
 extern struct file *filp_open(const char *, int, int);
-extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
+				 const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 9239cc11eb9c..7e9fe046a0d1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1402,7 +1402,7 @@ struct security_operations {
 	int (*file_send_sigiotask) (struct task_struct *tsk,
 				    struct fown_struct *fown, int sig);
 	int (*file_receive) (struct file *file);
-	int (*dentry_open) (struct file *file);
+	int (*dentry_open) (struct file *file, const struct cred *cred);
 
 	int (*task_create) (unsigned long clone_flags);
 	int (*cred_alloc_security) (struct cred *cred);
@@ -1658,7 +1658,7 @@ int security_file_set_fowner(struct file *file);
 int security_file_send_sigiotask(struct task_struct *tsk,
 				 struct fown_struct *fown, int sig);
 int security_file_receive(struct file *file);
-int security_dentry_open(struct file *file);
+int security_dentry_open(struct file *file, const struct cred *cred);
 int security_task_create(unsigned long clone_flags);
 int security_cred_alloc(struct cred *cred);
 void security_cred_free(struct cred *cred);
@@ -2171,7 +2171,8 @@ static inline int security_file_receive(struct file *file)
 	return 0;
 }
 
-static inline int security_dentry_open(struct file *file)
+static inline int security_dentry_open(struct file *file,
+				       const struct cred *cred)
 {
 	return 0;
 }
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 1151881ccb9a..d9393f8e4c3e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -594,6 +594,7 @@ static int mq_attr_ok(struct mq_attr *attr)
 static struct file *do_create(struct dentry *dir, struct dentry *dentry,
 			int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
+	const struct cred *cred = current_cred();
 	struct mq_attr attr;
 	struct file *result;
 	int ret;
@@ -618,7 +619,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
 	if (ret)
 		goto out_drop_write;
 
-	result = dentry_open(dentry, mqueue_mnt, oflag);
+	result = dentry_open(dentry, mqueue_mnt, oflag, cred);
 	/*
 	 * dentry_open() took a persistent mnt_want_write(),
 	 * so we can now drop this one.
@@ -637,8 +638,10 @@ out:
 /* Opens existing queue */
 static struct file *do_open(struct dentry *dentry, int oflag)
 {
-static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
-					MAY_READ | MAY_WRITE };
+	const struct cred *cred = current_cred();
+
+	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
+						  MAY_READ | MAY_WRITE };
 
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
 		dput(dentry);
@@ -652,7 +655,7 @@ static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
 		return ERR_PTR(-EACCES);
 	}
 
-	return dentry_open(dentry, mqueue_mnt, oflag);
+	return dentry_open(dentry, mqueue_mnt, oflag, cred);
 }
 
 asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
diff --git a/security/capability.c b/security/capability.c
index 6c4b5137ca7b..fac2f61b69a9 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -330,7 +330,7 @@ static int cap_file_receive(struct file *file)
 	return 0;
 }
 
-static int cap_dentry_open(struct file *file)
+static int cap_dentry_open(struct file *file, const struct cred *cred)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index d058f7d5b10a..f40a0a04c3c2 100644
--- a/security/security.c
+++ b/security/security.c
@@ -606,9 +606,9 @@ int security_file_receive(struct file *file)
 	return security_ops->file_receive(file);
 }
 
-int security_dentry_open(struct file *file)
+int security_dentry_open(struct file *file, const struct cred *cred)
 {
-	return security_ops->dentry_open(file);
+	return security_ops->dentry_open(file, cred);
 }
 
 int security_task_create(unsigned long clone_flags)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index cc6e5a3f10cc..f20cbd681ba6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2150,9 +2150,9 @@ extern struct vfsmount *selinuxfs_mount;
 extern struct dentry *selinux_null;
 
 /* Derived from fs/exec.c:flush_old_files. */
-static inline void flush_unauthorized_files(struct files_struct *files)
+static inline void flush_unauthorized_files(const struct cred *cred,
+					    struct files_struct *files)
 {
-	const struct cred *cred = current_cred();
 	struct avc_audit_data ad;
 	struct file *file, *devnull = NULL;
 	struct tty_struct *tty;
@@ -2222,7 +2222,10 @@ static inline void flush_unauthorized_files(struct files_struct *files)
 					if (devnull) {
 						get_file(devnull);
 					} else {
-						devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR);
+						devnull = dentry_open(
+							dget(selinux_null),
+							mntget(selinuxfs_mount),
+							O_RDWR, cred);
 						if (IS_ERR(devnull)) {
 							devnull = NULL;
 							put_unused_fd(fd);
@@ -2302,6 +2305,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
  */
 static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 {
+	const struct cred *cred = current_cred();
 	struct task_security_struct *tsec;
 	struct rlimit *rlim, *initrlim;
 	struct itimerval itimer;
@@ -2321,7 +2325,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 		return;
 
 	/* Close files for which the new task SID is not authorized. */
-	flush_unauthorized_files(current->files);
+	flush_unauthorized_files(cred, current->files);
 
 	/* Check whether the new SID can inherit signal state
 	   from the old SID.  If not, clear itimers to avoid
@@ -3202,9 +3206,8 @@ static int selinux_file_receive(struct file *file)
 	return file_has_perm(cred, file, file_to_av(file));
 }
 
-static int selinux_dentry_open(struct file *file)
+static int selinux_dentry_open(struct file *file, const struct cred *cred)
 {
-	const struct cred *cred = current_cred();
 	struct file_security_struct *fsec;
 	struct inode *inode;
 	struct inode_security_struct *isec;
-- 
cgit v1.2.3


From d84f4f992cbd76e8f39c488cf0c5d123843923b1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:23 +1100
Subject: CRED: Inaugurate COW credentials

Inaugurate copy-on-write credentials management.  This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.

A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().

With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:

	struct cred *new = prepare_creds();
	int ret = blah(new);
	if (ret < 0) {
		abort_creds(new);
		return ret;
	}
	return commit_creds(new);

There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.

To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const.  The purpose of this is compile-time
discouragement of altering credentials through those pointers.  Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:

  (1) Its reference count may incremented and decremented.

  (2) The keyrings to which it points may be modified, but not replaced.

The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).

This patch and the preceding patches have been tested with the LTP SELinux
testsuite.

This patch makes several logical sets of alteration:

 (1) execve().

     This now prepares and commits credentials in various places in the
     security code rather than altering the current creds directly.

 (2) Temporary credential overrides.

     do_coredump() and sys_faccessat() now prepare their own credentials and
     temporarily override the ones currently on the acting thread, whilst
     preventing interference from other threads by holding cred_replace_mutex
     on the thread being dumped.

     This will be replaced in a future patch by something that hands down the
     credentials directly to the functions being called, rather than altering
     the task's objective credentials.

 (3) LSM interface.

     A number of functions have been changed, added or removed:

     (*) security_capset_check(), ->capset_check()
     (*) security_capset_set(), ->capset_set()

     	 Removed in favour of security_capset().

     (*) security_capset(), ->capset()

     	 New.  This is passed a pointer to the new creds, a pointer to the old
     	 creds and the proposed capability sets.  It should fill in the new
     	 creds or return an error.  All pointers, barring the pointer to the
     	 new creds, are now const.

     (*) security_bprm_apply_creds(), ->bprm_apply_creds()

     	 Changed; now returns a value, which will cause the process to be
     	 killed if it's an error.

     (*) security_task_alloc(), ->task_alloc_security()

     	 Removed in favour of security_prepare_creds().

     (*) security_cred_free(), ->cred_free()

     	 New.  Free security data attached to cred->security.

     (*) security_prepare_creds(), ->cred_prepare()

     	 New. Duplicate any security data attached to cred->security.

     (*) security_commit_creds(), ->cred_commit()

     	 New. Apply any security effects for the upcoming installation of new
     	 security by commit_creds().

     (*) security_task_post_setuid(), ->task_post_setuid()

     	 Removed in favour of security_task_fix_setuid().

     (*) security_task_fix_setuid(), ->task_fix_setuid()

     	 Fix up the proposed new credentials for setuid().  This is used by
     	 cap_set_fix_setuid() to implicitly adjust capabilities in line with
     	 setuid() changes.  Changes are made to the new credentials, rather
     	 than the task itself as in security_task_post_setuid().

     (*) security_task_reparent_to_init(), ->task_reparent_to_init()

     	 Removed.  Instead the task being reparented to init is referred
     	 directly to init's credentials.

	 NOTE!  This results in the loss of some state: SELinux's osid no
	 longer records the sid of the thread that forked it.

     (*) security_key_alloc(), ->key_alloc()
     (*) security_key_permission(), ->key_permission()

     	 Changed.  These now take cred pointers rather than task pointers to
     	 refer to the security context.

 (4) sys_capset().

     This has been simplified and uses less locking.  The LSM functions it
     calls have been merged.

 (5) reparent_to_kthreadd().

     This gives the current thread the same credentials as init by simply using
     commit_thread() to point that way.

 (6) __sigqueue_alloc() and switch_uid()

     __sigqueue_alloc() can't stop the target task from changing its creds
     beneath it, so this function gets a reference to the currently applicable
     user_struct which it then passes into the sigqueue struct it returns if
     successful.

     switch_uid() is now called from commit_creds(), and possibly should be
     folded into that.  commit_creds() should take care of protecting
     __sigqueue_alloc().

 (7) [sg]et[ug]id() and co and [sg]et_current_groups.

     The set functions now all use prepare_creds(), commit_creds() and
     abort_creds() to build and check a new set of credentials before applying
     it.

     security_task_set[ug]id() is called inside the prepared section.  This
     guarantees that nothing else will affect the creds until we've finished.

     The calling of set_dumpable() has been moved into commit_creds().

     Much of the functionality of set_user() has been moved into
     commit_creds().

     The get functions all simply access the data directly.

 (8) security_task_prctl() and cap_task_prctl().

     security_task_prctl() has been modified to return -ENOSYS if it doesn't
     want to handle a function, or otherwise return the return value directly
     rather than through an argument.

     Additionally, cap_task_prctl() now prepares a new set of credentials, even
     if it doesn't end up using it.

 (9) Keyrings.

     A number of changes have been made to the keyrings code:

     (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
     	 all been dropped and built in to the credentials functions directly.
     	 They may want separating out again later.

     (b) key_alloc() and search_process_keyrings() now take a cred pointer
     	 rather than a task pointer to specify the security context.

     (c) copy_creds() gives a new thread within the same thread group a new
     	 thread keyring if its parent had one, otherwise it discards the thread
     	 keyring.

     (d) The authorisation key now points directly to the credentials to extend
     	 the search into rather pointing to the task that carries them.

     (e) Installing thread, process or session keyrings causes a new set of
     	 credentials to be created, even though it's not strictly necessary for
     	 process or session keyrings (they're shared).

(10) Usermode helper.

     The usermode helper code now carries a cred struct pointer in its
     subprocess_info struct instead of a new session keyring pointer.  This set
     of credentials is derived from init_cred and installed on the new process
     after it has been cloned.

     call_usermodehelper_setup() allocates the new credentials and
     call_usermodehelper_freeinfo() discards them if they haven't been used.  A
     special cred function (prepare_usermodeinfo_creds()) is provided
     specifically for call_usermodehelper_setup() to call.

     call_usermodehelper_setkeys() adjusts the credentials to sport the
     supplied keyring as the new session keyring.

(11) SELinux.

     SELinux has a number of changes, in addition to those to support the LSM
     interface changes mentioned above:

     (a) selinux_setprocattr() no longer does its check for whether the
     	 current ptracer can access processes with the new SID inside the lock
     	 that covers getting the ptracer's SID.  Whilst this lock ensures that
     	 the check is done with the ptracer pinned, the result is only valid
     	 until the lock is released, so there's no point doing it inside the
     	 lock.

(12) is_single_threaded().

     This function has been extracted from selinux_setprocattr() and put into
     a file of its own in the lib/ directory as join_session_keyring() now
     wants to use it too.

     The code in SELinux just checked to see whether a task shared mm_structs
     with other tasks (CLONE_VM), but that isn't good enough.  We really want
     to know if they're part of the same thread group (CLONE_THREAD).

(13) nfsd.

     The NFS server daemon now has to use the COW credentials to set the
     credentials it is going to use.  It really needs to pass the credentials
     down to the functions it calls, but it can't do that until other patches
     in this series have been applied.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/exec.c                        |  31 ++-
 fs/nfsd/auth.c                   |  92 ++++----
 fs/nfsd/nfs4recover.c            |  68 +++---
 fs/nfsd/nfsfh.c                  |  11 +-
 fs/open.c                        |  31 ++-
 include/linux/audit.h            |  22 +-
 include/linux/capability.h       |   2 -
 include/linux/cred.h             |  44 +++-
 include/linux/init_task.h        |   2 +
 include/linux/key.h              |  22 +-
 include/linux/sched.h            |   6 +-
 include/linux/security.h         | 178 +++++++---------
 init/main.c                      |   1 +
 kernel/auditsc.c                 |  42 ++--
 kernel/capability.c              |  78 +++----
 kernel/cred-internals.h          |  21 ++
 kernel/cred.c                    | 321 ++++++++++++++++++++++++----
 kernel/exit.c                    |   9 +-
 kernel/fork.c                    |   7 +-
 kernel/kmod.c                    |  30 ++-
 kernel/ptrace.c                  |   9 +
 kernel/signal.c                  |  10 +-
 kernel/sys.c                     | 450 +++++++++++++++++++++------------------
 kernel/user.c                    |  37 +---
 kernel/user_namespace.c          |  12 +-
 lib/Makefile                     |   2 +-
 net/rxrpc/ar-key.c               |   6 +-
 security/capability.c            |  21 +-
 security/commoncap.c             | 265 +++++++++++------------
 security/keys/internal.h         |  17 +-
 security/keys/key.c              |  25 +--
 security/keys/keyctl.c           |  95 ++++++---
 security/keys/keyring.c          |  14 +-
 security/keys/permission.c       |  24 ++-
 security/keys/proc.c             |   8 +-
 security/keys/process_keys.c     | 333 ++++++++++++++---------------
 security/keys/request_key.c      |  29 ++-
 security/keys/request_key_auth.c |  41 ++--
 security/security.c              |  58 +++--
 security/selinux/hooks.c         | 286 ++++++++++++-------------
 security/smack/smack_lsm.c       |  82 ++++---
 41 files changed, 1603 insertions(+), 1239 deletions(-)
 create mode 100644 kernel/cred-internals.h

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index a5330e1a2216..9bd3559ddece 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1007,13 +1007,12 @@ int flush_old_exec(struct linux_binprm * bprm)
 	 */
 	current->mm->task_size = TASK_SIZE;
 
-	if (bprm->e_uid != current_euid() || bprm->e_gid != current_egid()) {
-		suid_keys(current);
+	if (bprm->e_uid != current_euid() ||
+	    bprm->e_gid != current_egid()) {
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
 	} else if (file_permission(bprm->file, MAY_READ) ||
 			(bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
-		suid_keys(current);
 		set_dumpable(current->mm, suid_dumpable);
 	}
 
@@ -1096,10 +1095,8 @@ void compute_creds(struct linux_binprm *bprm)
 {
 	int unsafe;
 
-	if (bprm->e_uid != current_uid()) {
-		suid_keys(current);
+	if (bprm->e_uid != current_uid())
 		current->pdeath_signal = 0;
-	}
 	exec_keys(current);
 
 	task_lock(current);
@@ -1709,8 +1706,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
 	struct file * file;
+	const struct cred *old_cred;
+	struct cred *cred;
 	int retval = 0;
-	int fsuid = current_fsuid();
 	int flag = 0;
 	int ispipe = 0;
 	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1723,12 +1721,20 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	binfmt = current->binfmt;
 	if (!binfmt || !binfmt->core_dump)
 		goto fail;
+
+	cred = prepare_creds();
+	if (!cred) {
+		retval = -ENOMEM;
+		goto fail;
+	}
+
 	down_write(&mm->mmap_sem);
 	/*
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
 	if (mm->core_state || !get_dumpable(mm)) {
 		up_write(&mm->mmap_sem);
+		put_cred(cred);
 		goto fail;
 	}
 
@@ -1739,12 +1745,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 */
 	if (get_dumpable(mm) == 2) {	/* Setuid core dump mode */
 		flag = O_EXCL;		/* Stop rewrite attacks */
-		current->cred->fsuid = 0;	/* Dump root private */
+		cred->fsuid = 0;	/* Dump root private */
 	}
 
 	retval = coredump_wait(exit_code, &core_state);
-	if (retval < 0)
+	if (retval < 0) {
+		put_cred(cred);
 		goto fail;
+	}
+
+	old_cred = override_creds(cred);
 
 	/*
 	 * Clear any false indication of pending signals that might
@@ -1835,7 +1845,8 @@ fail_unlock:
 	if (helper_argv)
 		argv_free(helper_argv);
 
-	current->cred->fsuid = fsuid;
+	revert_creds(old_cred);
+	put_cred(cred);
 	coredump_finish(mm);
 fail:
 	return retval;
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 808fc03a6fbd..836ffa1047d9 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -27,55 +27,67 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 
 int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 {
-	struct cred *act_as = current->cred ;
-	struct svc_cred	cred = rqstp->rq_cred;
+	struct group_info *rqgi;
+	struct group_info *gi;
+	struct cred *new;
 	int i;
 	int flags = nfsexp_flags(rqstp, exp);
 	int ret;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	new->fsuid = rqstp->rq_cred.cr_uid;
+	new->fsgid = rqstp->rq_cred.cr_gid;
+
+	rqgi = rqstp->rq_cred.cr_group_info;
+
 	if (flags & NFSEXP_ALLSQUASH) {
-		cred.cr_uid = exp->ex_anon_uid;
-		cred.cr_gid = exp->ex_anon_gid;
-		cred.cr_group_info = groups_alloc(0);
+		new->fsuid = exp->ex_anon_uid;
+		new->fsgid = exp->ex_anon_gid;
+		gi = groups_alloc(0);
 	} else if (flags & NFSEXP_ROOTSQUASH) {
-		struct group_info *gi;
-		if (!cred.cr_uid)
-			cred.cr_uid = exp->ex_anon_uid;
-		if (!cred.cr_gid)
-			cred.cr_gid = exp->ex_anon_gid;
-		gi = groups_alloc(cred.cr_group_info->ngroups);
-		if (gi)
-			for (i = 0; i < cred.cr_group_info->ngroups; i++) {
-				if (!GROUP_AT(cred.cr_group_info, i))
-					GROUP_AT(gi, i) = exp->ex_anon_gid;
-				else
-					GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
-			}
-		cred.cr_group_info = gi;
-	} else
-		get_group_info(cred.cr_group_info);
-
-	if (cred.cr_uid != (uid_t) -1)
-		act_as->fsuid = cred.cr_uid;
-	else
-		act_as->fsuid = exp->ex_anon_uid;
-	if (cred.cr_gid != (gid_t) -1)
-		act_as->fsgid = cred.cr_gid;
-	else
-		act_as->fsgid = exp->ex_anon_gid;
+		if (!new->fsuid)
+			new->fsuid = exp->ex_anon_uid;
+		if (!new->fsgid)
+			new->fsgid = exp->ex_anon_gid;
 
-	if (!cred.cr_group_info)
-		return -ENOMEM;
-	ret = set_groups(act_as, cred.cr_group_info);
-	put_group_info(cred.cr_group_info);
-	if ((cred.cr_uid)) {
-		act_as->cap_effective =
-			cap_drop_nfsd_set(act_as->cap_effective);
+		gi = groups_alloc(rqgi->ngroups);
+		if (!gi)
+			goto oom;
+
+		for (i = 0; i < rqgi->ngroups; i++) {
+			if (!GROUP_AT(rqgi, i))
+				GROUP_AT(gi, i) = exp->ex_anon_gid;
+			else
+				GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
+		}
 	} else {
-		act_as->cap_effective =
-			cap_raise_nfsd_set(act_as->cap_effective,
-					   act_as->cap_permitted);
+		gi = get_group_info(rqgi);
 	}
+
+	if (new->fsuid == (uid_t) -1)
+		new->fsuid = exp->ex_anon_uid;
+	if (new->fsgid == (gid_t) -1)
+		new->fsgid = exp->ex_anon_gid;
+
+	ret = set_groups(new, gi);
+	put_group_info(gi);
+	if (!ret)
+		goto error;
+
+	if (new->uid)
+		new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
+	else
+		new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
+							new->cap_permitted);
+	return commit_creds(new);
+
+oom:
+	ret = -ENOMEM;
+error:
+	abort_creds(new);
 	return ret;
 }
 
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 632a50b4b371..9371ea12d7fa 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -54,20 +54,26 @@
 static struct path rec_dir;
 static int rec_dir_init = 0;
 
-static void
-nfs4_save_user(uid_t *saveuid, gid_t *savegid)
+static int
+nfs4_save_creds(const struct cred **original_creds)
 {
-	*saveuid = current->cred->fsuid;
-	*savegid = current->cred->fsgid;
-	current->cred->fsuid = 0;
-	current->cred->fsgid = 0;
+	struct cred *new;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	new->fsuid = 0;
+	new->fsgid = 0;
+	*original_creds = override_creds(new);
+	put_cred(new);
+	return 0;
 }
 
 static void
-nfs4_reset_user(uid_t saveuid, gid_t savegid)
+nfs4_reset_creds(const struct cred *original)
 {
-	current->cred->fsuid = saveuid;
-	current->cred->fsgid = savegid;
+	revert_creds(original);
 }
 
 static void
@@ -129,10 +135,9 @@ nfsd4_sync_rec_dir(void)
 int
 nfsd4_create_clid_dir(struct nfs4_client *clp)
 {
+	const struct cred *original_cred;
 	char *dname = clp->cl_recdir;
 	struct dentry *dentry;
-	uid_t uid;
-	gid_t gid;
 	int status;
 
 	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
@@ -140,7 +145,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 	if (!rec_dir_init || clp->cl_firststate)
 		return 0;
 
-	nfs4_save_user(&uid, &gid);
+	status = nfs4_save_creds(&original_cred);
+	if (status < 0)
+		return status;
 
 	/* lock the parent */
 	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
@@ -168,7 +175,7 @@ out_unlock:
 		clp->cl_firststate = 1;
 		nfsd4_sync_rec_dir();
 	}
-	nfs4_reset_user(uid, gid);
+	nfs4_reset_creds(original_cred);
 	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
 	return status;
 }
@@ -211,20 +218,21 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
 static int
 nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
 {
+	const struct cred *original_cred;
 	struct file *filp;
 	struct dentry_list_arg dla = {
 		.parent = dir,
 	};
 	struct list_head *dentries = &dla.dentries;
 	struct dentry_list *child;
-	uid_t uid;
-	gid_t gid;
 	int status;
 
 	if (!rec_dir_init)
 		return 0;
 
-	nfs4_save_user(&uid, &gid);
+	status = nfs4_save_creds(&original_cred);
+	if (status < 0)
+		return status;
 
 	filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY,
 			   current_cred());
@@ -250,7 +258,7 @@ out:
 		dput(child->dentry);
 		kfree(child);
 	}
-	nfs4_reset_user(uid, gid);
+	nfs4_reset_creds(original_cred);
 	return status;
 }
 
@@ -312,8 +320,7 @@ out:
 void
 nfsd4_remove_clid_dir(struct nfs4_client *clp)
 {
-	uid_t uid;
-	gid_t gid;
+	const struct cred *original_cred;
 	int status;
 
 	if (!rec_dir_init || !clp->cl_firststate)
@@ -323,9 +330,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	if (status)
 		goto out;
 	clp->cl_firststate = 0;
-	nfs4_save_user(&uid, &gid);
+
+	status = nfs4_save_creds(&original_cred);
+	if (status < 0)
+		goto out;
+
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
-	nfs4_reset_user(uid, gid);
+	nfs4_reset_creds(original_cred);
 	if (status == 0)
 		nfsd4_sync_rec_dir();
 	mnt_drop_write(rec_dir.mnt);
@@ -402,16 +413,21 @@ nfsd4_recdir_load(void) {
 void
 nfsd4_init_recdir(char *rec_dirname)
 {
-	uid_t			uid = 0;
-	gid_t			gid = 0;
-	int 			status;
+	const struct cred *original_cred;
+	int status;
 
 	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
 			rec_dirname);
 
 	BUG_ON(rec_dir_init);
 
-	nfs4_save_user(&uid, &gid);
+	status = nfs4_save_creds(&original_cred);
+	if (status < 0) {
+		printk("NFSD: Unable to change credentials to find recovery"
+		       " directory: error %d\n",
+		       status);
+		return;
+	}
 
 	status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
 			&rec_dir);
@@ -421,7 +437,7 @@ nfsd4_init_recdir(char *rec_dirname)
 
 	if (!status)
 		rec_dir_init = 1;
-	nfs4_reset_user(uid, gid);
+	nfs4_reset_creds(original_cred);
 }
 
 void
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index e67cfaea0865..f0da7d9c3a92 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -186,9 +186,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 		 * access control settings being in effect, we cannot
 		 * fix that case easily.
 		 */
-		current->cred->cap_effective =
-			cap_raise_nfsd_set(current->cred->cap_effective,
-					   current->cred->cap_permitted);
+		struct cred *new = prepare_creds();
+		if (!new)
+			return nfserrno(-ENOMEM);
+		new->cap_effective =
+			cap_raise_nfsd_set(new->cap_effective,
+					   new->cap_permitted);
+		put_cred(override_creds(new));
+		put_cred(new);
 	} else {
 		error = nfsd_setuser_and_check_port(rqstp, exp);
 		if (error)
diff --git a/fs/open.c b/fs/open.c
index f96eaab280a3..c0a426d5766c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -425,30 +425,33 @@ out:
  */
 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 {
-	struct cred *cred = current->cred;
+	const struct cred *old_cred;
+	struct cred *override_cred;
 	struct path path;
 	struct inode *inode;
-	int old_fsuid, old_fsgid;
-	kernel_cap_t uninitialized_var(old_cap);  /* !SECURE_NO_SETUID_FIXUP */
 	int res;
 
 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
 		return -EINVAL;
 
-	old_fsuid = cred->fsuid;
-	old_fsgid = cred->fsgid;
+	override_cred = prepare_creds();
+	if (!override_cred)
+		return -ENOMEM;
 
-	cred->fsuid = cred->uid;
-	cred->fsgid = cred->gid;
+	override_cred->fsuid = override_cred->uid;
+	override_cred->fsgid = override_cred->gid;
 
 	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 		/* Clear the capabilities if we switch to a non-root user */
-		if (current->cred->uid)
-			old_cap = cap_set_effective(__cap_empty_set);
+		if (override_cred->uid)
+			cap_clear(override_cred->cap_effective);
 		else
-			old_cap = cap_set_effective(cred->cap_permitted);
+			override_cred->cap_effective =
+				override_cred->cap_permitted;
 	}
 
+	old_cred = override_creds(override_cred);
+
 	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
 	if (res)
 		goto out;
@@ -485,12 +488,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 out_path_release:
 	path_put(&path);
 out:
-	cred->fsuid = old_fsuid;
-	cred->fsgid = old_fsgid;
-
-	if (!issecure(SECURE_NO_SETUID_FIXUP))
-		cap_set_effective(old_cap);
-
+	revert_creds(old_cred);
+	put_cred(override_cred);
 	return res;
 }
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6fbebac7b1bf..0b2fcb698a63 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -454,8 +454,10 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr
 extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
 extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
-extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE);
-extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm);
+extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
+				  const struct cred *new,
+				  const struct cred *old);
+extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
 
 static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -522,16 +524,20 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
  *
  * -Eric
  */
-static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE)
+static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm,
+				       const struct cred *new,
+				       const struct cred *old)
 {
 	if (unlikely(!audit_dummy_context()))
-		__audit_log_bprm_fcaps(bprm, pP, pE);
+		return __audit_log_bprm_fcaps(bprm, new, old);
+	return 0;
 }
 
-static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm)
+static inline int audit_log_capset(pid_t pid, const struct cred *new,
+				   const struct cred *old)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_log_capset(pid, eff, inh, perm);
+		return __audit_log_capset(pid, new, old);
 	return 0;
 }
 
@@ -566,8 +572,8 @@ extern int audit_signals;
 #define audit_mq_timedreceive(d,l,p,t) ({ 0; })
 #define audit_mq_notify(d,n) ({ 0; })
 #define audit_mq_getsetattr(d,s) ({ 0; })
-#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0)
-#define audit_log_capset(pid, e, i, p) ({ 0; })
+#define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
+#define audit_log_capset(pid, ncr, ocr) ({ 0; })
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 7f26580a5a4d..e22f48c2a46f 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -519,8 +519,6 @@ extern const kernel_cap_t __cap_empty_set;
 extern const kernel_cap_t __cap_full_set;
 extern const kernel_cap_t __cap_init_eff_set;
 
-kernel_cap_t cap_set_effective(const kernel_cap_t pE_new);
-
 /**
  * has_capability - Determine if a task has a superior capability available
  * @t: The task in question
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 62b9e532422d..eaf6fa695a04 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -84,6 +84,8 @@ struct thread_group_cred {
 	struct key	*process_keyring;	/* keyring private to this process */
 	struct rcu_head	rcu;			/* RCU deletion hook */
 };
+
+extern void release_tgcred(struct cred *cred);
 #endif
 
 /*
@@ -137,11 +139,30 @@ struct cred {
 	struct user_struct *user;	/* real user ID subscription */
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	struct rcu_head	rcu;		/* RCU deletion hook */
-	spinlock_t	lock;		/* lock for pointer changes */
 };
 
 extern void __put_cred(struct cred *);
 extern int copy_creds(struct task_struct *, unsigned long);
+extern struct cred *prepare_creds(void);
+extern struct cred *prepare_usermodehelper_creds(void);
+extern int commit_creds(struct cred *);
+extern void abort_creds(struct cred *);
+extern const struct cred *override_creds(const struct cred *) __deprecated;
+extern void revert_creds(const struct cred *) __deprecated;
+extern void __init cred_init(void);
+
+/**
+ * get_new_cred - Get a reference on a new set of credentials
+ * @cred: The new credentials to reference
+ *
+ * Get a reference on the specified set of new credentials.  The caller must
+ * release the reference.
+ */
+static inline struct cred *get_new_cred(struct cred *cred)
+{
+	atomic_inc(&cred->usage);
+	return cred;
+}
 
 /**
  * get_cred - Get a reference on a set of credentials
@@ -150,10 +171,9 @@ extern int copy_creds(struct task_struct *, unsigned long);
  * Get a reference on the specified set of credentials.  The caller must
  * release the reference.
  */
-static inline struct cred *get_cred(struct cred *cred)
+static inline const struct cred *get_cred(const struct cred *cred)
 {
-	atomic_inc(&cred->usage);
-	return cred;
+	return get_new_cred((struct cred *) cred);
 }
 
 /**
@@ -166,6 +186,8 @@ static inline struct cred *get_cred(struct cred *cred)
 static inline void put_cred(const struct cred *_cred)
 {
 	struct cred *cred = (struct cred *) _cred;
+
+	BUG_ON(atomic_read(&(cred)->usage) <= 0);
 	if (atomic_dec_and_test(&(cred)->usage))
 		__put_cred(cred);
 }
@@ -250,13 +272,13 @@ static inline void put_cred(const struct cred *_cred)
 	__groups;					\
 })
 
-#define task_cred_xxx(task, xxx)		\
-({						\
-	__typeof__(task->cred->xxx) ___val;	\
-	rcu_read_lock();			\
-	___val = __task_cred((task))->xxx;	\
-	rcu_read_unlock();			\
-	___val;					\
+#define task_cred_xxx(task, xxx)			\
+({							\
+	__typeof__(((struct cred *)NULL)->xxx) ___val;	\
+	rcu_read_lock();				\
+	___val = __task_cred((task))->xxx;		\
+	rcu_read_unlock();				\
+	___val;						\
 })
 
 #define task_uid(task)		(task_cred_xxx((task), uid))
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5e24c54b6dfd..08c3b24ad9a8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -150,6 +150,8 @@ extern struct cred init_cred;
 	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
 	.group_leader	= &tsk,						\
 	.cred		= &init_cred,					\
+	.cred_exec_mutex =						\
+		 __MUTEX_INITIALIZER(tsk.cred_exec_mutex),		\
 	.comm		= "swapper",					\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
diff --git a/include/linux/key.h b/include/linux/key.h
index 0836cc838b0c..69ecf0934b02 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -73,6 +73,7 @@ struct key;
 struct seq_file;
 struct user_struct;
 struct signal_struct;
+struct cred;
 
 struct key_type;
 struct key_owner;
@@ -181,7 +182,7 @@ struct key {
 extern struct key *key_alloc(struct key_type *type,
 			     const char *desc,
 			     uid_t uid, gid_t gid,
-			     struct task_struct *ctx,
+			     const struct cred *cred,
 			     key_perm_t perm,
 			     unsigned long flags);
 
@@ -249,7 +250,7 @@ extern int key_unlink(struct key *keyring,
 		      struct key *key);
 
 extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
-				 struct task_struct *ctx,
+				 const struct cred *cred,
 				 unsigned long flags,
 				 struct key *dest);
 
@@ -276,22 +277,12 @@ extern ctl_table key_sysctls[];
 /*
  * the userspace interface
  */
-extern void switch_uid_keyring(struct user_struct *new_user);
-extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk);
-extern void exit_keys(struct task_struct *tsk);
-extern int suid_keys(struct task_struct *tsk);
+extern int install_thread_keyring_to_cred(struct cred *cred);
 extern int exec_keys(struct task_struct *tsk);
 extern void key_fsuid_changed(struct task_struct *tsk);
 extern void key_fsgid_changed(struct task_struct *tsk);
 extern void key_init(void);
 
-#define __install_session_keyring(keyring)				\
-({									\
-	struct key *old_session = current->cred->tgcred->session_keyring; \
-	current->cred->tgcred->session_keyring = keyring;		\
-	old_session;							\
-})
-
 #else /* CONFIG_KEYS */
 
 #define key_validate(k)			0
@@ -303,11 +294,6 @@ extern void key_init(void);
 #define make_key_ref(k, p)		NULL
 #define key_ref_to_ptr(k)		NULL
 #define is_key_possessed(k)		0
-#define switch_uid_keyring(u)		do { } while(0)
-#define __install_session_keyring(k)	({ NULL; })
-#define copy_keys(f,t)			0
-#define exit_keys(t)			do { } while(0)
-#define suid_keys(t)			do { } while(0)
 #define exec_keys(t)			do { } while(0)
 #define key_fsuid_changed(t)		do { } while(0)
 #define key_fsgid_changed(t)		do { } while(0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2913252989b3..121d655e460d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1145,7 +1145,8 @@ struct task_struct {
 	struct list_head cpu_timers[3];
 
 /* process credentials */
-	struct cred *cred;	/* actual/objective task credentials */
+	const struct cred *cred;	/* actual/objective task credentials (COW) */
+	struct mutex cred_exec_mutex;	/* execve vs ptrace cred calculation mutex */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
@@ -1720,7 +1721,6 @@ static inline struct user_struct *get_uid(struct user_struct *u)
 	return u;
 }
 extern void free_uid(struct user_struct *);
-extern void switch_uid(struct user_struct *);
 extern void release_uids(struct user_namespace *ns);
 
 #include <asm/current.h>
@@ -1870,6 +1870,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
 
+extern bool is_single_threaded(struct task_struct *);
+
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
  *          'break' will not work as expected - use goto instead.
diff --git a/include/linux/security.h b/include/linux/security.h
index 7e9fe046a0d1..68be11251447 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,24 +53,21 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
 extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
-extern int cap_capset_check(const kernel_cap_t *effective,
-			    const kernel_cap_t *inheritable,
-			    const kernel_cap_t *permitted);
-extern void cap_capset_set(const kernel_cap_t *effective,
-			   const kernel_cap_t *inheritable,
-			   const kernel_cap_t *permitted);
+extern int cap_capset(struct cred *new, const struct cred *old,
+		      const kernel_cap_t *effective,
+		      const kernel_cap_t *inheritable,
+		      const kernel_cap_t *permitted);
 extern int cap_bprm_set_security(struct linux_binprm *bprm);
-extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
+extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
 extern int cap_bprm_secureexec(struct linux_binprm *bprm);
 extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
 			      const void *value, size_t size, int flags);
 extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
 extern int cap_inode_need_killpriv(struct dentry *dentry);
 extern int cap_inode_killpriv(struct dentry *dentry);
-extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags);
-extern void cap_task_reparent_to_init(struct task_struct *p);
+extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags);
 extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
-			  unsigned long arg4, unsigned long arg5, long *rc_p);
+			  unsigned long arg4, unsigned long arg5);
 extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp);
 extern int cap_task_setioprio(struct task_struct *p, int ioprio);
 extern int cap_task_setnice(struct task_struct *p, int nice);
@@ -170,8 +167,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Compute and set the security attributes of a process being transformed
  *	by an execve operation based on the old attributes (current->security)
  *	and the information saved in @bprm->security by the set_security hook.
- *	Since this hook function (and its caller) are void, this hook can not
- *	return an error.  However, it can leave the security attributes of the
+ *	Since this function may return an error, in which case the process will
+ *      be killed.  However, it can leave the security attributes of the
  *	process unchanged if an access failure occurs at this point.
  *	bprm_apply_creds is called under task_lock.  @unsafe indicates various
  *	reasons why it may be unsafe to change security state.
@@ -593,15 +590,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	manual page for definitions of the @clone_flags.
  *	@clone_flags contains the flags indicating what should be shared.
  *	Return 0 if permission is granted.
- * @cred_alloc_security:
- *	@cred contains the cred struct for child process.
- *	Allocate and attach a security structure to the cred->security field.
- *	The security field is initialized to NULL when the task structure is
- *	allocated.
- *	Return 0 if operation was successful.
  * @cred_free:
  *	@cred points to the credentials.
  *	Deallocate and clear the cred->security field in a set of credentials.
+ * @cred_prepare:
+ *	@new points to the new credentials.
+ *	@old points to the original credentials.
+ *	@gfp indicates the atomicity of any memory allocations.
+ *	Prepare a new set of credentials by copying the data from the old set.
+ * @cred_commit:
+ *	@new points to the new credentials.
+ *	@old points to the original credentials.
+ *	Install a new set of credentials.
  * @task_setuid:
  *	Check permission before setting one or more of the user identity
  *	attributes of the current process.  The @flags parameter indicates
@@ -614,15 +614,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@id2 contains a uid.
  *	@flags contains one of the LSM_SETID_* values.
  *	Return 0 if permission is granted.
- * @task_post_setuid:
+ * @task_fix_setuid:
  *	Update the module's state after setting one or more of the user
  *	identity attributes of the current process.  The @flags parameter
  *	indicates which of the set*uid system calls invoked this hook.  If
- *	@flags is LSM_SETID_FS, then @old_ruid is the old fs uid and the other
- *	parameters are not used.
- *	@old_ruid contains the old real uid (or fs uid if LSM_SETID_FS).
- *	@old_euid contains the old effective uid (or -1 if LSM_SETID_FS).
- *	@old_suid contains the old saved uid (or -1 if LSM_SETID_FS).
+ *	@new is the set of credentials that will be installed.  Modifications
+ *	should be made to this rather than to @current->cred.
+ *	@old is the set of credentials that are being replaces
  *	@flags contains one of the LSM_SETID_* values.
  *	Return 0 on success.
  * @task_setgid:
@@ -725,13 +723,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@arg3 contains a argument.
  *	@arg4 contains a argument.
  *	@arg5 contains a argument.
- *      @rc_p contains a pointer to communicate back the forced return code
- *	Return 0 if permission is granted, and non-zero if the security module
- *      has taken responsibility (setting *rc_p) for the prctl call.
- * @task_reparent_to_init:
- *	Set the security attributes in @p->security for a kernel thread that
- *	is being reparented to the init task.
- *	@p contains the task_struct for the kernel thread.
+ *	Return -ENOSYS if no-one wanted to handle this op, any other value to
+ *	cause prctl() to return immediately with that value.
  * @task_to_inode:
  *	Set the security attributes for an inode based on an associated task's
  *	security attributes, e.g. for /proc/pid inodes.
@@ -1008,7 +1001,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	See whether a specific operational right is granted to a process on a
  *	key.
  *	@key_ref refers to the key (key pointer + possession attribute bit).
- *	@context points to the process to provide the context against which to
+ *	@cred points to the credentials to provide the context against which to
  *	evaluate the security data on the key.
  *	@perm describes the combination of permissions required of this key.
  *	Return 1 if permission granted, 0 if permission denied and -ve it the
@@ -1170,6 +1163,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@child process.
  *	Security modules may also want to perform a process tracing check
  *	during an execve in the set_security or apply_creds hooks of
+ *	tracing check during an execve in the bprm_set_creds hook of
  *	binprm_security_ops if the process is being traced and its security
  *	attributes would be changed by the execve.
  *	@child contains the task_struct structure for the target process.
@@ -1193,19 +1187,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@inheritable contains the inheritable capability set.
  *	@permitted contains the permitted capability set.
  *	Return 0 if the capability sets were successfully obtained.
- * @capset_check:
- *	Check permission before setting the @effective, @inheritable, and
- *	@permitted capability sets for the current process.
- *	@effective contains the effective capability set.
- *	@inheritable contains the inheritable capability set.
- *	@permitted contains the permitted capability set.
- *	Return 0 if permission is granted.
- * @capset_set:
+ * @capset:
  *	Set the @effective, @inheritable, and @permitted capability sets for
  *	the current process.
+ *	@new contains the new credentials structure for target process.
+ *	@old contains the current credentials structure for target process.
  *	@effective contains the effective capability set.
  *	@inheritable contains the inheritable capability set.
  *	@permitted contains the permitted capability set.
+ *	Return 0 and update @new if permission is granted.
  * @capable:
  *	Check whether the @tsk process has the @cap capability.
  *	@tsk contains the task_struct for the process.
@@ -1297,12 +1287,11 @@ struct security_operations {
 	int (*capget) (struct task_struct *target,
 		       kernel_cap_t *effective,
 		       kernel_cap_t *inheritable, kernel_cap_t *permitted);
-	int (*capset_check) (const kernel_cap_t *effective,
-			     const kernel_cap_t *inheritable,
-			     const kernel_cap_t *permitted);
-	void (*capset_set) (const kernel_cap_t *effective,
-			    const kernel_cap_t *inheritable,
-			    const kernel_cap_t *permitted);
+	int (*capset) (struct cred *new,
+		       const struct cred *old,
+		       const kernel_cap_t *effective,
+		       const kernel_cap_t *inheritable,
+		       const kernel_cap_t *permitted);
 	int (*capable) (struct task_struct *tsk, int cap, int audit);
 	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
@@ -1314,7 +1303,7 @@ struct security_operations {
 
 	int (*bprm_alloc_security) (struct linux_binprm *bprm);
 	void (*bprm_free_security) (struct linux_binprm *bprm);
-	void (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe);
+	int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe);
 	void (*bprm_post_apply_creds) (struct linux_binprm *bprm);
 	int (*bprm_set_security) (struct linux_binprm *bprm);
 	int (*bprm_check_security) (struct linux_binprm *bprm);
@@ -1405,11 +1394,13 @@ struct security_operations {
 	int (*dentry_open) (struct file *file, const struct cred *cred);
 
 	int (*task_create) (unsigned long clone_flags);
-	int (*cred_alloc_security) (struct cred *cred);
 	void (*cred_free) (struct cred *cred);
+	int (*cred_prepare)(struct cred *new, const struct cred *old,
+			    gfp_t gfp);
+	void (*cred_commit)(struct cred *new, const struct cred *old);
 	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
-	int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ ,
-				 uid_t old_euid, uid_t old_suid, int flags);
+	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
+				int flags);
 	int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags);
 	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
 	int (*task_getpgid) (struct task_struct *p);
@@ -1429,8 +1420,7 @@ struct security_operations {
 	int (*task_wait) (struct task_struct *p);
 	int (*task_prctl) (int option, unsigned long arg2,
 			   unsigned long arg3, unsigned long arg4,
-			   unsigned long arg5, long *rc_p);
-	void (*task_reparent_to_init) (struct task_struct *p);
+			   unsigned long arg5);
 	void (*task_to_inode) (struct task_struct *p, struct inode *inode);
 
 	int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag);
@@ -1535,10 +1525,10 @@ struct security_operations {
 
 	/* key management security hooks */
 #ifdef CONFIG_KEYS
-	int (*key_alloc) (struct key *key, struct task_struct *tsk, unsigned long flags);
+	int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags);
 	void (*key_free) (struct key *key);
 	int (*key_permission) (key_ref_t key_ref,
-			       struct task_struct *context,
+			       const struct cred *cred,
 			       key_perm_t perm);
 	int (*key_getsecurity)(struct key *key, char **_buffer);
 #endif	/* CONFIG_KEYS */
@@ -1564,12 +1554,10 @@ int security_capget(struct task_struct *target,
 		    kernel_cap_t *effective,
 		    kernel_cap_t *inheritable,
 		    kernel_cap_t *permitted);
-int security_capset_check(const kernel_cap_t *effective,
-			  const kernel_cap_t *inheritable,
-			  const kernel_cap_t *permitted);
-void security_capset_set(const kernel_cap_t *effective,
-			 const kernel_cap_t *inheritable,
-			 const kernel_cap_t *permitted);
+int security_capset(struct cred *new, const struct cred *old,
+		    const kernel_cap_t *effective,
+		    const kernel_cap_t *inheritable,
+		    const kernel_cap_t *permitted);
 int security_capable(struct task_struct *tsk, int cap);
 int security_capable_noaudit(struct task_struct *tsk, int cap);
 int security_acct(struct file *file);
@@ -1583,7 +1571,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
 int security_vm_enough_memory_kern(long pages);
 int security_bprm_alloc(struct linux_binprm *bprm);
 void security_bprm_free(struct linux_binprm *bprm);
-void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
+int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
 void security_bprm_post_apply_creds(struct linux_binprm *bprm);
 int security_bprm_set(struct linux_binprm *bprm);
 int security_bprm_check(struct linux_binprm *bprm);
@@ -1660,11 +1648,12 @@ int security_file_send_sigiotask(struct task_struct *tsk,
 int security_file_receive(struct file *file);
 int security_dentry_open(struct file *file, const struct cred *cred);
 int security_task_create(unsigned long clone_flags);
-int security_cred_alloc(struct cred *cred);
 void security_cred_free(struct cred *cred);
+int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
+void security_commit_creds(struct cred *new, const struct cred *old);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
-int security_task_post_setuid(uid_t old_ruid, uid_t old_euid,
-			      uid_t old_suid, int flags);
+int security_task_fix_setuid(struct cred *new, const struct cred *old,
+			     int flags);
 int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
 int security_task_getpgid(struct task_struct *p);
@@ -1683,8 +1672,7 @@ int security_task_kill(struct task_struct *p, struct siginfo *info,
 			int sig, u32 secid);
 int security_task_wait(struct task_struct *p);
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
-			 unsigned long arg4, unsigned long arg5, long *rc_p);
-void security_task_reparent_to_init(struct task_struct *p);
+			unsigned long arg4, unsigned long arg5);
 void security_task_to_inode(struct task_struct *p, struct inode *inode);
 int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
 void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
@@ -1759,18 +1747,13 @@ static inline int security_capget(struct task_struct *target,
 	return cap_capget(target, effective, inheritable, permitted);
 }
 
-static inline int security_capset_check(const kernel_cap_t *effective,
-					const kernel_cap_t *inheritable,
-					const kernel_cap_t *permitted)
+static inline int security_capset(struct cred *new,
+				   const struct cred *old,
+				   const kernel_cap_t *effective,
+				   const kernel_cap_t *inheritable,
+				   const kernel_cap_t *permitted)
 {
-	return cap_capset_check(effective, inheritable, permitted);
-}
-
-static inline void security_capset_set(const kernel_cap_t *effective,
-				       const kernel_cap_t *inheritable,
-				       const kernel_cap_t *permitted)
-{
-	cap_capset_set(effective, inheritable, permitted);
+	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
 static inline int security_capable(struct task_struct *tsk, int cap)
@@ -1837,9 +1820,9 @@ static inline int security_bprm_alloc(struct linux_binprm *bprm)
 static inline void security_bprm_free(struct linux_binprm *bprm)
 { }
 
-static inline void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
+static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 {
-	cap_bprm_apply_creds(bprm, unsafe);
+	return cap_bprm_apply_creds(bprm, unsafe);
 }
 
 static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm)
@@ -2182,13 +2165,20 @@ static inline int security_task_create(unsigned long clone_flags)
 	return 0;
 }
 
-static inline int security_cred_alloc(struct cred *cred)
+static inline void security_cred_free(struct cred *cred)
+{ }
+
+static inline int security_prepare_creds(struct cred *new,
+					 const struct cred *old,
+					 gfp_t gfp)
 {
 	return 0;
 }
 
-static inline void security_cred_free(struct cred *cred)
-{ }
+static inline void security_commit_creds(struct cred *new,
+					 const struct cred *old)
+{
+}
 
 static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
 				       int flags)
@@ -2196,10 +2186,11 @@ static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
 	return 0;
 }
 
-static inline int security_task_post_setuid(uid_t old_ruid, uid_t old_euid,
-					    uid_t old_suid, int flags)
+static inline int security_task_fix_setuid(struct cred *new,
+					   const struct cred *old,
+					   int flags)
 {
-	return cap_task_post_setuid(old_ruid, old_euid, old_suid, flags);
+	return cap_task_fix_setuid(new, old, flags);
 }
 
 static inline int security_task_setgid(gid_t id0, gid_t id1, gid_t id2,
@@ -2286,14 +2277,9 @@ static inline int security_task_wait(struct task_struct *p)
 static inline int security_task_prctl(int option, unsigned long arg2,
 				      unsigned long arg3,
 				      unsigned long arg4,
-				      unsigned long arg5, long *rc_p)
-{
-	return cap_task_prctl(option, arg2, arg3, arg3, arg5, rc_p);
-}
-
-static inline void security_task_reparent_to_init(struct task_struct *p)
+				      unsigned long arg5)
 {
-	cap_task_reparent_to_init(p);
+	return cap_task_prctl(option, arg2, arg3, arg3, arg5);
 }
 
 static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
@@ -2719,16 +2705,16 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi
 #ifdef CONFIG_KEYS
 #ifdef CONFIG_SECURITY
 
-int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags);
+int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags);
 void security_key_free(struct key *key);
 int security_key_permission(key_ref_t key_ref,
-			    struct task_struct *context, key_perm_t perm);
+			    const struct cred *cred, key_perm_t perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
 
 #else
 
 static inline int security_key_alloc(struct key *key,
-				     struct task_struct *tsk,
+				     const struct cred *cred,
 				     unsigned long flags)
 {
 	return 0;
@@ -2739,7 +2725,7 @@ static inline void security_key_free(struct key *key)
 }
 
 static inline int security_key_permission(key_ref_t key_ref,
-					  struct task_struct *context,
+					  const struct cred *cred,
 					  key_perm_t perm)
 {
 	return 0;
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..db843bff5732 100644
--- a/init/main.c
+++ b/init/main.c
@@ -669,6 +669,7 @@ asmlinkage void __init start_kernel(void)
 		efi_enter_virtual_mode();
 #endif
 	thread_info_cache_init();
+	cred_init();
 	fork_init(num_physpages);
 	proc_caches_init();
 	buffer_init();
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ae8ef88ade3f..bc1e2d854bf6 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2546,18 +2546,17 @@ int __audit_signal_info(int sig, struct task_struct *t)
 
 /**
  * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps
- * @bprm pointer to the bprm being processed
- * @caps the caps read from the disk
+ * @bprm: pointer to the bprm being processed
+ * @new: the proposed new credentials
+ * @old: the old credentials
  *
  * Simply check if the proc already has the caps given by the file and if not
  * store the priv escalation info for later auditing at the end of the syscall
  *
- * this can fail and we don't care.  See the note in audit.h for
- * audit_log_bprm_fcaps() for my explaination....
- *
  * -Eric
  */
-void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE)
+int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
+			   const struct cred *new, const struct cred *old)
 {
 	struct audit_aux_data_bprm_fcaps *ax;
 	struct audit_context *context = current->audit_context;
@@ -2566,7 +2565,7 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_
 
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
-		return;
+		return -ENOMEM;
 
 	ax->d.type = AUDIT_BPRM_FCAPS;
 	ax->d.next = context->aux;
@@ -2581,26 +2580,27 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_
 	ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
 	ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
 
-	ax->old_pcap.permitted = *pP;
-	ax->old_pcap.inheritable = current->cred->cap_inheritable;
-	ax->old_pcap.effective = *pE;
+	ax->old_pcap.permitted   = old->cap_permitted;
+	ax->old_pcap.inheritable = old->cap_inheritable;
+	ax->old_pcap.effective   = old->cap_effective;
 
-	ax->new_pcap.permitted = current->cred->cap_permitted;
-	ax->new_pcap.inheritable = current->cred->cap_inheritable;
-	ax->new_pcap.effective = current->cred->cap_effective;
+	ax->new_pcap.permitted   = new->cap_permitted;
+	ax->new_pcap.inheritable = new->cap_inheritable;
+	ax->new_pcap.effective   = new->cap_effective;
+	return 0;
 }
 
 /**
  * __audit_log_capset - store information about the arguments to the capset syscall
- * @pid target pid of the capset call
- * @eff effective cap set
- * @inh inheritible cap set
- * @perm permited cap set
+ * @pid: target pid of the capset call
+ * @new: the new credentials
+ * @old: the old (current) credentials
  *
  * Record the aguments userspace sent to sys_capset for later printing by the
  * audit system if applicable
  */
-int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm)
+int __audit_log_capset(pid_t pid,
+		       const struct cred *new, const struct cred *old)
 {
 	struct audit_aux_data_capset *ax;
 	struct audit_context *context = current->audit_context;
@@ -2617,9 +2617,9 @@ int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_c
 	context->aux = (void *)ax;
 
 	ax->pid = pid;
-	ax->cap.effective = *eff;
-	ax->cap.inheritable = *eff;
-	ax->cap.permitted = *perm;
+	ax->cap.effective   = new->cap_effective;
+	ax->cap.inheritable = new->cap_effective;
+	ax->cap.permitted   = new->cap_permitted;
 
 	return 0;
 }
diff --git a/kernel/capability.c b/kernel/capability.c
index a404b980b1bd..36b4b4daebec 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -15,12 +15,7 @@
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
-
-/*
- * This lock protects task->cap_* for all tasks including current.
- * Locking rule: acquire this prior to tasklist_lock.
- */
-static DEFINE_SPINLOCK(task_capability_lock);
+#include "cred-internals.h"
 
 /*
  * Leveraged for setting/resetting capabilities
@@ -128,12 +123,11 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
 }
 
 /*
- * If we have configured with filesystem capability support, then the
- * only thing that can change the capabilities of the current process
- * is the current process. As such, we can't be in this code at the
- * same time as we are in the process of setting capabilities in this
- * process. The net result is that we can limit our use of locks to
- * when we are reading the caps of another process.
+ * The only thing that can change the capabilities of the current
+ * process is the current process. As such, we can't be in this code
+ * at the same time as we are in the process of setting capabilities
+ * in this process. The net result is that we can limit our use of
+ * locks to when we are reading the caps of another process.
  */
 static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
 				     kernel_cap_t *pIp, kernel_cap_t *pPp)
@@ -143,7 +137,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
 	if (pid && (pid != task_pid_vnr(current))) {
 		struct task_struct *target;
 
-		spin_lock(&task_capability_lock);
 		read_lock(&tasklist_lock);
 
 		target = find_task_by_vpid(pid);
@@ -153,34 +146,12 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
 			ret = security_capget(target, pEp, pIp, pPp);
 
 		read_unlock(&tasklist_lock);
-		spin_unlock(&task_capability_lock);
 	} else
 		ret = security_capget(current, pEp, pIp, pPp);
 
 	return ret;
 }
 
-/*
- * Atomically modify the effective capabilities returning the original
- * value. No permission check is performed here - it is assumed that the
- * caller is permitted to set the desired effective capabilities.
- */
-kernel_cap_t cap_set_effective(const kernel_cap_t pE_new)
-{
-	kernel_cap_t pE_old;
-
-	spin_lock(&task_capability_lock);
-
-	pE_old = current->cred->cap_effective;
-	current->cred->cap_effective = pE_new;
-
-	spin_unlock(&task_capability_lock);
-
-	return pE_old;
-}
-
-EXPORT_SYMBOL(cap_set_effective);
-
 /**
  * sys_capget - get the capabilities of a given process.
  * @header: pointer to struct that contains capability version and
@@ -208,7 +179,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
 		return -EINVAL;
 
 	ret = cap_get_target_pid(pid, &pE, &pI, &pP);
-
 	if (!ret) {
 		struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 		unsigned i;
@@ -270,6 +240,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
 	kernel_cap_t inheritable, permitted, effective;
+	struct cred *new;
 	int ret;
 	pid_t pid;
 
@@ -284,8 +255,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (pid != 0 && pid != task_pid_vnr(current))
 		return -EPERM;
 
-	if (copy_from_user(&kdata, data, tocopy
-			   * sizeof(struct __user_cap_data_struct)))
+	if (copy_from_user(&kdata, data,
+			   tocopy * sizeof(struct __user_cap_data_struct)))
 		return -EFAULT;
 
 	for (i = 0; i < tocopy; i++) {
@@ -300,24 +271,23 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 		i++;
 	}
 
-	ret = audit_log_capset(pid, &effective, &inheritable, &permitted);
-	if (ret)
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = security_capset(new, current_cred(),
+			      &effective, &inheritable, &permitted);
+	if (ret < 0)
+		goto error;
+
+	ret = audit_log_capset(pid, new, current_cred());
+	if (ret < 0)
 		return ret;
 
-	/* This lock is required even when filesystem capability support is
-	 * configured - it protects the sys_capget() call from returning
-	 * incorrect data in the case that the targeted process is not the
-	 * current one.
-	 */
-	spin_lock(&task_capability_lock);
-
-	ret = security_capset_check(&effective, &inheritable, &permitted);
-	/* Having verified that the proposed changes are legal, we now put them
-	 * into effect.
-	 */
-	if (!ret)
-		security_capset_set(&effective, &inheritable, &permitted);
-	spin_unlock(&task_capability_lock);
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
 	return ret;
 }
 
diff --git a/kernel/cred-internals.h b/kernel/cred-internals.h
new file mode 100644
index 000000000000..2dc4fc2d0bf1
--- /dev/null
+++ b/kernel/cred-internals.h
@@ -0,0 +1,21 @@
+/* Internal credentials stuff
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+/*
+ * user.c
+ */
+static inline void sched_switch_user(struct task_struct *p)
+{
+#ifdef CONFIG_USER_SCHED
+	sched_move_task(p);
+#endif	/* CONFIG_USER_SCHED */
+}
+
diff --git a/kernel/cred.c b/kernel/cred.c
index ac73e3617684..cb6b5eda978d 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -15,6 +15,10 @@
 #include <linux/keyctl.h>
 #include <linux/init_task.h>
 #include <linux/security.h>
+#include <linux/cn_proc.h>
+#include "cred-internals.h"
+
+static struct kmem_cache *cred_jar;
 
 /*
  * The common credentials for the initial task's thread group
@@ -64,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu)
 /*
  * Release a set of thread group credentials.
  */
-static void release_tgcred(struct cred *cred)
+void release_tgcred(struct cred *cred)
 {
 #ifdef CONFIG_KEYS
 	struct thread_group_cred *tgcred = cred->tgcred;
@@ -81,79 +85,322 @@ static void put_cred_rcu(struct rcu_head *rcu)
 {
 	struct cred *cred = container_of(rcu, struct cred, rcu);
 
-	BUG_ON(atomic_read(&cred->usage) != 0);
+	if (atomic_read(&cred->usage) != 0)
+		panic("CRED: put_cred_rcu() sees %p with usage %d\n",
+		      cred, atomic_read(&cred->usage));
 
+	security_cred_free(cred);
 	key_put(cred->thread_keyring);
 	key_put(cred->request_key_auth);
 	release_tgcred(cred);
 	put_group_info(cred->group_info);
 	free_uid(cred->user);
-	security_cred_free(cred);
-	kfree(cred);
+	kmem_cache_free(cred_jar, cred);
 }
 
 /**
  * __put_cred - Destroy a set of credentials
- * @sec: The record to release
+ * @cred: The record to release
  *
  * Destroy a set of credentials on which no references remain.
  */
 void __put_cred(struct cred *cred)
 {
+	BUG_ON(atomic_read(&cred->usage) != 0);
+
 	call_rcu(&cred->rcu, put_cred_rcu);
 }
 EXPORT_SYMBOL(__put_cred);
 
+/**
+ * prepare_creds - Prepare a new set of credentials for modification
+ *
+ * Prepare a new set of task credentials for modification.  A task's creds
+ * shouldn't generally be modified directly, therefore this function is used to
+ * prepare a new copy, which the caller then modifies and then commits by
+ * calling commit_creds().
+ *
+ * Returns a pointer to the new creds-to-be if successful, NULL otherwise.
+ *
+ * Call commit_creds() or abort_creds() to clean up.
+ */
+struct cred *prepare_creds(void)
+{
+	struct task_struct *task = current;
+	const struct cred *old;
+	struct cred *new;
+
+	BUG_ON(atomic_read(&task->cred->usage) < 1);
+
+	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	old = task->cred;
+	memcpy(new, old, sizeof(struct cred));
+
+	atomic_set(&new->usage, 1);
+	get_group_info(new->group_info);
+	get_uid(new->user);
+
+#ifdef CONFIG_KEYS
+	key_get(new->thread_keyring);
+	key_get(new->request_key_auth);
+	atomic_inc(&new->tgcred->usage);
+#endif
+
+#ifdef CONFIG_SECURITY
+	new->security = NULL;
+#endif
+
+	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+		goto error;
+	return new;
+
+error:
+	abort_creds(new);
+	return NULL;
+}
+EXPORT_SYMBOL(prepare_creds);
+
+/*
+ * prepare new credentials for the usermode helper dispatcher
+ */
+struct cred *prepare_usermodehelper_creds(void)
+{
+#ifdef CONFIG_KEYS
+	struct thread_group_cred *tgcred = NULL;
+#endif
+	struct cred *new;
+
+#ifdef CONFIG_KEYS
+	tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC);
+	if (!tgcred)
+		return NULL;
+#endif
+
+	new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
+	if (!new)
+		return NULL;
+
+	memcpy(new, &init_cred, sizeof(struct cred));
+
+	atomic_set(&new->usage, 1);
+	get_group_info(new->group_info);
+	get_uid(new->user);
+
+#ifdef CONFIG_KEYS
+	new->thread_keyring = NULL;
+	new->request_key_auth = NULL;
+	new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT;
+
+	atomic_set(&tgcred->usage, 1);
+	spin_lock_init(&tgcred->lock);
+	new->tgcred = tgcred;
+#endif
+
+#ifdef CONFIG_SECURITY
+	new->security = NULL;
+#endif
+	if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
+		goto error;
+
+	BUG_ON(atomic_read(&new->usage) != 1);
+	return new;
+
+error:
+	put_cred(new);
+	return NULL;
+}
+
 /*
  * Copy credentials for the new process created by fork()
+ *
+ * We share if we can, but under some circumstances we have to generate a new
+ * set.
  */
 int copy_creds(struct task_struct *p, unsigned long clone_flags)
 {
-	struct cred *pcred;
-	int ret;
+#ifdef CONFIG_KEYS
+	struct thread_group_cred *tgcred;
+#endif
+	struct cred *new;
+
+	mutex_init(&p->cred_exec_mutex);
 
-	pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL);
-	if (!pcred)
+	if (
+#ifdef CONFIG_KEYS
+		!p->cred->thread_keyring &&
+#endif
+		clone_flags & CLONE_THREAD
+	    ) {
+		get_cred(p->cred);
+		atomic_inc(&p->cred->user->processes);
+		return 0;
+	}
+
+	new = prepare_creds();
+	if (!new)
 		return -ENOMEM;
 
 #ifdef CONFIG_KEYS
-	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&pcred->tgcred->usage);
-	} else {
-		pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL);
-		if (!pcred->tgcred) {
-			kfree(pcred);
+	/* new threads get their own thread keyrings if their parent already
+	 * had one */
+	if (new->thread_keyring) {
+		key_put(new->thread_keyring);
+		new->thread_keyring = NULL;
+		if (clone_flags & CLONE_THREAD)
+			install_thread_keyring_to_cred(new);
+	}
+
+	/* we share the process and session keyrings between all the threads in
+	 * a process - this is slightly icky as we violate COW credentials a
+	 * bit */
+	if (!(clone_flags & CLONE_THREAD)) {
+		tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
+		if (!tgcred) {
+			put_cred(new);
 			return -ENOMEM;
 		}
-		atomic_set(&pcred->tgcred->usage, 1);
-		spin_lock_init(&pcred->tgcred->lock);
-		pcred->tgcred->process_keyring = NULL;
-		pcred->tgcred->session_keyring =
-			key_get(p->cred->tgcred->session_keyring);
+		atomic_set(&tgcred->usage, 1);
+		spin_lock_init(&tgcred->lock);
+		tgcred->process_keyring = NULL;
+		tgcred->session_keyring = key_get(new->tgcred->session_keyring);
+
+		release_tgcred(new);
+		new->tgcred = tgcred;
 	}
 #endif
 
-#ifdef CONFIG_SECURITY
-	pcred->security = NULL;
-#endif
+	atomic_inc(&new->user->processes);
+	p->cred = new;
+	return 0;
+}
 
-	ret = security_cred_alloc(pcred);
-	if (ret < 0) {
-		release_tgcred(pcred);
-		kfree(pcred);
-		return ret;
+/**
+ * commit_creds - Install new credentials upon the current task
+ * @new: The credentials to be assigned
+ *
+ * Install a new set of credentials to the current task, using RCU to replace
+ * the old set.
+ *
+ * This function eats the caller's reference to the new credentials.
+ *
+ * Always returns 0 thus allowing this function to be tail-called at the end
+ * of, say, sys_setgid().
+ */
+int commit_creds(struct cred *new)
+{
+	struct task_struct *task = current;
+	const struct cred *old;
+
+	BUG_ON(atomic_read(&new->usage) < 1);
+	BUG_ON(atomic_read(&task->cred->usage) < 1);
+
+	old = task->cred;
+	security_commit_creds(new, old);
+
+	/* dumpability changes */
+	if (old->euid != new->euid ||
+	    old->egid != new->egid ||
+	    old->fsuid != new->fsuid ||
+	    old->fsgid != new->fsgid ||
+	    !cap_issubset(new->cap_permitted, old->cap_permitted)) {
+		set_dumpable(task->mm, suid_dumpable);
+		task->pdeath_signal = 0;
+		smp_wmb();
 	}
 
-	atomic_set(&pcred->usage, 1);
-	get_group_info(pcred->group_info);
-	get_uid(pcred->user);
-	key_get(pcred->thread_keyring);
-	key_get(pcred->request_key_auth);
+	/* alter the thread keyring */
+	if (new->fsuid != old->fsuid)
+		key_fsuid_changed(task);
+	if (new->fsgid != old->fsgid)
+		key_fsgid_changed(task);
+
+	/* do it
+	 * - What if a process setreuid()'s and this brings the
+	 *   new uid over his NPROC rlimit?  We can check this now
+	 *   cheaply with the new uid cache, so if it matters
+	 *   we should be checking for it.  -DaveM
+	 */
+	if (new->user != old->user)
+		atomic_inc(&new->user->processes);
+	rcu_assign_pointer(task->cred, new);
+	if (new->user != old->user)
+		atomic_dec(&old->user->processes);
+
+	sched_switch_user(task);
+
+	/* send notifications */
+	if (new->uid   != old->uid  ||
+	    new->euid  != old->euid ||
+	    new->suid  != old->suid ||
+	    new->fsuid != old->fsuid)
+		proc_id_connector(task, PROC_EVENT_UID);
 
-	atomic_inc(&pcred->user->processes);
+	if (new->gid   != old->gid  ||
+	    new->egid  != old->egid ||
+	    new->sgid  != old->sgid ||
+	    new->fsgid != old->fsgid)
+		proc_id_connector(task, PROC_EVENT_GID);
 
-	/* RCU assignment is unneeded here as no-one can have accessed this
-	 * pointer yet, barring us */
-	p->cred = pcred;
+	put_cred(old);
 	return 0;
 }
+EXPORT_SYMBOL(commit_creds);
+
+/**
+ * abort_creds - Discard a set of credentials and unlock the current task
+ * @new: The credentials that were going to be applied
+ *
+ * Discard a set of credentials that were under construction and unlock the
+ * current task.
+ */
+void abort_creds(struct cred *new)
+{
+	BUG_ON(atomic_read(&new->usage) < 1);
+	put_cred(new);
+}
+EXPORT_SYMBOL(abort_creds);
+
+/**
+ * override_creds - Temporarily override the current process's credentials
+ * @new: The credentials to be assigned
+ *
+ * Install a set of temporary override credentials on the current process,
+ * returning the old set for later reversion.
+ */
+const struct cred *override_creds(const struct cred *new)
+{
+	const struct cred *old = current->cred;
+
+	rcu_assign_pointer(current->cred, get_cred(new));
+	return old;
+}
+EXPORT_SYMBOL(override_creds);
+
+/**
+ * revert_creds - Revert a temporary credentials override
+ * @old: The credentials to be restored
+ *
+ * Revert a temporary set of override credentials to an old set, discarding the
+ * override set.
+ */
+void revert_creds(const struct cred *old)
+{
+	const struct cred *override = current->cred;
+
+	rcu_assign_pointer(current->cred, old);
+	put_cred(override);
+}
+EXPORT_SYMBOL(revert_creds);
+
+/*
+ * initialise the credentials stuff
+ */
+void __init cred_init(void)
+{
+	/* allocate a slab in which we can store credentials */
+	cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
+				     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+}
diff --git a/kernel/exit.c b/kernel/exit.c
index bbc22530f2c1..c0711da15486 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,12 +47,14 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <linux/init_task.h>
 #include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
+#include "cred-internals.h"
 
 static void exit_mm(struct task_struct * tsk);
 
@@ -338,12 +340,12 @@ static void reparent_to_kthreadd(void)
 	/* cpus_allowed? */
 	/* rt_priority? */
 	/* signals? */
-	security_task_reparent_to_init(current);
 	memcpy(current->signal->rlim, init_task.signal->rlim,
 	       sizeof(current->signal->rlim));
-	atomic_inc(&(INIT_USER->__count));
+
+	atomic_inc(&init_cred.usage);
+	commit_creds(&init_cred);
 	write_unlock_irq(&tasklist_lock);
-	switch_uid(INIT_USER);
 }
 
 void __set_special_pids(struct pid *pid)
@@ -1085,7 +1087,6 @@ NORET_TYPE void do_exit(long code)
 	check_stack_usage();
 	exit_thread();
 	cgroup_exit(tsk, 1);
-	exit_keys(tsk);
 
 	if (group_dead && tsk->signal->leader)
 		disassociate_ctty(1);
diff --git a/kernel/fork.c b/kernel/fork.c
index ded1972672a3..82a7948a664e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1084,10 +1084,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_sighand;
 	if ((retval = copy_mm(clone_flags, p)))
 		goto bad_fork_cleanup_signal;
-	if ((retval = copy_keys(clone_flags, p)))
-		goto bad_fork_cleanup_mm;
 	if ((retval = copy_namespaces(clone_flags, p)))
-		goto bad_fork_cleanup_keys;
+		goto bad_fork_cleanup_mm;
 	if ((retval = copy_io(clone_flags, p)))
 		goto bad_fork_cleanup_namespaces;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
@@ -1252,8 +1250,6 @@ bad_fork_cleanup_io:
 	put_io_context(p->io_context);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
-bad_fork_cleanup_keys:
-	exit_keys(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
 		mmput(p->mm);
@@ -1281,6 +1277,7 @@ bad_fork_cleanup_cgroup:
 bad_fork_cleanup_put_domain:
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
+	atomic_dec(&p->cred->user->processes);
 	put_cred(p->cred);
 bad_fork_free:
 	free_task(p);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index f044f8f57703..b46dbb908669 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -118,10 +118,10 @@ EXPORT_SYMBOL(request_module);
 struct subprocess_info {
 	struct work_struct work;
 	struct completion *complete;
+	struct cred *cred;
 	char *path;
 	char **argv;
 	char **envp;
-	struct key *ring;
 	enum umh_wait wait;
 	int retval;
 	struct file *stdin;
@@ -134,19 +134,20 @@ struct subprocess_info {
 static int ____call_usermodehelper(void *data)
 {
 	struct subprocess_info *sub_info = data;
-	struct key *new_session, *old_session;
 	int retval;
 
-	/* Unblock all signals and set the session keyring. */
-	new_session = key_get(sub_info->ring);
+	BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+
+	/* Unblock all signals */
 	spin_lock_irq(&current->sighand->siglock);
-	old_session = __install_session_keyring(new_session);
 	flush_signal_handlers(current, 1);
 	sigemptyset(&current->blocked);
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	key_put(old_session);
+	/* Install the credentials */
+	commit_creds(sub_info->cred);
+	sub_info->cred = NULL;
 
 	/* Install input pipe when needed */
 	if (sub_info->stdin) {
@@ -185,6 +186,8 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
 	if (info->cleanup)
 		(*info->cleanup)(info->argv, info->envp);
+	if (info->cred)
+		put_cred(info->cred);
 	kfree(info);
 }
 EXPORT_SYMBOL(call_usermodehelper_freeinfo);
@@ -240,6 +243,8 @@ static void __call_usermodehelper(struct work_struct *work)
 	pid_t pid;
 	enum umh_wait wait = sub_info->wait;
 
+	BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+
 	/* CLONE_VFORK: wait until the usermode helper has execve'd
 	 * successfully We need the data structures to stay around
 	 * until that is done.  */
@@ -362,6 +367,9 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 	sub_info->path = path;
 	sub_info->argv = argv;
 	sub_info->envp = envp;
+	sub_info->cred = prepare_usermodehelper_creds();
+	if (!sub_info->cred)
+		return NULL;
 
   out:
 	return sub_info;
@@ -376,7 +384,13 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
 void call_usermodehelper_setkeys(struct subprocess_info *info,
 				 struct key *session_keyring)
 {
-	info->ring = session_keyring;
+#ifdef CONFIG_KEYS
+	struct thread_group_cred *tgcred = info->cred->tgcred;
+	key_put(tgcred->session_keyring);
+	tgcred->session_keyring = key_get(session_keyring);
+#else
+	BUG();
+#endif
 }
 EXPORT_SYMBOL(call_usermodehelper_setkeys);
 
@@ -444,6 +458,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
 	DECLARE_COMPLETION_ONSTACK(done);
 	int retval = 0;
 
+	BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+
 	helper_lock();
 	if (sub_info->path[0] == '\0')
 		goto out;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index b9d5f4e4f6a4..f764b8806955 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -171,6 +171,14 @@ int ptrace_attach(struct task_struct *task)
 	if (same_thread_group(task, current))
 		goto out;
 
+	/* Protect exec's credential calculations against our interference;
+	 * SUID, SGID and LSM creds get determined differently under ptrace.
+	 */
+	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	if (retval  < 0)
+		goto out;
+
+	retval = -EPERM;
 repeat:
 	/*
 	 * Nasty, nasty.
@@ -210,6 +218,7 @@ repeat:
 bad:
 	write_unlock_irqrestore(&tasklist_lock, flags);
 	task_unlock(task);
+	mutex_unlock(&current->cred_exec_mutex);
 out:
 	return retval;
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index 84989124bafb..2a64304ed54b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -180,7 +180,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
 /*
  * allocate a new signal queue record
  * - this may be called without locks if and only if t == current, otherwise an
- *   appopriate lock must be held to protect t's user_struct
+ *   appopriate lock must be held to stop the target task from exiting
  */
 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 					 int override_rlimit)
@@ -194,7 +194,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 	 * caller must be holding the RCU readlock (by way of a spinlock) and
 	 * we use RCU protection here
 	 */
-	user = __task_cred(t)->user;
+	user = get_uid(__task_cred(t)->user);
 	atomic_inc(&user->sigpending);
 	if (override_rlimit ||
 	    atomic_read(&user->sigpending) <=
@@ -202,12 +202,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
 	if (unlikely(q == NULL)) {
 		atomic_dec(&user->sigpending);
+		free_uid(user);
 	} else {
 		INIT_LIST_HEAD(&q->list);
 		q->flags = 0;
-		q->user = get_uid(user);
+		q->user = user;
 	}
-	return(q);
+
+	return q;
 }
 
 static void __sigqueue_free(struct sigqueue *q)
diff --git a/kernel/sys.c b/kernel/sys.c
index ccc9eb736d35..ab735040468a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -180,7 +180,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
-			user = cred->user;
+			user = (struct user_struct *) cred->user;
 			if (!who)
 				who = cred->uid;
 			else if ((who != cred->uid) &&
@@ -479,47 +479,48 @@ void ctrl_alt_del(void)
  */
 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 {
-	struct cred *cred = current->cred;
-	int old_rgid = cred->gid;
-	int old_egid = cred->egid;
-	int new_rgid = old_rgid;
-	int new_egid = old_egid;
+	const struct cred *old;
+	struct cred *new;
 	int retval;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	old = current_cred();
+
 	retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
 	if (retval)
-		return retval;
+		goto error;
 
+	retval = -EPERM;
 	if (rgid != (gid_t) -1) {
-		if ((old_rgid == rgid) ||
-		    (cred->egid == rgid) ||
+		if (old->gid == rgid ||
+		    old->egid == rgid ||
 		    capable(CAP_SETGID))
-			new_rgid = rgid;
+			new->gid = rgid;
 		else
-			return -EPERM;
+			goto error;
 	}
 	if (egid != (gid_t) -1) {
-		if ((old_rgid == egid) ||
-		    (cred->egid == egid) ||
-		    (cred->sgid == egid) ||
+		if (old->gid == egid ||
+		    old->egid == egid ||
+		    old->sgid == egid ||
 		    capable(CAP_SETGID))
-			new_egid = egid;
+			new->egid = egid;
 		else
-			return -EPERM;
-	}
-	if (new_egid != old_egid) {
-		set_dumpable(current->mm, suid_dumpable);
-		smp_wmb();
+			goto error;
 	}
+
 	if (rgid != (gid_t) -1 ||
-	    (egid != (gid_t) -1 && egid != old_rgid))
-		cred->sgid = new_egid;
-	cred->fsgid = new_egid;
-	cred->egid = new_egid;
-	cred->gid = new_rgid;
-	key_fsgid_changed(current);
-	proc_id_connector(current, PROC_EVENT_GID);
-	return 0;
+	    (egid != (gid_t) -1 && egid != old->gid))
+		new->sgid = new->egid;
+	new->fsgid = new->egid;
+
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
+	return retval;
 }
 
 /*
@@ -529,40 +530,42 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
  */
 asmlinkage long sys_setgid(gid_t gid)
 {
-	struct cred *cred = current->cred;
-	int old_egid = cred->egid;
+	const struct cred *old;
+	struct cred *new;
 	int retval;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	old = current_cred();
+
 	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
 	if (retval)
-		return retval;
+		goto error;
 
-	if (capable(CAP_SETGID)) {
-		if (old_egid != gid) {
-			set_dumpable(current->mm, suid_dumpable);
-			smp_wmb();
-		}
-		cred->gid = cred->egid = cred->sgid = cred->fsgid = gid;
-	} else if ((gid == cred->gid) || (gid == cred->sgid)) {
-		if (old_egid != gid) {
-			set_dumpable(current->mm, suid_dumpable);
-			smp_wmb();
-		}
-		cred->egid = cred->fsgid = gid;
-	}
+	retval = -EPERM;
+	if (capable(CAP_SETGID))
+		new->gid = new->egid = new->sgid = new->fsgid = gid;
+	else if (gid == old->gid || gid == old->sgid)
+		new->egid = new->fsgid = gid;
 	else
-		return -EPERM;
+		goto error;
 
-	key_fsgid_changed(current);
-	proc_id_connector(current, PROC_EVENT_GID);
-	return 0;
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
+	return retval;
 }
   
-static int set_user(uid_t new_ruid, int dumpclear)
+/*
+ * change the user struct in a credentials set to match the new UID
+ */
+static int set_user(struct cred *new)
 {
 	struct user_struct *new_user;
 
-	new_user = alloc_uid(current->nsproxy->user_ns, new_ruid);
+	new_user = alloc_uid(current->nsproxy->user_ns, new->uid);
 	if (!new_user)
 		return -EAGAIN;
 
@@ -573,13 +576,8 @@ static int set_user(uid_t new_ruid, int dumpclear)
 		return -EAGAIN;
 	}
 
-	switch_uid(new_user);
-
-	if (dumpclear) {
-		set_dumpable(current->mm, suid_dumpable);
-		smp_wmb();
-	}
-	current->cred->uid = new_ruid;
+	free_uid(new->user);
+	new->user = new_user;
 	return 0;
 }
 
@@ -600,55 +598,56 @@ static int set_user(uid_t new_ruid, int dumpclear)
  */
 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 {
-	struct cred *cred = current->cred;
-	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
+	const struct cred *old;
+	struct cred *new;
 	int retval;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	old = current_cred();
+
 	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
 	if (retval)
-		return retval;
-
-	new_ruid = old_ruid = cred->uid;
-	new_euid = old_euid = cred->euid;
-	old_suid = cred->suid;
+		goto error;
 
+	retval = -EPERM;
 	if (ruid != (uid_t) -1) {
-		new_ruid = ruid;
-		if ((old_ruid != ruid) &&
-		    (cred->euid != ruid) &&
+		new->uid = ruid;
+		if (old->uid != ruid &&
+		    old->euid != ruid &&
 		    !capable(CAP_SETUID))
-			return -EPERM;
+			goto error;
 	}
 
 	if (euid != (uid_t) -1) {
-		new_euid = euid;
-		if ((old_ruid != euid) &&
-		    (cred->euid != euid) &&
-		    (cred->suid != euid) &&
+		new->euid = euid;
+		if (old->uid != euid &&
+		    old->euid != euid &&
+		    old->suid != euid &&
 		    !capable(CAP_SETUID))
-			return -EPERM;
+			goto error;
 	}
 
-	if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
-		return -EAGAIN;
+	retval = -EAGAIN;
+	if (new->uid != old->uid && set_user(new) < 0)
+		goto error;
 
-	if (new_euid != old_euid) {
-		set_dumpable(current->mm, suid_dumpable);
-		smp_wmb();
-	}
-	cred->fsuid = cred->euid = new_euid;
 	if (ruid != (uid_t) -1 ||
-	    (euid != (uid_t) -1 && euid != old_ruid))
-		cred->suid = cred->euid;
-	cred->fsuid = cred->euid;
-
-	key_fsuid_changed(current);
-	proc_id_connector(current, PROC_EVENT_UID);
+	    (euid != (uid_t) -1 && euid != old->uid))
+		new->suid = new->euid;
+	new->fsuid = new->euid;
 
-	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
-}
+	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
+	if (retval < 0)
+		goto error;
 
+	return commit_creds(new);
 
+error:
+	abort_creds(new);
+	return retval;
+}
 		
 /*
  * setuid() is implemented like SysV with SAVED_IDS 
@@ -663,37 +662,41 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
  */
 asmlinkage long sys_setuid(uid_t uid)
 {
-	struct cred *cred = current->cred;
-	int old_euid = cred->euid;
-	int old_ruid, old_suid, new_suid;
+	const struct cred *old;
+	struct cred *new;
 	int retval;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	old = current_cred();
+
 	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
 	if (retval)
-		return retval;
+		goto error;
 
-	old_ruid = cred->uid;
-	old_suid = cred->suid;
-	new_suid = old_suid;
-	
+	retval = -EPERM;
 	if (capable(CAP_SETUID)) {
-		if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
-			return -EAGAIN;
-		new_suid = uid;
-	} else if ((uid != cred->uid) && (uid != new_suid))
-		return -EPERM;
-
-	if (old_euid != uid) {
-		set_dumpable(current->mm, suid_dumpable);
-		smp_wmb();
+		new->suid = new->uid = uid;
+		if (uid != old->uid && set_user(new) < 0) {
+			retval = -EAGAIN;
+			goto error;
+		}
+	} else if (uid != old->uid && uid != new->suid) {
+		goto error;
 	}
-	cred->fsuid = cred->euid = uid;
-	cred->suid = new_suid;
 
-	key_fsuid_changed(current);
-	proc_id_connector(current, PROC_EVENT_UID);
+	new->fsuid = new->euid = uid;
+
+	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
+	if (retval < 0)
+		goto error;
+
+	return commit_creds(new);
 
-	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
+error:
+	abort_creds(new);
+	return retval;
 }
 
 
@@ -703,47 +706,53 @@ asmlinkage long sys_setuid(uid_t uid)
  */
 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 {
-	struct cred *cred = current->cred;
-	int old_ruid = cred->uid;
-	int old_euid = cred->euid;
-	int old_suid = cred->suid;
+	const struct cred *old;
+	struct cred *new;
 	int retval;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
 	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
 	if (retval)
-		return retval;
+		goto error;
+	old = current_cred();
 
+	retval = -EPERM;
 	if (!capable(CAP_SETUID)) {
-		if ((ruid != (uid_t) -1) && (ruid != cred->uid) &&
-		    (ruid != cred->euid) && (ruid != cred->suid))
-			return -EPERM;
-		if ((euid != (uid_t) -1) && (euid != cred->uid) &&
-		    (euid != cred->euid) && (euid != cred->suid))
-			return -EPERM;
-		if ((suid != (uid_t) -1) && (suid != cred->uid) &&
-		    (suid != cred->euid) && (suid != cred->suid))
-			return -EPERM;
+		if (ruid != (uid_t) -1 && ruid != old->uid &&
+		    ruid != old->euid  && ruid != old->suid)
+			goto error;
+		if (euid != (uid_t) -1 && euid != old->uid &&
+		    euid != old->euid  && euid != old->suid)
+			goto error;
+		if (suid != (uid_t) -1 && suid != old->uid &&
+		    suid != old->euid  && suid != old->suid)
+			goto error;
 	}
+
+	retval = -EAGAIN;
 	if (ruid != (uid_t) -1) {
-		if (ruid != cred->uid &&
-		    set_user(ruid, euid != cred->euid) < 0)
-			return -EAGAIN;
+		new->uid = ruid;
+		if (ruid != old->uid && set_user(new) < 0)
+			goto error;
 	}
-	if (euid != (uid_t) -1) {
-		if (euid != cred->euid) {
-			set_dumpable(current->mm, suid_dumpable);
-			smp_wmb();
-		}
-		cred->euid = euid;
-	}
-	cred->fsuid = cred->euid;
+	if (euid != (uid_t) -1)
+		new->euid = euid;
 	if (suid != (uid_t) -1)
-		cred->suid = suid;
+		new->suid = suid;
+	new->fsuid = new->euid;
 
-	key_fsuid_changed(current);
-	proc_id_connector(current, PROC_EVENT_UID);
+	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
+	if (retval < 0)
+		goto error;
 
-	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
+	return retval;
 }
 
 asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
@@ -763,40 +772,45 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us
  */
 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 {
-	struct cred *cred = current->cred;
+	const struct cred *old;
+	struct cred *new;
 	int retval;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	old = current_cred();
+
 	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
 	if (retval)
-		return retval;
+		goto error;
 
+	retval = -EPERM;
 	if (!capable(CAP_SETGID)) {
-		if ((rgid != (gid_t) -1) && (rgid != cred->gid) &&
-		    (rgid != cred->egid) && (rgid != cred->sgid))
-			return -EPERM;
-		if ((egid != (gid_t) -1) && (egid != cred->gid) &&
-		    (egid != cred->egid) && (egid != cred->sgid))
-			return -EPERM;
-		if ((sgid != (gid_t) -1) && (sgid != cred->gid) &&
-		    (sgid != cred->egid) && (sgid != cred->sgid))
-			return -EPERM;
+		if (rgid != (gid_t) -1 && rgid != old->gid &&
+		    rgid != old->egid  && rgid != old->sgid)
+			goto error;
+		if (egid != (gid_t) -1 && egid != old->gid &&
+		    egid != old->egid  && egid != old->sgid)
+			goto error;
+		if (sgid != (gid_t) -1 && sgid != old->gid &&
+		    sgid != old->egid  && sgid != old->sgid)
+			goto error;
 	}
-	if (egid != (gid_t) -1) {
-		if (egid != cred->egid) {
-			set_dumpable(current->mm, suid_dumpable);
-			smp_wmb();
-		}
-		cred->egid = egid;
-	}
-	cred->fsgid = cred->egid;
+
 	if (rgid != (gid_t) -1)
-		cred->gid = rgid;
+		new->gid = rgid;
+	if (egid != (gid_t) -1)
+		new->egid = egid;
 	if (sgid != (gid_t) -1)
-		cred->sgid = sgid;
+		new->sgid = sgid;
+	new->fsgid = new->egid;
 
-	key_fsgid_changed(current);
-	proc_id_connector(current, PROC_EVENT_GID);
-	return 0;
+	return commit_creds(new);
+
+error:
+	abort_creds(new);
+	return retval;
 }
 
 asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
@@ -820,28 +834,35 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us
  */
 asmlinkage long sys_setfsuid(uid_t uid)
 {
-	struct cred *cred = current->cred;
-	int old_fsuid;
+	const struct cred *old;
+	struct cred *new;
+	uid_t old_fsuid;
+
+	new = prepare_creds();
+	if (!new)
+		return current_fsuid();
+	old = current_cred();
+	old_fsuid = old->fsuid;
 
-	old_fsuid = cred->fsuid;
-	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
-		return old_fsuid;
+	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
+		goto error;
 
-	if (uid == cred->uid || uid == cred->euid ||
-	    uid == cred->suid || uid == cred->fsuid ||
+	if (uid == old->uid  || uid == old->euid  ||
+	    uid == old->suid || uid == old->fsuid ||
 	    capable(CAP_SETUID)) {
 		if (uid != old_fsuid) {
-			set_dumpable(current->mm, suid_dumpable);
-			smp_wmb();
+			new->fsuid = uid;
+			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
+				goto change_okay;
 		}
-		cred->fsuid = uid;
 	}
 
-	key_fsuid_changed(current);
-	proc_id_connector(current, PROC_EVENT_UID);
-
-	security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
+error:
+	abort_creds(new);
+	return old_fsuid;
 
+change_okay:
+	commit_creds(new);
 	return old_fsuid;
 }
 
@@ -850,24 +871,34 @@ asmlinkage long sys_setfsuid(uid_t uid)
  */
 asmlinkage long sys_setfsgid(gid_t gid)
 {
-	struct cred *cred = current->cred;
-	int old_fsgid;
+	const struct cred *old;
+	struct cred *new;
+	gid_t old_fsgid;
+
+	new = prepare_creds();
+	if (!new)
+		return current_fsgid();
+	old = current_cred();
+	old_fsgid = old->fsgid;
 
-	old_fsgid = cred->fsgid;
 	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
-		return old_fsgid;
+		goto error;
 
-	if (gid == cred->gid || gid == cred->egid ||
-	    gid == cred->sgid || gid == cred->fsgid ||
+	if (gid == old->gid  || gid == old->egid  ||
+	    gid == old->sgid || gid == old->fsgid ||
 	    capable(CAP_SETGID)) {
 		if (gid != old_fsgid) {
-			set_dumpable(current->mm, suid_dumpable);
-			smp_wmb();
+			new->fsgid = gid;
+			goto change_okay;
 		}
-		cred->fsgid = gid;
-		key_fsgid_changed(current);
-		proc_id_connector(current, PROC_EVENT_GID);
 	}
+
+error:
+	abort_creds(new);
+	return old_fsgid;
+
+change_okay:
+	commit_creds(new);
 	return old_fsgid;
 }
 
@@ -1136,7 +1167,7 @@ EXPORT_SYMBOL(groups_free);
 
 /* export the group_info to a user-space array */
 static int groups_to_user(gid_t __user *grouplist,
-    struct group_info *group_info)
+			  const struct group_info *group_info)
 {
 	int i;
 	unsigned int count = group_info->ngroups;
@@ -1227,31 +1258,25 @@ int groups_search(const struct group_info *group_info, gid_t grp)
 }
 
 /**
- * set_groups - Change a group subscription in a security record
- * @sec: The security record to alter
- * @group_info: The group list to impose
+ * set_groups - Change a group subscription in a set of credentials
+ * @new: The newly prepared set of credentials to alter
+ * @group_info: The group list to install
  *
- * Validate a group subscription and, if valid, impose it upon a task security
- * record.
+ * Validate a group subscription and, if valid, insert it into a set
+ * of credentials.
  */
-int set_groups(struct cred *cred, struct group_info *group_info)
+int set_groups(struct cred *new, struct group_info *group_info)
 {
 	int retval;
-	struct group_info *old_info;
 
 	retval = security_task_setgroups(group_info);
 	if (retval)
 		return retval;
 
+	put_group_info(new->group_info);
 	groups_sort(group_info);
 	get_group_info(group_info);
-
-	spin_lock(&cred->lock);
-	old_info = cred->group_info;
-	cred->group_info = group_info;
-	spin_unlock(&cred->lock);
-
-	put_group_info(old_info);
+	new->group_info = group_info;
 	return 0;
 }
 
@@ -1266,7 +1291,20 @@ EXPORT_SYMBOL(set_groups);
  */
 int set_current_groups(struct group_info *group_info)
 {
-	return set_groups(current->cred, group_info);
+	struct cred *new;
+	int ret;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	ret = set_groups(new, group_info);
+	if (ret < 0) {
+		abort_creds(new);
+		return ret;
+	}
+
+	return commit_creds(new);
 }
 
 EXPORT_SYMBOL(set_current_groups);
@@ -1666,9 +1704,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 	unsigned char comm[sizeof(me->comm)];
 	long error;
 
-	if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error))
+	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
+	if (error != -ENOSYS)
 		return error;
 
+	error = 0;
 	switch (option) {
 		case PR_SET_PDEATHSIG:
 			if (!valid_signal(arg2)) {
diff --git a/kernel/user.c b/kernel/user.c
index 104d22ac84d5..d476307dd4b0 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/user_namespace.h>
+#include "cred-internals.h"
 
 struct user_namespace init_user_ns = {
 	.kref = {
@@ -104,16 +105,10 @@ static int sched_create_user(struct user_struct *up)
 	return rc;
 }
 
-static void sched_switch_user(struct task_struct *p)
-{
-	sched_move_task(p);
-}
-
 #else	/* CONFIG_USER_SCHED */
 
 static void sched_destroy_user(struct user_struct *up) { }
 static int sched_create_user(struct user_struct *up) { return 0; }
-static void sched_switch_user(struct task_struct *p) { }
 
 #endif	/* CONFIG_USER_SCHED */
 
@@ -448,36 +443,6 @@ out_unlock:
 	return NULL;
 }
 
-void switch_uid(struct user_struct *new_user)
-{
-	struct user_struct *old_user;
-
-	/* What if a process setreuid()'s and this brings the
-	 * new uid over his NPROC rlimit?  We can check this now
-	 * cheaply with the new uid cache, so if it matters
-	 * we should be checking for it.  -DaveM
-	 */
-	old_user = current->cred->user;
-	atomic_inc(&new_user->processes);
-	atomic_dec(&old_user->processes);
-	switch_uid_keyring(new_user);
-	current->cred->user = new_user;
-	sched_switch_user(current);
-
-	/*
-	 * We need to synchronize with __sigqueue_alloc()
-	 * doing a get_uid(p->user).. If that saw the old
-	 * user value, we need to wait until it has exited
-	 * its critical region before we can free the old
-	 * structure.
-	 */
-	smp_mb();
-	spin_unlock_wait(&current->sighand->siglock);
-
-	free_uid(old_user);
-	suid_keys(current);
-}
-
 #ifdef CONFIG_USER_NS
 void release_uids(struct user_namespace *ns)
 {
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index f82730adea00..0d9c51d67333 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -19,6 +19,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
 {
 	struct user_namespace *ns;
 	struct user_struct *new_user;
+	struct cred *new;
 	int n;
 
 	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
@@ -45,7 +46,16 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	switch_uid(new_user);
+	/* Install the new user */
+	new = prepare_creds();
+	if (!new) {
+		free_uid(new_user);
+		free_uid(ns->root_user);
+		kfree(ns);
+	}
+	free_uid(new->user);
+	new->user = new_user;
+	commit_creds(new);
 	return ns;
 }
 
diff --git a/lib/Makefile b/lib/Makefile
index 7cb65d85aeb0..80fe8a3ec12a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o prio_heap.o ratelimit.o show_mem.o
+	 proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 9a8ff684da79..ad8c7a782da1 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -287,6 +287,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
 			      time_t expiry,
 			      u32 kvno)
 {
+	const struct cred *cred = current_cred();
 	struct key *key;
 	int ret;
 
@@ -297,7 +298,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
 
 	_enter("");
 
-	key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0,
+	key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0,
 			KEY_ALLOC_NOT_IN_QUOTA);
 	if (IS_ERR(key)) {
 		_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
@@ -340,10 +341,11 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key);
  */
 struct key *rxrpc_get_null_key(const char *keyname)
 {
+	const struct cred *cred = current_cred();
 	struct key *key;
 	int ret;
 
-	key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current,
+	key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred,
 			KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA);
 	if (IS_ERR(key))
 		return key;
diff --git a/security/capability.c b/security/capability.c
index fac2f61b69a9..efeb6d9e0e6a 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -340,12 +340,16 @@ static int cap_task_create(unsigned long clone_flags)
 	return 0;
 }
 
-static int cap_cred_alloc_security(struct cred *cred)
+static void cap_cred_free(struct cred *cred)
+{
+}
+
+static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp)
 {
 	return 0;
 }
 
-static void cap_cred_free(struct cred *cred)
+static void cap_cred_commit(struct cred *new, const struct cred *old)
 {
 }
 
@@ -750,7 +754,7 @@ static void cap_release_secctx(char *secdata, u32 seclen)
 }
 
 #ifdef CONFIG_KEYS
-static int cap_key_alloc(struct key *key, struct task_struct *ctx,
+static int cap_key_alloc(struct key *key, const struct cred *cred,
 			 unsigned long flags)
 {
 	return 0;
@@ -760,7 +764,7 @@ static void cap_key_free(struct key *key)
 {
 }
 
-static int cap_key_permission(key_ref_t key_ref, struct task_struct *context,
+static int cap_key_permission(key_ref_t key_ref, const struct cred *cred,
 			      key_perm_t perm)
 {
 	return 0;
@@ -814,8 +818,7 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, ptrace_may_access);
 	set_to_cap_if_null(ops, ptrace_traceme);
 	set_to_cap_if_null(ops, capget);
-	set_to_cap_if_null(ops, capset_check);
-	set_to_cap_if_null(ops, capset_set);
+	set_to_cap_if_null(ops, capset);
 	set_to_cap_if_null(ops, acct);
 	set_to_cap_if_null(ops, capable);
 	set_to_cap_if_null(ops, quotactl);
@@ -890,10 +893,11 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, file_receive);
 	set_to_cap_if_null(ops, dentry_open);
 	set_to_cap_if_null(ops, task_create);
-	set_to_cap_if_null(ops, cred_alloc_security);
 	set_to_cap_if_null(ops, cred_free);
+	set_to_cap_if_null(ops, cred_prepare);
+	set_to_cap_if_null(ops, cred_commit);
 	set_to_cap_if_null(ops, task_setuid);
-	set_to_cap_if_null(ops, task_post_setuid);
+	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setgid);
 	set_to_cap_if_null(ops, task_setpgid);
 	set_to_cap_if_null(ops, task_getpgid);
@@ -910,7 +914,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, task_wait);
 	set_to_cap_if_null(ops, task_kill);
 	set_to_cap_if_null(ops, task_prctl);
-	set_to_cap_if_null(ops, task_reparent_to_init);
 	set_to_cap_if_null(ops, task_to_inode);
 	set_to_cap_if_null(ops, ipc_permission);
 	set_to_cap_if_null(ops, ipc_getsecid);
diff --git a/security/commoncap.c b/security/commoncap.c
index 0384bf95db68..b5419273f92d 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -72,8 +72,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
 	int ret = 0;
 
 	rcu_read_lock();
-	if (!cap_issubset(child->cred->cap_permitted,
-			  current->cred->cap_permitted) &&
+	if (!cap_issubset(__task_cred(child)->cap_permitted,
+			  current_cred()->cap_permitted) &&
 	    !capable(CAP_SYS_PTRACE))
 		ret = -EPERM;
 	rcu_read_unlock();
@@ -85,8 +85,8 @@ int cap_ptrace_traceme(struct task_struct *parent)
 	int ret = 0;
 
 	rcu_read_lock();
-	if (!cap_issubset(current->cred->cap_permitted,
-			 parent->cred->cap_permitted) &&
+	if (!cap_issubset(current_cred()->cap_permitted,
+			  __task_cred(parent)->cap_permitted) &&
 	    !has_capability(parent, CAP_SYS_PTRACE))
 		ret = -EPERM;
 	rcu_read_unlock();
@@ -117,7 +117,7 @@ static inline int cap_inh_is_capped(void)
 	 * to the old permitted set. That is, if the current task
 	 * does *not* possess the CAP_SETPCAP capability.
 	 */
-	return (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0);
+	return cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0;
 }
 
 static inline int cap_limit_ptraced_target(void) { return 1; }
@@ -132,52 +132,39 @@ static inline int cap_limit_ptraced_target(void)
 
 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
 
-int cap_capset_check(const kernel_cap_t *effective,
-		     const kernel_cap_t *inheritable,
-		     const kernel_cap_t *permitted)
+int cap_capset(struct cred *new,
+	       const struct cred *old,
+	       const kernel_cap_t *effective,
+	       const kernel_cap_t *inheritable,
+	       const kernel_cap_t *permitted)
 {
-	const struct cred *cred = current->cred;
-
-	if (cap_inh_is_capped()
-	    && !cap_issubset(*inheritable,
-			     cap_combine(cred->cap_inheritable,
-					 cred->cap_permitted))) {
+	if (cap_inh_is_capped() &&
+	    !cap_issubset(*inheritable,
+			  cap_combine(old->cap_inheritable,
+				      old->cap_permitted)))
 		/* incapable of using this inheritable set */
 		return -EPERM;
-	}
+
 	if (!cap_issubset(*inheritable,
-			   cap_combine(cred->cap_inheritable,
-				       cred->cap_bset))) {
+			  cap_combine(old->cap_inheritable,
+				      old->cap_bset)))
 		/* no new pI capabilities outside bounding set */
 		return -EPERM;
-	}
 
 	/* verify restrictions on target's new Permitted set */
-	if (!cap_issubset (*permitted,
-			   cap_combine (cred->cap_permitted,
-					cred->cap_permitted))) {
+	if (!cap_issubset(*permitted, old->cap_permitted))
 		return -EPERM;
-	}
 
 	/* verify the _new_Effective_ is a subset of the _new_Permitted_ */
-	if (!cap_issubset (*effective, *permitted)) {
+	if (!cap_issubset(*effective, *permitted))
 		return -EPERM;
-	}
 
+	new->cap_effective   = *effective;
+	new->cap_inheritable = *inheritable;
+	new->cap_permitted   = *permitted;
 	return 0;
 }
 
-void cap_capset_set(const kernel_cap_t *effective,
-		    const kernel_cap_t *inheritable,
-		    const kernel_cap_t *permitted)
-{
-	struct cred *cred = current->cred;
-
-	cred->cap_effective   = *effective;
-	cred->cap_inheritable = *inheritable;
-	cred->cap_permitted   = *permitted;
-}
-
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
 {
 	cap_clear(bprm->cap_post_exec_permitted);
@@ -382,41 +369,46 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 	return ret;
 }
 
-void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
+int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
-	struct cred *cred = current->cred;
+	const struct cred *old = current_cred();
+	struct cred *new;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
 
-	if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid ||
+	if (bprm->e_uid != old->uid || bprm->e_gid != old->gid ||
 	    !cap_issubset(bprm->cap_post_exec_permitted,
-			  cred->cap_permitted)) {
+			  old->cap_permitted)) {
 		set_dumpable(current->mm, suid_dumpable);
 		current->pdeath_signal = 0;
 
 		if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
 			if (!capable(CAP_SETUID)) {
-				bprm->e_uid = cred->uid;
-				bprm->e_gid = cred->gid;
+				bprm->e_uid = old->uid;
+				bprm->e_gid = old->gid;
 			}
 			if (cap_limit_ptraced_target()) {
 				bprm->cap_post_exec_permitted = cap_intersect(
 					bprm->cap_post_exec_permitted,
-					cred->cap_permitted);
+					new->cap_permitted);
 			}
 		}
 	}
 
-	cred->suid = cred->euid = cred->fsuid = bprm->e_uid;
-	cred->sgid = cred->egid = cred->fsgid = bprm->e_gid;
+	new->suid = new->euid = new->fsuid = bprm->e_uid;
+	new->sgid = new->egid = new->fsgid = bprm->e_gid;
 
 	/* For init, we want to retain the capabilities set
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
 	if (!is_global_init(current)) {
-		cred->cap_permitted = bprm->cap_post_exec_permitted;
+		new->cap_permitted = bprm->cap_post_exec_permitted;
 		if (bprm->cap_effective)
-			cred->cap_effective = bprm->cap_post_exec_permitted;
+			new->cap_effective = bprm->cap_post_exec_permitted;
 		else
-			cap_clear(cred->cap_effective);
+			cap_clear(new->cap_effective);
 	}
 
 	/*
@@ -431,15 +423,15 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	 * Number 1 above might fail if you don't have a full bset, but I think
 	 * that is interesting information to audit.
 	 */
-	if (!cap_isclear(cred->cap_effective)) {
-		if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) ||
-		    (bprm->e_uid != 0) || (cred->uid != 0) ||
+	if (!cap_isclear(new->cap_effective)) {
+		if (!cap_issubset(CAP_FULL_SET, new->cap_effective) ||
+		    bprm->e_uid != 0 || new->uid != 0 ||
 		    issecure(SECURE_NOROOT))
-			audit_log_bprm_fcaps(bprm, &cred->cap_permitted,
-					     &cred->cap_effective);
+			audit_log_bprm_fcaps(bprm, new, old);
 	}
 
-	cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
+	new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
+	return commit_creds(new);
 }
 
 int cap_bprm_secureexec (struct linux_binprm *bprm)
@@ -514,65 +506,49 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
  * files..
  * Thanks to Olaf Kirch and Peter Benie for spotting this.
  */
-static inline void cap_emulate_setxuid (int old_ruid, int old_euid,
-					int old_suid)
+static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
 {
-	struct cred *cred = current->cred;
-
-	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
-	    (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) &&
+	if ((old->uid == 0 || old->euid == 0 || old->suid == 0) &&
+	    (new->uid != 0 && new->euid != 0 && new->suid != 0) &&
 	    !issecure(SECURE_KEEP_CAPS)) {
-		cap_clear(cred->cap_permitted);
-		cap_clear(cred->cap_effective);
-	}
-	if (old_euid == 0 && cred->euid != 0) {
-		cap_clear(cred->cap_effective);
-	}
-	if (old_euid != 0 && cred->euid == 0) {
-		cred->cap_effective = cred->cap_permitted;
+		cap_clear(new->cap_permitted);
+		cap_clear(new->cap_effective);
 	}
+	if (old->euid == 0 && new->euid != 0)
+		cap_clear(new->cap_effective);
+	if (old->euid != 0 && new->euid == 0)
+		new->cap_effective = new->cap_permitted;
 }
 
-int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
-			  int flags)
+int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
 {
-	struct cred *cred = current->cred;
-
 	switch (flags) {
 	case LSM_SETID_RE:
 	case LSM_SETID_ID:
 	case LSM_SETID_RES:
 		/* Copied from kernel/sys.c:setreuid/setuid/setresuid. */
-		if (!issecure (SECURE_NO_SETUID_FIXUP)) {
-			cap_emulate_setxuid (old_ruid, old_euid, old_suid);
-		}
+		if (!issecure(SECURE_NO_SETUID_FIXUP))
+			cap_emulate_setxuid(new, old);
 		break;
 	case LSM_SETID_FS:
-		{
-			uid_t old_fsuid = old_ruid;
-
-			/* Copied from kernel/sys.c:setfsuid. */
+		/* Copied from kernel/sys.c:setfsuid. */
 
-			/*
-			 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
-			 *          if not, we might be a bit too harsh here.
-			 */
-
-			if (!issecure (SECURE_NO_SETUID_FIXUP)) {
-				if (old_fsuid == 0 && cred->fsuid != 0) {
-					cred->cap_effective =
-						cap_drop_fs_set(
-							cred->cap_effective);
-				}
-				if (old_fsuid != 0 && cred->fsuid == 0) {
-					cred->cap_effective =
-						cap_raise_fs_set(
-						    cred->cap_effective,
-						    cred->cap_permitted);
-				}
+		/*
+		 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
+		 *          if not, we might be a bit too harsh here.
+		 */
+		if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+			if (old->fsuid == 0 && new->fsuid != 0) {
+				new->cap_effective =
+					cap_drop_fs_set(new->cap_effective);
+			}
+			if (old->fsuid != 0 && new->fsuid == 0) {
+				new->cap_effective =
+					cap_raise_fs_set(new->cap_effective,
+							 new->cap_permitted);
 			}
-			break;
 		}
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -628,13 +604,14 @@ int cap_task_setnice (struct task_struct *p, int nice)
  * this task could get inconsistent info.  There can be no
  * racing writer bc a task can only change its own caps.
  */
-static long cap_prctl_drop(unsigned long cap)
+static long cap_prctl_drop(struct cred *new, unsigned long cap)
 {
 	if (!capable(CAP_SETPCAP))
 		return -EPERM;
 	if (!cap_valid(cap))
 		return -EINVAL;
-	cap_lower(current->cred->cap_bset, cap);
+
+	cap_lower(new->cap_bset, cap);
 	return 0;
 }
 
@@ -655,22 +632,29 @@ int cap_task_setnice (struct task_struct *p, int nice)
 #endif
 
 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
-		   unsigned long arg4, unsigned long arg5, long *rc_p)
+		   unsigned long arg4, unsigned long arg5)
 {
-	struct cred *cred = current_cred();
+	struct cred *new;
 	long error = 0;
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
 	switch (option) {
 	case PR_CAPBSET_READ:
+		error = -EINVAL;
 		if (!cap_valid(arg2))
-			error = -EINVAL;
-		else
-			error = !!cap_raised(cred->cap_bset, arg2);
-		break;
+			goto error;
+		error = !!cap_raised(new->cap_bset, arg2);
+		goto no_change;
+
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 	case PR_CAPBSET_DROP:
-		error = cap_prctl_drop(arg2);
-		break;
+		error = cap_prctl_drop(new, arg2);
+		if (error < 0)
+			goto error;
+		goto changed;
 
 	/*
 	 * The next four prctl's remain to assist with transitioning a
@@ -692,12 +676,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 	 * capability-based-privilege environment.
 	 */
 	case PR_SET_SECUREBITS:
-		if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1)
-		     & (cred->securebits ^ arg2))                  /*[1]*/
-		    || ((cred->securebits & SECURE_ALL_LOCKS
-			 & ~arg2))                                    /*[2]*/
-		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
-		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/
+		error = -EPERM;
+		if ((((new->securebits & SECURE_ALL_LOCKS) >> 1)
+		     & (new->securebits ^ arg2))			/*[1]*/
+		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
+		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
+		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -705,50 +689,51 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			 * [4] doing anything requires privilege (go read about
 			 *     the "sendmail capabilities bug")
 			 */
-			error = -EPERM;  /* cannot change a locked bit */
-		} else {
-			cred->securebits = arg2;
-		}
-		break;
+		    )
+			/* cannot change a locked bit */
+			goto error;
+		new->securebits = arg2;
+		goto changed;
+
 	case PR_GET_SECUREBITS:
-		error = cred->securebits;
-		break;
+		error = new->securebits;
+		goto no_change;
 
 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */
 
 	case PR_GET_KEEPCAPS:
 		if (issecure(SECURE_KEEP_CAPS))
 			error = 1;
-		break;
+		goto no_change;
+
 	case PR_SET_KEEPCAPS:
+		error = -EINVAL;
 		if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
-			error = -EINVAL;
-		else if (issecure(SECURE_KEEP_CAPS_LOCKED))
-			error = -EPERM;
-		else if (arg2)
-			cred->securebits |= issecure_mask(SECURE_KEEP_CAPS);
+			goto error;
+		error = -EPERM;
+		if (issecure(SECURE_KEEP_CAPS_LOCKED))
+			goto error;
+		if (arg2)
+			new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
 		else
-			cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
-		break;
+			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
+		goto changed;
 
 	default:
 		/* No functionality available - continue with default */
-		return 0;
+		error = -ENOSYS;
+		goto error;
 	}
 
 	/* Functionality provided */
-	*rc_p = error;
-	return 1;
-}
-
-void cap_task_reparent_to_init (struct task_struct *p)
-{
-	struct cred *cred = p->cred;
-
-	cap_set_init_eff(cred->cap_effective);
-	cap_clear(cred->cap_inheritable);
-	cap_set_full(cred->cap_permitted);
-	p->cred->securebits = SECUREBITS_DEFAULT;
+changed:
+	return commit_creds(new);
+
+no_change:
+	error = 0;
+error:
+	abort_creds(new);
+	return error;
 }
 
 int cap_syslog (int type)
diff --git a/security/keys/internal.h b/security/keys/internal.h
index d1586c629788..81932abefe7b 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -12,6 +12,7 @@
 #ifndef _INTERNAL_H
 #define _INTERNAL_H
 
+#include <linux/sched.h>
 #include <linux/key-type.h>
 
 static inline __attribute__((format(printf, 1, 2)))
@@ -25,7 +26,7 @@ void no_printk(const char *fmt, ...)
 #define kleave(FMT, ...) \
 	printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
 #define kdebug(FMT, ...) \
-	printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
+	printk(KERN_DEBUG "   "FMT"\n", ##__VA_ARGS__)
 #else
 #define kenter(FMT, ...) \
 	no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
@@ -97,7 +98,7 @@ extern struct key *keyring_search_instkey(struct key *keyring,
 typedef int (*key_match_func_t)(const struct key *, const void *);
 
 extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-				    struct task_struct *tsk,
+				    const struct cred *cred,
 				    struct key_type *type,
 				    const void *description,
 				    key_match_func_t match);
@@ -105,13 +106,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 extern key_ref_t search_process_keyrings(struct key_type *type,
 					 const void *description,
 					 key_match_func_t match,
-					 struct task_struct *tsk);
+					 const struct cred *cred);
 
 extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 
 extern int install_user_keyrings(void);
-extern int install_thread_keyring(void);
-extern int install_process_keyring(void);
+extern int install_thread_keyring_to_cred(struct cred *);
+extern int install_process_keyring_to_cred(struct cred *);
 
 extern struct key *request_key_and_link(struct key_type *type,
 					const char *description,
@@ -130,12 +131,12 @@ extern long join_session_keyring(const char *name);
  * check to see whether permission is granted to use a key in the desired way
  */
 extern int key_task_permission(const key_ref_t key_ref,
-			       struct task_struct *context,
+			       const struct cred *cred,
 			       key_perm_t perm);
 
 static inline int key_permission(const key_ref_t key_ref, key_perm_t perm)
 {
-	return key_task_permission(key_ref, current, perm);
+	return key_task_permission(key_ref, current_cred(), perm);
 }
 
 /* required permissions */
@@ -153,7 +154,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm)
 struct request_key_auth {
 	struct key		*target_key;
 	struct key		*dest_keyring;
-	struct task_struct	*context;
+	const struct cred	*cred;
 	void			*callout_info;
 	size_t			callout_len;
 	pid_t			pid;
diff --git a/security/keys/key.c b/security/keys/key.c
index a6ca39ed3b0e..f76c8a546fd3 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -218,7 +218,7 @@ serial_exists:
  *   instantiate the key or discard it before returning
  */
 struct key *key_alloc(struct key_type *type, const char *desc,
-		      uid_t uid, gid_t gid, struct task_struct *ctx,
+		      uid_t uid, gid_t gid, const struct cred *cred,
 		      key_perm_t perm, unsigned long flags)
 {
 	struct key_user *user = NULL;
@@ -294,7 +294,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 #endif
 
 	/* let the security module know about the key */
-	ret = security_key_alloc(key, ctx, flags);
+	ret = security_key_alloc(key, cred, flags);
 	if (ret < 0)
 		goto security_error;
 
@@ -391,7 +391,7 @@ static int __key_instantiate_and_link(struct key *key,
 				      const void *data,
 				      size_t datalen,
 				      struct key *keyring,
-				      struct key *instkey)
+				      struct key *authkey)
 {
 	int ret, awaken;
 
@@ -421,8 +421,8 @@ static int __key_instantiate_and_link(struct key *key,
 				ret = __key_link(keyring, key);
 
 			/* disable the authorisation key */
-			if (instkey)
-				key_revoke(instkey);
+			if (authkey)
+				key_revoke(authkey);
 		}
 	}
 
@@ -444,14 +444,14 @@ int key_instantiate_and_link(struct key *key,
 			     const void *data,
 			     size_t datalen,
 			     struct key *keyring,
-			     struct key *instkey)
+			     struct key *authkey)
 {
 	int ret;
 
 	if (keyring)
 		down_write(&keyring->sem);
 
-	ret = __key_instantiate_and_link(key, data, datalen, keyring, instkey);
+	ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey);
 
 	if (keyring)
 		up_write(&keyring->sem);
@@ -469,7 +469,7 @@ EXPORT_SYMBOL(key_instantiate_and_link);
 int key_negate_and_link(struct key *key,
 			unsigned timeout,
 			struct key *keyring,
-			struct key *instkey)
+			struct key *authkey)
 {
 	struct timespec now;
 	int ret, awaken;
@@ -504,8 +504,8 @@ int key_negate_and_link(struct key *key,
 			ret = __key_link(keyring, key);
 
 		/* disable the authorisation key */
-		if (instkey)
-			key_revoke(instkey);
+		if (authkey)
+			key_revoke(authkey);
 	}
 
 	mutex_unlock(&key_construction_mutex);
@@ -743,6 +743,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 			       key_perm_t perm,
 			       unsigned long flags)
 {
+	const struct cred *cred = current_cred();
 	struct key_type *ktype;
 	struct key *keyring, *key = NULL;
 	key_ref_t key_ref;
@@ -802,8 +803,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 
 	/* allocate a new key */
-	key = key_alloc(ktype, description, current_fsuid(), current_fsgid(),
-			current, perm, flags);
+	key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
+			perm, flags);
 	if (IS_ERR(key)) {
 		key_ref = ERR_CAST(key);
 		goto error_3;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 8833b447adef..7c72baa02f2e 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -866,6 +866,23 @@ static long get_instantiation_keyring(key_serial_t ringid,
 	return -ENOKEY;
 }
 
+/*
+ * change the request_key authorisation key on the current process
+ */
+static int keyctl_change_reqkey_auth(struct key *key)
+{
+	struct cred *new;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	key_put(new->request_key_auth);
+	new->request_key_auth = key_get(key);
+
+	return commit_creds(new);
+}
+
 /*****************************************************************************/
 /*
  * instantiate the key with the specified payload, and, if one is given, link
@@ -876,12 +893,15 @@ long keyctl_instantiate_key(key_serial_t id,
 			    size_t plen,
 			    key_serial_t ringid)
 {
+	const struct cred *cred = current_cred();
 	struct request_key_auth *rka;
 	struct key *instkey, *dest_keyring;
 	void *payload;
 	long ret;
 	bool vm = false;
 
+	kenter("%d,,%zu,%d", id, plen, ringid);
+
 	ret = -EINVAL;
 	if (plen > 1024 * 1024 - 1)
 		goto error;
@@ -889,7 +909,7 @@ long keyctl_instantiate_key(key_serial_t id,
 	/* the appropriate instantiation authorisation key must have been
 	 * assumed before calling this */
 	ret = -EPERM;
-	instkey = current->cred->request_key_auth;
+	instkey = cred->request_key_auth;
 	if (!instkey)
 		goto error;
 
@@ -931,10 +951,8 @@ long keyctl_instantiate_key(key_serial_t id,
 
 	/* discard the assumed authority if it's just been disabled by
 	 * instantiation of the key */
-	if (ret == 0) {
-		key_put(current->cred->request_key_auth);
-		current->cred->request_key_auth = NULL;
-	}
+	if (ret == 0)
+		keyctl_change_reqkey_auth(NULL);
 
 error2:
 	if (!vm)
@@ -953,14 +971,17 @@ error:
  */
 long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
 {
+	const struct cred *cred = current_cred();
 	struct request_key_auth *rka;
 	struct key *instkey, *dest_keyring;
 	long ret;
 
+	kenter("%d,%u,%d", id, timeout, ringid);
+
 	/* the appropriate instantiation authorisation key must have been
 	 * assumed before calling this */
 	ret = -EPERM;
-	instkey = current->cred->request_key_auth;
+	instkey = cred->request_key_auth;
 	if (!instkey)
 		goto error;
 
@@ -982,10 +1003,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
 
 	/* discard the assumed authority if it's just been disabled by
 	 * instantiation of the key */
-	if (ret == 0) {
-		key_put(current->cred->request_key_auth);
-		current->cred->request_key_auth = NULL;
-	}
+	if (ret == 0)
+		keyctl_change_reqkey_auth(NULL);
 
 error:
 	return ret;
@@ -999,36 +1018,56 @@ error:
  */
 long keyctl_set_reqkey_keyring(int reqkey_defl)
 {
-	struct cred *cred = current->cred;
-	int ret;
+	struct cred *new;
+	int ret, old_setting;
+
+	old_setting = current_cred_xxx(jit_keyring);
+
+	if (reqkey_defl == KEY_REQKEY_DEFL_NO_CHANGE)
+		return old_setting;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
 
 	switch (reqkey_defl) {
 	case KEY_REQKEY_DEFL_THREAD_KEYRING:
-		ret = install_thread_keyring();
+		ret = install_thread_keyring_to_cred(new);
 		if (ret < 0)
-			return ret;
+			goto error;
 		goto set;
 
 	case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-		ret = install_process_keyring();
-		if (ret < 0)
-			return ret;
+		ret = install_process_keyring_to_cred(new);
+		if (ret < 0) {
+			if (ret != -EEXIST)
+				goto error;
+			ret = 0;
+		}
+		goto set;
 
 	case KEY_REQKEY_DEFL_DEFAULT:
 	case KEY_REQKEY_DEFL_SESSION_KEYRING:
 	case KEY_REQKEY_DEFL_USER_KEYRING:
 	case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
-	set:
-		cred->jit_keyring = reqkey_defl;
+	case KEY_REQKEY_DEFL_REQUESTOR_KEYRING:
+		goto set;
 
 	case KEY_REQKEY_DEFL_NO_CHANGE:
-		return cred->jit_keyring;
-
 	case KEY_REQKEY_DEFL_GROUP_KEYRING:
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
+		goto error;
 	}
 
+set:
+	new->jit_keyring = reqkey_defl;
+	commit_creds(new);
+	return old_setting;
+error:
+	abort_creds(new);
+	return -EINVAL;
+
 } /* end keyctl_set_reqkey_keyring() */
 
 /*****************************************************************************/
@@ -1087,9 +1126,7 @@ long keyctl_assume_authority(key_serial_t id)
 
 	/* we divest ourselves of authority if given an ID of 0 */
 	if (id == 0) {
-		key_put(current->cred->request_key_auth);
-		current->cred->request_key_auth = NULL;
-		ret = 0;
+		ret = keyctl_change_reqkey_auth(NULL);
 		goto error;
 	}
 
@@ -1104,10 +1141,12 @@ long keyctl_assume_authority(key_serial_t id)
 		goto error;
 	}
 
-	key_put(current->cred->request_key_auth);
-	current->cred->request_key_auth = authkey;
-	ret = authkey->serial;
+	ret = keyctl_change_reqkey_auth(authkey);
+	if (ret < 0)
+		goto error;
+	key_put(authkey);
 
+	ret = authkey->serial;
 error:
 	return ret;
 
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index fdf75f901991..ed851574d073 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -245,14 +245,14 @@ static long keyring_read(const struct key *keyring,
  * allocate a keyring and link into the destination keyring
  */
 struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
-			  struct task_struct *ctx, unsigned long flags,
+			  const struct cred *cred, unsigned long flags,
 			  struct key *dest)
 {
 	struct key *keyring;
 	int ret;
 
 	keyring = key_alloc(&key_type_keyring, description,
-			    uid, gid, ctx,
+			    uid, gid, cred,
 			    (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL,
 			    flags);
 
@@ -281,7 +281,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
  * - we propagate the possession attribute from the keyring ref to the key ref
  */
 key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-			     struct task_struct *context,
+			     const struct cred *cred,
 			     struct key_type *type,
 			     const void *description,
 			     key_match_func_t match)
@@ -304,7 +304,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 	key_check(keyring);
 
 	/* top keyring must have search permission to begin the search */
-        err = key_task_permission(keyring_ref, context, KEY_SEARCH);
+        err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
 	if (err < 0) {
 		key_ref = ERR_PTR(err);
 		goto error;
@@ -377,7 +377,7 @@ descend:
 
 		/* key must have search permissions */
 		if (key_task_permission(make_key_ref(key, possessed),
-					context, KEY_SEARCH) < 0)
+					cred, KEY_SEARCH) < 0)
 			continue;
 
 		/* we set a different error code if we pass a negative key */
@@ -404,7 +404,7 @@ ascend:
 			continue;
 
 		if (key_task_permission(make_key_ref(key, possessed),
-					context, KEY_SEARCH) < 0)
+					cred, KEY_SEARCH) < 0)
 			continue;
 
 		/* stack the current position */
@@ -459,7 +459,7 @@ key_ref_t keyring_search(key_ref_t keyring,
 	if (!type->match)
 		return ERR_PTR(-ENOKEY);
 
-	return keyring_search_aux(keyring, current,
+	return keyring_search_aux(keyring, current->cred,
 				  type, description, type->match);
 
 } /* end keyring_search() */
diff --git a/security/keys/permission.c b/security/keys/permission.c
index 13c36164f284..5d9fc7b93f2e 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -14,24 +14,27 @@
 #include "internal.h"
 
 /*****************************************************************************/
-/*
- * check to see whether permission is granted to use a key in the desired way,
- * but permit the security modules to override
+/**
+ * key_task_permission - Check a key can be used
+ * @key_ref: The key to check
+ * @cred: The credentials to use
+ * @perm: The permissions to check for
+ *
+ * Check to see whether permission is granted to use a key in the desired way,
+ * but permit the security modules to override.
+ *
+ * The caller must hold either a ref on cred or must hold the RCU readlock or a
+ * spinlock.
  */
-int key_task_permission(const key_ref_t key_ref,
-			struct task_struct *context,
+int key_task_permission(const key_ref_t key_ref, const struct cred *cred,
 			key_perm_t perm)
 {
-	const struct cred *cred;
 	struct key *key;
 	key_perm_t kperm;
 	int ret;
 
 	key = key_ref_to_ptr(key_ref);
 
-	rcu_read_lock();
-	cred = __task_cred(context);
-
 	/* use the second 8-bits of permissions for keys the caller owns */
 	if (key->uid == cred->fsuid) {
 		kperm = key->perm >> 16;
@@ -57,7 +60,6 @@ int key_task_permission(const key_ref_t key_ref,
 	kperm = key->perm;
 
 use_these_perms:
-	rcu_read_lock();
 
 	/* use the top 8-bits of permissions for keys the caller possesses
 	 * - possessor permissions are additive with other permissions
@@ -71,7 +73,7 @@ use_these_perms:
 		return -EACCES;
 
 	/* let LSM be the final arbiter */
-	return security_key_permission(key_ref, context, perm);
+	return security_key_permission(key_ref, cred, perm);
 
 } /* end key_task_permission() */
 
diff --git a/security/keys/proc.c b/security/keys/proc.c
index f619170da760..7f508def50e3 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -136,8 +136,12 @@ static int proc_keys_show(struct seq_file *m, void *v)
 	int rc;
 
 	/* check whether the current task is allowed to view the key (assuming
-	 * non-possession) */
-	rc = key_task_permission(make_key_ref(key, 0), current, KEY_VIEW);
+	 * non-possession)
+	 * - the caller holds a spinlock, and thus the RCU read lock, making our
+	 *   access to __current_cred() safe
+	 */
+	rc = key_task_permission(make_key_ref(key, 0), current_cred(),
+				 KEY_VIEW);
 	if (rc < 0)
 		return 0;
 
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 70ee93406f30..df329f684a65 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -42,11 +42,15 @@ struct key_user root_key_user = {
  */
 int install_user_keyrings(void)
 {
-	struct user_struct *user = current->cred->user;
+	struct user_struct *user;
+	const struct cred *cred;
 	struct key *uid_keyring, *session_keyring;
 	char buf[20];
 	int ret;
 
+	cred = current_cred();
+	user = cred->user;
+
 	kenter("%p{%u}", user, user->uid);
 
 	if (user->uid_keyring) {
@@ -67,7 +71,7 @@ int install_user_keyrings(void)
 		uid_keyring = find_keyring_by_name(buf, true);
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
-						    current, KEY_ALLOC_IN_QUOTA,
+						    cred, KEY_ALLOC_IN_QUOTA,
 						    NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
@@ -83,8 +87,7 @@ int install_user_keyrings(void)
 		if (IS_ERR(session_keyring)) {
 			session_keyring =
 				keyring_alloc(buf, user->uid, (gid_t) -1,
-					      current, KEY_ALLOC_IN_QUOTA,
-					      NULL);
+					      cred, KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
 				goto error_release;
@@ -116,142 +119,128 @@ error:
 	return ret;
 }
 
-/*****************************************************************************/
 /*
- * deal with the UID changing
+ * install a fresh thread keyring directly to new credentials
  */
-void switch_uid_keyring(struct user_struct *new_user)
+int install_thread_keyring_to_cred(struct cred *new)
 {
-#if 0 /* do nothing for now */
-	struct key *old;
-
-	/* switch to the new user's session keyring if we were running under
-	 * root's default session keyring */
-	if (new_user->uid != 0 &&
-	    current->session_keyring == &root_session_keyring
-	    ) {
-		atomic_inc(&new_user->session_keyring->usage);
-
-		task_lock(current);
-		old = current->session_keyring;
-		current->session_keyring = new_user->session_keyring;
-		task_unlock(current);
+	struct key *keyring;
 
-		key_put(old);
-	}
-#endif
+	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
+				KEY_ALLOC_QUOTA_OVERRUN, NULL);
+	if (IS_ERR(keyring))
+		return PTR_ERR(keyring);
 
-} /* end switch_uid_keyring() */
+	new->thread_keyring = keyring;
+	return 0;
+}
 
-/*****************************************************************************/
 /*
  * install a fresh thread keyring, discarding the old one
  */
-int install_thread_keyring(void)
+static int install_thread_keyring(void)
 {
-	struct task_struct *tsk = current;
-	struct key *keyring, *old;
-	char buf[20];
+	struct cred *new;
 	int ret;
 
-	sprintf(buf, "_tid.%u", tsk->pid);
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
 
-	keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
-				KEY_ALLOC_QUOTA_OVERRUN, NULL);
-	if (IS_ERR(keyring)) {
-		ret = PTR_ERR(keyring);
-		goto error;
+	BUG_ON(new->thread_keyring);
+
+	ret = install_thread_keyring_to_cred(new);
+	if (ret < 0) {
+		abort_creds(new);
+		return ret;
 	}
 
-	task_lock(tsk);
-	old = tsk->cred->thread_keyring;
-	tsk->cred->thread_keyring = keyring;
-	task_unlock(tsk);
+	return commit_creds(new);
+}
 
-	ret = 0;
+/*
+ * install a process keyring directly to a credentials struct
+ * - returns -EEXIST if there was already a process keyring, 0 if one installed,
+ *   and other -ve on any other error
+ */
+int install_process_keyring_to_cred(struct cred *new)
+{
+	struct key *keyring;
+	int ret;
 
-	key_put(old);
-error:
+	if (new->tgcred->process_keyring)
+		return -EEXIST;
+
+	keyring = keyring_alloc("_pid", new->uid, new->gid,
+				new, KEY_ALLOC_QUOTA_OVERRUN, NULL);
+	if (IS_ERR(keyring))
+		return PTR_ERR(keyring);
+
+	spin_lock_irq(&new->tgcred->lock);
+	if (!new->tgcred->process_keyring) {
+		new->tgcred->process_keyring = keyring;
+		keyring = NULL;
+		ret = 0;
+	} else {
+		ret = -EEXIST;
+	}
+	spin_unlock_irq(&new->tgcred->lock);
+	key_put(keyring);
 	return ret;
+}
 
-} /* end install_thread_keyring() */
-
-/*****************************************************************************/
 /*
  * make sure a process keyring is installed
+ * - we
  */
-int install_process_keyring(void)
+static int install_process_keyring(void)
 {
-	struct task_struct *tsk = current;
-	struct key *keyring;
-	char buf[20];
+	struct cred *new;
 	int ret;
 
-	might_sleep();
-
-	if (!tsk->cred->tgcred->process_keyring) {
-		sprintf(buf, "_pid.%u", tsk->tgid);
-
-		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk,
-					KEY_ALLOC_QUOTA_OVERRUN, NULL);
-		if (IS_ERR(keyring)) {
-			ret = PTR_ERR(keyring);
-			goto error;
-		}
-
-		/* attach keyring */
-		spin_lock_irq(&tsk->cred->tgcred->lock);
-		if (!tsk->cred->tgcred->process_keyring) {
-			tsk->cred->tgcred->process_keyring = keyring;
-			keyring = NULL;
-		}
-		spin_unlock_irq(&tsk->cred->tgcred->lock);
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
 
-		key_put(keyring);
+	ret = install_process_keyring_to_cred(new);
+	if (ret < 0) {
+		abort_creds(new);
+		return ret != -EEXIST ?: 0;
 	}
 
-	ret = 0;
-error:
-	return ret;
-
-} /* end install_process_keyring() */
+	return commit_creds(new);
+}
 
-/*****************************************************************************/
 /*
- * install a session keyring, discarding the old one
- * - if a keyring is not supplied, an empty one is invented
+ * install a session keyring directly to a credentials struct
  */
-static int install_session_keyring(struct key *keyring)
+static int install_session_keyring_to_cred(struct cred *cred,
+					   struct key *keyring)
 {
-	struct task_struct *tsk = current;
 	unsigned long flags;
 	struct key *old;
-	char buf[20];
 
 	might_sleep();
 
 	/* create an empty session keyring */
 	if (!keyring) {
-		sprintf(buf, "_ses.%u", tsk->tgid);
-
 		flags = KEY_ALLOC_QUOTA_OVERRUN;
-		if (tsk->cred->tgcred->session_keyring)
+		if (cred->tgcred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
-		keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid,
-					tsk, flags, NULL);
+		keyring = keyring_alloc("_ses", cred->uid, cred->gid,
+					cred, flags, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
-	}
-	else {
+	} else {
 		atomic_inc(&keyring->usage);
 	}
 
 	/* install the keyring */
-	spin_lock_irq(&tsk->cred->tgcred->lock);
-	old = tsk->cred->tgcred->session_keyring;
-	rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring);
-	spin_unlock_irq(&tsk->cred->tgcred->lock);
+	spin_lock_irq(&cred->tgcred->lock);
+	old = cred->tgcred->session_keyring;
+	rcu_assign_pointer(cred->tgcred->session_keyring, keyring);
+	spin_unlock_irq(&cred->tgcred->lock);
 
 	/* we're using RCU on the pointer, but there's no point synchronising
 	 * on it if it didn't previously point to anything */
@@ -261,38 +250,29 @@ static int install_session_keyring(struct key *keyring)
 	}
 
 	return 0;
+}
 
-} /* end install_session_keyring() */
-
-/*****************************************************************************/
 /*
- * copy the keys for fork
+ * install a session keyring, discarding the old one
+ * - if a keyring is not supplied, an empty one is invented
  */
-int copy_keys(unsigned long clone_flags, struct task_struct *tsk)
+static int install_session_keyring(struct key *keyring)
 {
-	key_check(tsk->cred->thread_keyring);
-	key_check(tsk->cred->request_key_auth);
-
-	/* no thread keyring yet */
-	tsk->cred->thread_keyring = NULL;
-
-	/* copy the request_key() authorisation for this thread */
-	key_get(tsk->cred->request_key_auth);
-
-	return 0;
+	struct cred *new;
+	int ret;
 
-} /* end copy_keys() */
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
 
-/*****************************************************************************/
-/*
- * dispose of per-thread keys upon thread exit
- */
-void exit_keys(struct task_struct *tsk)
-{
-	key_put(tsk->cred->thread_keyring);
-	key_put(tsk->cred->request_key_auth);
+	ret = install_session_keyring_to_cred(new, NULL);
+	if (ret < 0) {
+		abort_creds(new);
+		return ret;
+	}
 
-} /* end exit_keys() */
+	return commit_creds(new);
+}
 
 /*****************************************************************************/
 /*
@@ -300,38 +280,41 @@ void exit_keys(struct task_struct *tsk)
  */
 int exec_keys(struct task_struct *tsk)
 {
-	struct key *old;
+	struct thread_group_cred *tgcred = NULL;
+	struct cred *new;
 
-	/* newly exec'd tasks don't get a thread keyring */
-	task_lock(tsk);
-	old = tsk->cred->thread_keyring;
-	tsk->cred->thread_keyring = NULL;
-	task_unlock(tsk);
+#ifdef CONFIG_KEYS
+	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
+	if (!tgcred)
+		return -ENOMEM;
+#endif
 
-	key_put(old);
+	new = prepare_creds();
+	if (new < 0)
+		return -ENOMEM;
 
-	/* discard the process keyring from a newly exec'd task */
-	spin_lock_irq(&tsk->cred->tgcred->lock);
-	old = tsk->cred->tgcred->process_keyring;
-	tsk->cred->tgcred->process_keyring = NULL;
-	spin_unlock_irq(&tsk->cred->tgcred->lock);
+	/* newly exec'd tasks don't get a thread keyring */
+	key_put(new->thread_keyring);
+	new->thread_keyring = NULL;
 
-	key_put(old);
+	/* create a new per-thread-group creds for all this set of threads to
+	 * share */
+	memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
 
-	return 0;
+	atomic_set(&tgcred->usage, 1);
+	spin_lock_init(&tgcred->lock);
 
-} /* end exec_keys() */
+	/* inherit the session keyring; new process keyring */
+	key_get(tgcred->session_keyring);
+	tgcred->process_keyring = NULL;
 
-/*****************************************************************************/
-/*
- * deal with SUID programs
- * - we might want to make this invent a new session keyring
- */
-int suid_keys(struct task_struct *tsk)
-{
+	release_tgcred(new);
+	new->tgcred = tgcred;
+
+	commit_creds(new);
 	return 0;
 
-} /* end suid_keys() */
+} /* end exec_keys() */
 
 /*****************************************************************************/
 /*
@@ -376,16 +359,13 @@ void key_fsgid_changed(struct task_struct *tsk)
 key_ref_t search_process_keyrings(struct key_type *type,
 				  const void *description,
 				  key_match_func_t match,
-				  struct task_struct *context)
+				  const struct cred *cred)
 {
 	struct request_key_auth *rka;
-	struct cred *cred;
 	key_ref_t key_ref, ret, err;
 
 	might_sleep();
 
-	cred = get_task_cred(context);
-
 	/* we want to return -EAGAIN or -ENOKEY if any of the keyrings were
 	 * searchable, but we failed to find a key or we found a negative key;
 	 * otherwise we want to return a sample error (probably -EACCES) if
@@ -401,7 +381,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	if (cred->thread_keyring) {
 		key_ref = keyring_search_aux(
 			make_key_ref(cred->thread_keyring, 1),
-			context, type, description, match);
+			cred, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -422,7 +402,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	if (cred->tgcred->process_keyring) {
 		key_ref = keyring_search_aux(
 			make_key_ref(cred->tgcred->process_keyring, 1),
-			context, type, description, match);
+			cred, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -446,7 +426,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 			make_key_ref(rcu_dereference(
 					     cred->tgcred->session_keyring),
 				     1),
-			context, type, description, match);
+			cred, type, description, match);
 		rcu_read_unlock();
 
 		if (!IS_ERR(key_ref))
@@ -468,7 +448,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	else if (cred->user->session_keyring) {
 		key_ref = keyring_search_aux(
 			make_key_ref(cred->user->session_keyring, 1),
-			context, type, description, match);
+			cred, type, description, match);
 		if (!IS_ERR(key_ref))
 			goto found;
 
@@ -490,7 +470,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	 * - we don't permit access to request_key auth keys via this method
 	 */
 	if (cred->request_key_auth &&
-	    context == current &&
+	    cred == current_cred() &&
 	    type != &key_type_request_key_auth
 	    ) {
 		/* defend against the auth key being revoked */
@@ -500,7 +480,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
 			rka = cred->request_key_auth->payload.data;
 
 			key_ref = search_process_keyrings(type, description,
-							  match, rka->context);
+							  match, rka->cred);
 
 			up_read(&cred->request_key_auth->sem);
 
@@ -527,7 +507,6 @@ key_ref_t search_process_keyrings(struct key_type *type,
 	key_ref = ret ? ret : err;
 
 found:
-	put_cred(cred);
 	return key_ref;
 
 } /* end search_process_keyrings() */
@@ -552,8 +531,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
 			  key_perm_t perm)
 {
 	struct request_key_auth *rka;
-	struct task_struct *t = current;
-	struct cred *cred;
+	const struct cred *cred;
 	struct key *key;
 	key_ref_t key_ref, skey_ref;
 	int ret;
@@ -608,6 +586,7 @@ try_again:
 				goto error;
 			ret = install_session_keyring(
 				cred->user->session_keyring);
+
 			if (ret < 0)
 				goto error;
 			goto reget_creds;
@@ -693,7 +672,7 @@ try_again:
 		/* check to see if we possess the key */
 		skey_ref = search_process_keyrings(key->type, key,
 						   lookup_user_key_possessed,
-						   current);
+						   cred);
 
 		if (!IS_ERR(skey_ref)) {
 			key_put(key);
@@ -725,7 +704,7 @@ try_again:
 		goto invalid_key;
 
 	/* check the permissions */
-	ret = key_task_permission(key_ref, t, perm);
+	ret = key_task_permission(key_ref, cred, perm);
 	if (ret < 0)
 		goto invalid_key;
 
@@ -755,21 +734,33 @@ reget_creds:
  */
 long join_session_keyring(const char *name)
 {
-	struct task_struct *tsk = current;
-	struct cred *cred = current->cred;
+	const struct cred *old;
+	struct cred *new;
 	struct key *keyring;
-	long ret;
+	long ret, serial;
+
+	/* only permit this if there's a single thread in the thread group -
+	 * this avoids us having to adjust the creds on all threads and risking
+	 * ENOMEM */
+	if (!is_single_threaded(current))
+		return -EMLINK;
+
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	old = current_cred();
 
 	/* if no name is provided, install an anonymous keyring */
 	if (!name) {
-		ret = install_session_keyring(NULL);
+		ret = install_session_keyring_to_cred(new, NULL);
 		if (ret < 0)
 			goto error;
 
-		rcu_read_lock();
-		ret = rcu_dereference(cred->tgcred->session_keyring)->serial;
-		rcu_read_unlock();
-		goto error;
+		serial = new->tgcred->session_keyring->serial;
+		ret = commit_creds(new);
+		if (ret == 0)
+			ret = serial;
+		goto okay;
 	}
 
 	/* allow the user to join or create a named keyring */
@@ -779,29 +770,33 @@ long join_session_keyring(const char *name)
 	keyring = find_keyring_by_name(name, false);
 	if (PTR_ERR(keyring) == -ENOKEY) {
 		/* not found - try and create a new one */
-		keyring = keyring_alloc(name, cred->uid, cred->gid, tsk,
+		keyring = keyring_alloc(name, old->uid, old->gid, old,
 					KEY_ALLOC_IN_QUOTA, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
 			goto error2;
 		}
-	}
-	else if (IS_ERR(keyring)) {
+	} else if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto error2;
 	}
 
 	/* we've got a keyring - now to install it */
-	ret = install_session_keyring(keyring);
+	ret = install_session_keyring_to_cred(new, keyring);
 	if (ret < 0)
 		goto error2;
 
+	commit_creds(new);
+	mutex_unlock(&key_session_mutex);
+
 	ret = keyring->serial;
 	key_put(keyring);
+okay:
+	return ret;
 
 error2:
 	mutex_unlock(&key_session_mutex);
 error:
+	abort_creds(new);
 	return ret;
-
-} /* end join_session_keyring() */
+}
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 3d12558362df..0e04f72ef2d4 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -83,8 +83,10 @@ static int call_sbin_request_key(struct key_construction *cons,
 	/* allocate a new session keyring */
 	sprintf(desc, "_req.%u", key->serial);
 
-	keyring = keyring_alloc(desc, current_fsuid(), current_fsgid(), current,
+	cred = get_current_cred();
+	keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
+	put_cred(cred);
 	if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto error_alloc;
@@ -104,8 +106,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 
 	/* we specify the process's default keyrings */
 	sprintf(keyring_str[0], "%d",
-		cred->thread_keyring ?
-		cred->thread_keyring->serial : 0);
+		cred->thread_keyring ? cred->thread_keyring->serial : 0);
 
 	prkey = 0;
 	if (cred->tgcred->process_keyring)
@@ -155,8 +156,8 @@ error_link:
 	key_put(keyring);
 
 error_alloc:
-	kleave(" = %d", ret);
 	complete_request_key(cons, ret);
+	kleave(" = %d", ret);
 	return ret;
 }
 
@@ -295,6 +296,7 @@ static int construct_alloc_key(struct key_type *type,
 			       struct key_user *user,
 			       struct key **_key)
 {
+	const struct cred *cred = current_cred();
 	struct key *key;
 	key_ref_t key_ref;
 
@@ -302,9 +304,8 @@ static int construct_alloc_key(struct key_type *type,
 
 	mutex_lock(&user->cons_lock);
 
-	key = key_alloc(type, description,
-			current_fsuid(), current_fsgid(), current, KEY_POS_ALL,
-			flags);
+	key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
+			KEY_POS_ALL, flags);
 	if (IS_ERR(key))
 		goto alloc_failed;
 
@@ -317,8 +318,7 @@ static int construct_alloc_key(struct key_type *type,
 	 * waited for locks */
 	mutex_lock(&key_construction_mutex);
 
-	key_ref = search_process_keyrings(type, description, type->match,
-					  current);
+	key_ref = search_process_keyrings(type, description, type->match, cred);
 	if (!IS_ERR(key_ref))
 		goto key_already_present;
 
@@ -363,6 +363,8 @@ static struct key *construct_key_and_link(struct key_type *type,
 	struct key *key;
 	int ret;
 
+	kenter("");
+
 	user = key_user_lookup(current_fsuid());
 	if (!user)
 		return ERR_PTR(-ENOMEM);
@@ -376,17 +378,21 @@ static struct key *construct_key_and_link(struct key_type *type,
 	if (ret == 0) {
 		ret = construct_key(key, callout_info, callout_len, aux,
 				    dest_keyring);
-		if (ret < 0)
+		if (ret < 0) {
+			kdebug("cons failed");
 			goto construction_failed;
+		}
 	}
 
 	key_put(dest_keyring);
+	kleave(" = key %d", key_serial(key));
 	return key;
 
 construction_failed:
 	key_negate_and_link(key, key_negative_timeout, NULL, NULL);
 	key_put(key);
 	key_put(dest_keyring);
+	kleave(" = %d", ret);
 	return ERR_PTR(ret);
 }
 
@@ -405,6 +411,7 @@ struct key *request_key_and_link(struct key_type *type,
 				 struct key *dest_keyring,
 				 unsigned long flags)
 {
+	const struct cred *cred = current_cred();
 	struct key *key;
 	key_ref_t key_ref;
 
@@ -414,7 +421,7 @@ struct key *request_key_and_link(struct key_type *type,
 
 	/* search all the process keyrings for a key */
 	key_ref = search_process_keyrings(type, description, type->match,
-					  current);
+					  cred);
 
 	if (!IS_ERR(key_ref)) {
 		key = key_ref_to_ptr(key_ref);
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 2125579d5d73..86747151ee5b 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -105,9 +105,9 @@ static void request_key_auth_revoke(struct key *key)
 
 	kenter("{%d}", key->serial);
 
-	if (rka->context) {
-		put_task_struct(rka->context);
-		rka->context = NULL;
+	if (rka->cred) {
+		put_cred(rka->cred);
+		rka->cred = NULL;
 	}
 
 } /* end request_key_auth_revoke() */
@@ -122,9 +122,9 @@ static void request_key_auth_destroy(struct key *key)
 
 	kenter("{%d}", key->serial);
 
-	if (rka->context) {
-		put_task_struct(rka->context);
-		rka->context = NULL;
+	if (rka->cred) {
+		put_cred(rka->cred);
+		rka->cred = NULL;
 	}
 
 	key_put(rka->target_key);
@@ -143,6 +143,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 				 size_t callout_len, struct key *dest_keyring)
 {
 	struct request_key_auth *rka, *irka;
+	const struct cred *cred = current->cred;
 	struct key *authkey = NULL;
 	char desc[20];
 	int ret;
@@ -164,28 +165,25 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 
 	/* see if the calling process is already servicing the key request of
 	 * another process */
-	if (current->cred->request_key_auth) {
+	if (cred->request_key_auth) {
 		/* it is - use that instantiation context here too */
-		down_read(&current->cred->request_key_auth->sem);
+		down_read(&cred->request_key_auth->sem);
 
 		/* if the auth key has been revoked, then the key we're
 		 * servicing is already instantiated */
-		if (test_bit(KEY_FLAG_REVOKED,
-			     &current->cred->request_key_auth->flags))
+		if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags))
 			goto auth_key_revoked;
 
-		irka = current->cred->request_key_auth->payload.data;
-		rka->context = irka->context;
+		irka = cred->request_key_auth->payload.data;
+		rka->cred = get_cred(irka->cred);
 		rka->pid = irka->pid;
-		get_task_struct(rka->context);
 
-		up_read(&current->cred->request_key_auth->sem);
+		up_read(&cred->request_key_auth->sem);
 	}
 	else {
 		/* it isn't - use this process as the context */
-		rka->context = current;
+		rka->cred = get_cred(cred);
 		rka->pid = current->pid;
-		get_task_struct(rka->context);
 	}
 
 	rka->target_key = key_get(target);
@@ -197,7 +195,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	sprintf(desc, "%x", target->serial);
 
 	authkey = key_alloc(&key_type_request_key_auth, desc,
-			    current_fsuid(), current_fsgid(), current,
+			    cred->fsuid, cred->fsgid, cred,
 			    KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
 			    KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA);
 	if (IS_ERR(authkey)) {
@@ -205,16 +203,16 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 		goto error_alloc;
 	}
 
-	/* construct and attach to the keyring */
+	/* construct the auth key */
 	ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
 	if (ret < 0)
 		goto error_inst;
 
-	kleave(" = {%d}", authkey->serial);
+	kleave(" = {%d,%d}", authkey->serial, atomic_read(&authkey->usage));
 	return authkey;
 
 auth_key_revoked:
-	up_read(&current->cred->request_key_auth->sem);
+	up_read(&cred->request_key_auth->sem);
 	kfree(rka->callout_info);
 	kfree(rka);
 	kleave("= -EKEYREVOKED");
@@ -257,6 +255,7 @@ static int key_get_instantiation_authkey_match(const struct key *key,
  */
 struct key *key_get_instantiation_authkey(key_serial_t target_id)
 {
+	const struct cred *cred = current_cred();
 	struct key *authkey;
 	key_ref_t authkey_ref;
 
@@ -264,7 +263,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
 		&key_type_request_key_auth,
 		(void *) (unsigned long) target_id,
 		key_get_instantiation_authkey_match,
-		current);
+		cred);
 
 	if (IS_ERR(authkey_ref)) {
 		authkey = ERR_CAST(authkey_ref);
diff --git a/security/security.c b/security/security.c
index f40a0a04c3c2..a55d739c6864 100644
--- a/security/security.c
+++ b/security/security.c
@@ -145,18 +145,13 @@ int security_capget(struct task_struct *target,
 	return security_ops->capget(target, effective, inheritable, permitted);
 }
 
-int security_capset_check(const kernel_cap_t *effective,
-			  const kernel_cap_t *inheritable,
-			  const kernel_cap_t *permitted)
+int security_capset(struct cred *new, const struct cred *old,
+		    const kernel_cap_t *effective,
+		    const kernel_cap_t *inheritable,
+		    const kernel_cap_t *permitted)
 {
-	return security_ops->capset_check(effective, inheritable, permitted);
-}
-
-void security_capset_set(const kernel_cap_t *effective,
-			 const kernel_cap_t *inheritable,
-			 const kernel_cap_t *permitted)
-{
-	security_ops->capset_set(effective, inheritable, permitted);
+	return security_ops->capset(new, old,
+				    effective, inheritable, permitted);
 }
 
 int security_capable(struct task_struct *tsk, int cap)
@@ -228,9 +223,9 @@ void security_bprm_free(struct linux_binprm *bprm)
 	security_ops->bprm_free_security(bprm);
 }
 
-void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
+int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 {
-	security_ops->bprm_apply_creds(bprm, unsafe);
+	return security_ops->bprm_apply_creds(bprm, unsafe);
 }
 
 void security_bprm_post_apply_creds(struct linux_binprm *bprm)
@@ -616,14 +611,19 @@ int security_task_create(unsigned long clone_flags)
 	return security_ops->task_create(clone_flags);
 }
 
-int security_cred_alloc(struct cred *cred)
+void security_cred_free(struct cred *cred)
 {
-	return security_ops->cred_alloc_security(cred);
+	security_ops->cred_free(cred);
 }
 
-void security_cred_free(struct cred *cred)
+int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp)
 {
-	security_ops->cred_free(cred);
+	return security_ops->cred_prepare(new, old, gfp);
+}
+
+void security_commit_creds(struct cred *new, const struct cred *old)
+{
+	return security_ops->cred_commit(new, old);
 }
 
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
@@ -631,10 +631,10 @@ int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 	return security_ops->task_setuid(id0, id1, id2, flags);
 }
 
-int security_task_post_setuid(uid_t old_ruid, uid_t old_euid,
-			       uid_t old_suid, int flags)
+int security_task_fix_setuid(struct cred *new, const struct cred *old,
+			     int flags)
 {
-	return security_ops->task_post_setuid(old_ruid, old_euid, old_suid, flags);
+	return security_ops->task_fix_setuid(new, old, flags);
 }
 
 int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags)
@@ -716,14 +716,9 @@ int security_task_wait(struct task_struct *p)
 }
 
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
-			 unsigned long arg4, unsigned long arg5, long *rc_p)
-{
-	return security_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p);
-}
-
-void security_task_reparent_to_init(struct task_struct *p)
+			 unsigned long arg4, unsigned long arg5)
 {
-	security_ops->task_reparent_to_init(p);
+	return security_ops->task_prctl(option, arg2, arg3, arg4, arg5);
 }
 
 void security_task_to_inode(struct task_struct *p, struct inode *inode)
@@ -1123,9 +1118,10 @@ EXPORT_SYMBOL(security_skb_classify_flow);
 
 #ifdef CONFIG_KEYS
 
-int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags)
+int security_key_alloc(struct key *key, const struct cred *cred,
+		       unsigned long flags)
 {
-	return security_ops->key_alloc(key, tsk, flags);
+	return security_ops->key_alloc(key, cred, flags);
 }
 
 void security_key_free(struct key *key)
@@ -1134,9 +1130,9 @@ void security_key_free(struct key *key)
 }
 
 int security_key_permission(key_ref_t key_ref,
-			    struct task_struct *context, key_perm_t perm)
+			    const struct cred *cred, key_perm_t perm)
 {
-	return security_ops->key_permission(key_ref, context, perm);
+	return security_ops->key_permission(key_ref, cred, perm);
 }
 
 int security_key_getsecurity(struct key *key, char **_buffer)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f20cbd681ba6..c71bba78872f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -156,20 +156,20 @@ static int selinux_secmark_enabled(void)
 	return (atomic_read(&selinux_secmark_refcount) > 0);
 }
 
-/* Allocate and free functions for each kind of security blob. */
-
-static int cred_alloc_security(struct cred *cred)
+/*
+ * initialise the security for the init task
+ */
+static void cred_init_security(void)
 {
+	struct cred *cred = (struct cred *) current->cred;
 	struct task_security_struct *tsec;
 
 	tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL);
 	if (!tsec)
-		return -ENOMEM;
+		panic("SELinux:  Failed to initialize initial task.\n");
 
-	tsec->osid = tsec->sid = SECINITSID_UNLABELED;
+	tsec->osid = tsec->sid = SECINITSID_KERNEL;
 	cred->security = tsec;
-
-	return 0;
 }
 
 /*
@@ -1378,6 +1378,19 @@ static inline u32 signal_to_av(int sig)
 	return perm;
 }
 
+/*
+ * Check permission between a pair of credentials
+ * fork check, ptrace check, etc.
+ */
+static int cred_has_perm(const struct cred *actor,
+			 const struct cred *target,
+			 u32 perms)
+{
+	u32 asid = cred_sid(actor), tsid = cred_sid(target);
+
+	return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL);
+}
+
 /*
  * Check permission between a pair of tasks, e.g. signal checks,
  * fork check, ptrace check, etc.
@@ -1820,24 +1833,19 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
 	return secondary_ops->capget(target, effective, inheritable, permitted);
 }
 
-static int selinux_capset_check(const kernel_cap_t *effective,
-				const kernel_cap_t *inheritable,
-				const kernel_cap_t *permitted)
+static int selinux_capset(struct cred *new, const struct cred *old,
+			  const kernel_cap_t *effective,
+			  const kernel_cap_t *inheritable,
+			  const kernel_cap_t *permitted)
 {
 	int error;
 
-	error = secondary_ops->capset_check(effective, inheritable, permitted);
+	error = secondary_ops->capset(new, old,
+				      effective, inheritable, permitted);
 	if (error)
 		return error;
 
-	return task_has_perm(current, current, PROCESS__SETCAP);
-}
-
-static void selinux_capset_set(const kernel_cap_t *effective,
-			       const kernel_cap_t *inheritable,
-			       const kernel_cap_t *permitted)
-{
-	secondary_ops->capset_set(effective, inheritable, permitted);
+	return cred_has_perm(old, new, PROCESS__SETCAP);
 }
 
 static int selinux_capable(struct task_struct *tsk, int cap, int audit)
@@ -2244,16 +2252,23 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 	spin_unlock(&files->file_lock);
 }
 
-static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
+static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 {
 	struct task_security_struct *tsec;
 	struct bprm_security_struct *bsec;
+	struct cred *new;
 	u32 sid;
 	int rc;
 
-	secondary_ops->bprm_apply_creds(bprm, unsafe);
+	rc = secondary_ops->bprm_apply_creds(bprm, unsafe);
+	if (rc < 0)
+		return rc;
 
-	tsec = current_security();
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	tsec = new->security;
 
 	bsec = bprm->security;
 	sid = bsec->sid;
@@ -2268,7 +2283,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 					PROCESS__SHARE, NULL);
 			if (rc) {
 				bsec->unsafe = 1;
-				return;
+				goto out;
 			}
 		}
 
@@ -2292,12 +2307,16 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
 						  PROCESS__PTRACE, NULL);
 				if (rc) {
 					bsec->unsafe = 1;
-					return;
+					goto out;
 				}
 			}
 		}
 		tsec->sid = sid;
 	}
+
+out:
+	commit_creds(new);
+	return 0;
 }
 
 /*
@@ -3021,6 +3040,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
 static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
 {
 	const struct cred *cred = current_cred();
+	int rc = 0;
 
 #ifndef CONFIG_PPC32
 	if ((prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) {
@@ -3029,9 +3049,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared
 		 * private file mapping that will also be writable.
 		 * This has an additional check.
 		 */
-		int rc = task_has_perm(current, current, PROCESS__EXECMEM);
+		rc = cred_has_perm(cred, cred, PROCESS__EXECMEM);
 		if (rc)
-			return rc;
+			goto error;
 	}
 #endif
 
@@ -3048,7 +3068,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared
 
 		return file_has_perm(cred, file, av);
 	}
-	return 0;
+
+error:
+	return rc;
 }
 
 static int selinux_file_mmap(struct file *file, unsigned long reqprot,
@@ -3090,8 +3112,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 		rc = 0;
 		if (vma->vm_start >= vma->vm_mm->start_brk &&
 		    vma->vm_end <= vma->vm_mm->brk) {
-			rc = task_has_perm(current, current,
-					   PROCESS__EXECHEAP);
+			rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP);
 		} else if (!vma->vm_file &&
 			   vma->vm_start <= vma->vm_mm->start_stack &&
 			   vma->vm_end >= vma->vm_mm->start_stack) {
@@ -3104,8 +3125,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 			 * modified content.  This typically should only
 			 * occur for text relocations.
 			 */
-			rc = file_has_perm(cred, vma->vm_file,
-					   FILE__EXECMOD);
+			rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
 		}
 		if (rc)
 			return rc;
@@ -3211,6 +3231,7 @@ static int selinux_dentry_open(struct file *file, const struct cred *cred)
 	struct file_security_struct *fsec;
 	struct inode *inode;
 	struct inode_security_struct *isec;
+
 	inode = file->f_path.dentry->d_inode;
 	fsec = file->f_security;
 	isec = inode->i_security;
@@ -3247,38 +3268,41 @@ static int selinux_task_create(unsigned long clone_flags)
 	return task_has_perm(current, current, PROCESS__FORK);
 }
 
-static int selinux_cred_alloc_security(struct cred *cred)
+/*
+ * detach and free the LSM part of a set of credentials
+ */
+static void selinux_cred_free(struct cred *cred)
 {
-	struct task_security_struct *tsec1, *tsec2;
-	int rc;
-
-	tsec1 = current_security();
+	struct task_security_struct *tsec = cred->security;
+	cred->security = NULL;
+	kfree(tsec);
+}
 
-	rc = cred_alloc_security(cred);
-	if (rc)
-		return rc;
-	tsec2 = cred->security;
+/*
+ * prepare a new set of credentials for modification
+ */
+static int selinux_cred_prepare(struct cred *new, const struct cred *old,
+				gfp_t gfp)
+{
+	const struct task_security_struct *old_tsec;
+	struct task_security_struct *tsec;
 
-	tsec2->osid = tsec1->osid;
-	tsec2->sid = tsec1->sid;
+	old_tsec = old->security;
 
-	/* Retain the exec, fs, key, and sock SIDs across fork */
-	tsec2->exec_sid = tsec1->exec_sid;
-	tsec2->create_sid = tsec1->create_sid;
-	tsec2->keycreate_sid = tsec1->keycreate_sid;
-	tsec2->sockcreate_sid = tsec1->sockcreate_sid;
+	tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp);
+	if (!tsec)
+		return -ENOMEM;
 
+	new->security = tsec;
 	return 0;
 }
 
 /*
- * detach and free the LSM part of a set of credentials
+ * commit new credentials
  */
-static void selinux_cred_free(struct cred *cred)
+static void selinux_cred_commit(struct cred *new, const struct cred *old)
 {
-	struct task_security_struct *tsec = cred->security;
-	cred->security = NULL;
-	kfree(tsec);
+	secondary_ops->cred_commit(new, old);
 }
 
 static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
@@ -3292,9 +3316,10 @@ static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 	return 0;
 }
 
-static int selinux_task_post_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
+static int selinux_task_fix_setuid(struct cred *new, const struct cred *old,
+				   int flags)
 {
-	return secondary_ops->task_post_setuid(id0, id1, id2, flags);
+	return secondary_ops->task_fix_setuid(new, old, flags);
 }
 
 static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags)
@@ -3368,7 +3393,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim
 	/* Control the ability to change the hard limit (whether
 	   lowering or raising it), so that the hard limit can
 	   later be used as a safe reset point for the soft limit
-	   upon context transitions. See selinux_bprm_apply_creds. */
+	   upon context transitions.  See selinux_bprm_committing_creds. */
 	if (old_rlim->rlim_max != new_rlim->rlim_max)
 		return task_has_perm(current, current, PROCESS__SETRLIMIT);
 
@@ -3422,13 +3447,12 @@ static int selinux_task_prctl(int option,
 			      unsigned long arg2,
 			      unsigned long arg3,
 			      unsigned long arg4,
-			      unsigned long arg5,
-			      long *rc_p)
+			      unsigned long arg5)
 {
 	/* The current prctl operations do not appear to require
 	   any SELinux controls since they merely observe or modify
 	   the state of the current process. */
-	return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p);
+	return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5);
 }
 
 static int selinux_task_wait(struct task_struct *p)
@@ -3436,18 +3460,6 @@ static int selinux_task_wait(struct task_struct *p)
 	return task_has_perm(p, current, PROCESS__SIGCHLD);
 }
 
-static void selinux_task_reparent_to_init(struct task_struct *p)
-{
-	struct task_security_struct *tsec;
-
-	secondary_ops->task_reparent_to_init(p);
-
-	tsec = p->cred->security;
-	tsec->osid = tsec->sid;
-	tsec->sid = SECINITSID_KERNEL;
-	return;
-}
-
 static void selinux_task_to_inode(struct task_struct *p,
 				  struct inode *inode)
 {
@@ -5325,7 +5337,8 @@ static int selinux_setprocattr(struct task_struct *p,
 {
 	struct task_security_struct *tsec;
 	struct task_struct *tracer;
-	u32 sid = 0;
+	struct cred *new;
+	u32 sid = 0, ptsid;
 	int error;
 	char *str = value;
 
@@ -5372,86 +5385,75 @@ static int selinux_setprocattr(struct task_struct *p,
 			return error;
 	}
 
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
 	/* Permission checking based on the specified context is
 	   performed during the actual operation (execve,
 	   open/mkdir/...), when we know the full context of the
-	   operation.  See selinux_bprm_set_security for the execve
+	   operation.  See selinux_bprm_set_creds for the execve
 	   checks and may_create for the file creation checks. The
 	   operation will then fail if the context is not permitted. */
-	tsec = p->cred->security;
-	if (!strcmp(name, "exec"))
+	tsec = new->security;
+	if (!strcmp(name, "exec")) {
 		tsec->exec_sid = sid;
-	else if (!strcmp(name, "fscreate"))
+	} else if (!strcmp(name, "fscreate")) {
 		tsec->create_sid = sid;
-	else if (!strcmp(name, "keycreate")) {
+	} else if (!strcmp(name, "keycreate")) {
 		error = may_create_key(sid, p);
 		if (error)
-			return error;
+			goto abort_change;
 		tsec->keycreate_sid = sid;
-	} else if (!strcmp(name, "sockcreate"))
+	} else if (!strcmp(name, "sockcreate")) {
 		tsec->sockcreate_sid = sid;
-	else if (!strcmp(name, "current")) {
-		struct av_decision avd;
-
+	} else if (!strcmp(name, "current")) {
+		error = -EINVAL;
 		if (sid == 0)
-			return -EINVAL;
-		/*
-		 * SELinux allows to change context in the following case only.
-		 *  - Single threaded processes.
-		 *  - Multi threaded processes intend to change its context into
-		 *    more restricted domain (defined by TYPEBOUNDS statement).
-		 */
-		if (atomic_read(&p->mm->mm_users) != 1) {
-			struct task_struct *g, *t;
-			struct mm_struct *mm = p->mm;
-			read_lock(&tasklist_lock);
-			do_each_thread(g, t) {
-				if (t->mm == mm && t != p) {
-					read_unlock(&tasklist_lock);
-					error = security_bounded_transition(tsec->sid, sid);
-					if (!error)
-						goto boundary_ok;
-
-					return error;
-				}
-			} while_each_thread(g, t);
-			read_unlock(&tasklist_lock);
+			goto abort_change;
+
+		/* Only allow single threaded processes to change context */
+		error = -EPERM;
+		if (!is_single_threaded(p)) {
+			error = security_bounded_transition(tsec->sid, sid);
+			if (error)
+				goto abort_change;
 		}
-boundary_ok:
 
 		/* Check permissions for the transition. */
 		error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS,
 				     PROCESS__DYNTRANSITION, NULL);
 		if (error)
-			return error;
+			goto abort_change;
 
 		/* Check for ptracing, and update the task SID if ok.
 		   Otherwise, leave SID unchanged and fail. */
+		ptsid = 0;
 		task_lock(p);
-		rcu_read_lock();
 		tracer = tracehook_tracer_task(p);
-		if (tracer != NULL) {
-			u32 ptsid = task_sid(tracer);
-			rcu_read_unlock();
-			error = avc_has_perm_noaudit(ptsid, sid,
-						     SECCLASS_PROCESS,
-						     PROCESS__PTRACE, 0, &avd);
-			if (!error)
-				tsec->sid = sid;
-			task_unlock(p);
-			avc_audit(ptsid, sid, SECCLASS_PROCESS,
-				  PROCESS__PTRACE, &avd, error, NULL);
+		if (tracer)
+			ptsid = task_sid(tracer);
+		task_unlock(p);
+
+		if (tracer) {
+			error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
+					     PROCESS__PTRACE, NULL);
 			if (error)
-				return error;
-		} else {
-			rcu_read_unlock();
-			tsec->sid = sid;
-			task_unlock(p);
+				goto abort_change;
 		}
-	} else
-		return -EINVAL;
 
+		tsec->sid = sid;
+	} else {
+		error = -EINVAL;
+		goto abort_change;
+	}
+
+	commit_creds(new);
 	return size;
+
+abort_change:
+	abort_creds(new);
+	return error;
 }
 
 static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
@@ -5471,23 +5473,21 @@ static void selinux_release_secctx(char *secdata, u32 seclen)
 
 #ifdef CONFIG_KEYS
 
-static int selinux_key_alloc(struct key *k, struct task_struct *tsk,
+static int selinux_key_alloc(struct key *k, const struct cred *cred,
 			     unsigned long flags)
 {
-	const struct task_security_struct *__tsec;
+	const struct task_security_struct *tsec;
 	struct key_security_struct *ksec;
 
 	ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL);
 	if (!ksec)
 		return -ENOMEM;
 
-	rcu_read_lock();
-	__tsec = __task_cred(tsk)->security;
-	if (__tsec->keycreate_sid)
-		ksec->sid = __tsec->keycreate_sid;
+	tsec = cred->security;
+	if (tsec->keycreate_sid)
+		ksec->sid = tsec->keycreate_sid;
 	else
-		ksec->sid = __tsec->sid;
-	rcu_read_unlock();
+		ksec->sid = tsec->sid;
 
 	k->security = ksec;
 	return 0;
@@ -5502,8 +5502,8 @@ static void selinux_key_free(struct key *k)
 }
 
 static int selinux_key_permission(key_ref_t key_ref,
-			    struct task_struct *ctx,
-			    key_perm_t perm)
+				  const struct cred *cred,
+				  key_perm_t perm)
 {
 	struct key *key;
 	struct key_security_struct *ksec;
@@ -5515,7 +5515,7 @@ static int selinux_key_permission(key_ref_t key_ref,
 	if (perm == 0)
 		return 0;
 
-	sid = task_sid(ctx);
+	sid = cred_sid(cred);
 
 	key = key_ref_to_ptr(key_ref);
 	ksec = key->security;
@@ -5545,8 +5545,7 @@ static struct security_operations selinux_ops = {
 	.ptrace_may_access =		selinux_ptrace_may_access,
 	.ptrace_traceme =		selinux_ptrace_traceme,
 	.capget =			selinux_capget,
-	.capset_check =			selinux_capset_check,
-	.capset_set =			selinux_capset_set,
+	.capset =			selinux_capset,
 	.sysctl =			selinux_sysctl,
 	.capable =			selinux_capable,
 	.quotactl =			selinux_quotactl,
@@ -5621,10 +5620,11 @@ static struct security_operations selinux_ops = {
 	.dentry_open =			selinux_dentry_open,
 
 	.task_create =			selinux_task_create,
-	.cred_alloc_security =		selinux_cred_alloc_security,
 	.cred_free =			selinux_cred_free,
+	.cred_prepare =			selinux_cred_prepare,
+	.cred_commit =			selinux_cred_commit,
 	.task_setuid =			selinux_task_setuid,
-	.task_post_setuid =		selinux_task_post_setuid,
+	.task_fix_setuid =		selinux_task_fix_setuid,
 	.task_setgid =			selinux_task_setgid,
 	.task_setpgid =			selinux_task_setpgid,
 	.task_getpgid =			selinux_task_getpgid,
@@ -5641,7 +5641,6 @@ static struct security_operations selinux_ops = {
 	.task_kill =			selinux_task_kill,
 	.task_wait =			selinux_task_wait,
 	.task_prctl =			selinux_task_prctl,
-	.task_reparent_to_init =	selinux_task_reparent_to_init,
 	.task_to_inode =		selinux_task_to_inode,
 
 	.ipc_permission =		selinux_ipc_permission,
@@ -5737,8 +5736,6 @@ static struct security_operations selinux_ops = {
 
 static __init int selinux_init(void)
 {
-	struct task_security_struct *tsec;
-
 	if (!security_module_enable(&selinux_ops)) {
 		selinux_enabled = 0;
 		return 0;
@@ -5752,10 +5749,7 @@ static __init int selinux_init(void)
 	printk(KERN_INFO "SELinux:  Initializing.\n");
 
 	/* Set the security state for the initial task. */
-	if (cred_alloc_security(current->cred))
-		panic("SELinux:  Failed to initialize initial task.\n");
-	tsec = current->cred->security;
-	tsec->osid = tsec->sid = SECINITSID_KERNEL;
+	cred_init_security();
 
 	sel_inode_cache = kmem_cache_create("selinux_inode_security",
 					    sizeof(struct inode_security_struct),
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 11167fd567b9..e952b397153d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -104,8 +104,7 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(current->cred->security, ctp->cred->security,
-			MAY_READWRITE);
+	rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE);
 	if (rc != 0 && capable(CAP_MAC_OVERRIDE))
 		return 0;
 	return rc;
@@ -127,8 +126,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
 	if (rc != 0)
 		return rc;
 
-	rc = smk_access(ptp->cred->security, current->cred->security,
-			MAY_READWRITE);
+	rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE);
 	if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
 		return 0;
 	return rc;
@@ -976,22 +974,6 @@ static int smack_file_receive(struct file *file)
  * Task hooks
  */
 
-/**
- * smack_cred_alloc_security - "allocate" a task cred blob
- * @cred: the task creds in need of a blob
- *
- * Smack isn't using copies of blobs. Everyone
- * points to an immutable list. No alloc required.
- * No data copy required.
- *
- * Always returns 0
- */
-static int smack_cred_alloc_security(struct cred *cred)
-{
-	cred->security = current_security();
-	return 0;
-}
-
 /**
  * smack_cred_free - "free" task-level security credentials
  * @cred: the credentials in question
@@ -1005,6 +987,30 @@ static void smack_cred_free(struct cred *cred)
 	cred->security = NULL;
 }
 
+/**
+ * smack_cred_prepare - prepare new set of credentials for modification
+ * @new: the new credentials
+ * @old: the original credentials
+ * @gfp: the atomicity of any memory allocations
+ *
+ * Prepare a new set of credentials for modification.
+ */
+static int smack_cred_prepare(struct cred *new, const struct cred *old,
+			      gfp_t gfp)
+{
+	new->security = old->security;
+	return 0;
+}
+
+/*
+ * commit new credentials
+ * @new: the new credentials
+ * @old: the original credentials
+ */
+static void smack_cred_commit(struct cred *new, const struct cred *old)
+{
+}
+
 /**
  * smack_task_setpgid - Smack check on setting pgid
  * @p: the task object
@@ -2036,6 +2042,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
 static int smack_setprocattr(struct task_struct *p, char *name,
 			     void *value, size_t size)
 {
+	struct cred *new;
 	char *newsmack;
 
 	/*
@@ -2058,7 +2065,11 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 	if (newsmack == NULL)
 		return -EINVAL;
 
-	p->cred->security = newsmack;
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+	new->security = newsmack;
+	commit_creds(new);
 	return size;
 }
 
@@ -2354,17 +2365,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 /**
  * smack_key_alloc - Set the key security blob
  * @key: object
- * @tsk: the task associated with the key
+ * @cred: the credentials to use
  * @flags: unused
  *
  * No allocation required
  *
  * Returns 0
  */
-static int smack_key_alloc(struct key *key, struct task_struct *tsk,
+static int smack_key_alloc(struct key *key, const struct cred *cred,
 			   unsigned long flags)
 {
-	key->security = tsk->cred->security;
+	key->security = cred->security;
 	return 0;
 }
 
@@ -2382,14 +2393,14 @@ static void smack_key_free(struct key *key)
 /*
  * smack_key_permission - Smack access on a key
  * @key_ref: gets to the object
- * @context: task involved
+ * @cred: the credentials to use
  * @perm: unused
  *
  * Return 0 if the task has read and write to the object,
  * an error code otherwise
  */
 static int smack_key_permission(key_ref_t key_ref,
-				struct task_struct *context, key_perm_t perm)
+				const struct cred *cred, key_perm_t perm)
 {
 	struct key *keyp;
 
@@ -2405,11 +2416,10 @@ static int smack_key_permission(key_ref_t key_ref,
 	/*
 	 * This should not occur
 	 */
-	if (context->cred->security == NULL)
+	if (cred->security == NULL)
 		return -EACCES;
 
-	return smk_access(context->cred->security, keyp->security,
-			  MAY_READWRITE);
+	return smk_access(cred->security, keyp->security, MAY_READWRITE);
 }
 #endif /* CONFIG_KEYS */
 
@@ -2580,8 +2590,7 @@ struct security_operations smack_ops = {
 	.ptrace_may_access =		smack_ptrace_may_access,
 	.ptrace_traceme =		smack_ptrace_traceme,
 	.capget = 			cap_capget,
-	.capset_check = 		cap_capset_check,
-	.capset_set = 			cap_capset_set,
+	.capset = 			cap_capset,
 	.capable = 			cap_capable,
 	.syslog = 			smack_syslog,
 	.settime = 			cap_settime,
@@ -2630,9 +2639,10 @@ struct security_operations smack_ops = {
 	.file_send_sigiotask = 		smack_file_send_sigiotask,
 	.file_receive = 		smack_file_receive,
 
-	.cred_alloc_security = 		smack_cred_alloc_security,
 	.cred_free =			smack_cred_free,
-	.task_post_setuid =		cap_task_post_setuid,
+	.cred_prepare =			smack_cred_prepare,
+	.cred_commit =			smack_cred_commit,
+	.task_fix_setuid =		cap_task_fix_setuid,
 	.task_setpgid = 		smack_task_setpgid,
 	.task_getpgid = 		smack_task_getpgid,
 	.task_getsid = 			smack_task_getsid,
@@ -2645,7 +2655,6 @@ struct security_operations smack_ops = {
 	.task_movememory = 		smack_task_movememory,
 	.task_kill = 			smack_task_kill,
 	.task_wait = 			smack_task_wait,
-	.task_reparent_to_init =	cap_task_reparent_to_init,
 	.task_to_inode = 		smack_task_to_inode,
 	.task_prctl =			cap_task_prctl,
 
@@ -2721,6 +2730,8 @@ struct security_operations smack_ops = {
  */
 static __init int smack_init(void)
 {
+	struct cred *cred;
+
 	if (!security_module_enable(&smack_ops))
 		return 0;
 
@@ -2729,7 +2740,8 @@ static __init int smack_init(void)
 	/*
 	 * Set the security state for the initial task.
 	 */
-	current->cred->security = &smack_known_floor.smk_known;
+	cred = (struct cred *) current->cred;
+	cred->security = &smack_known_floor.smk_known;
 
 	/*
 	 * Initialize locks
-- 
cgit v1.2.3


From a6f76f23d297f70e2a6b3ec607f7aeeea9e37e8d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:24 +1100
Subject: CRED: Make execve() take advantage of copy-on-write credentials

Make execve() take advantage of copy-on-write credentials, allowing it to set
up the credentials in advance, and then commit the whole lot after the point
of no return.

This patch and the preceding patches have been tested with the LTP SELinux
testsuite.

This patch makes several logical sets of alteration:

 (1) execve().

     The credential bits from struct linux_binprm are, for the most part,
     replaced with a single credentials pointer (bprm->cred).  This means that
     all the creds can be calculated in advance and then applied at the point
     of no return with no possibility of failure.

     I would like to replace bprm->cap_effective with:

	cap_isclear(bprm->cap_effective)

     but this seems impossible due to special behaviour for processes of pid 1
     (they always retain their parent's capability masks where normally they'd
     be changed - see cap_bprm_set_creds()).

     The following sequence of events now happens:

     (a) At the start of do_execve, the current task's cred_exec_mutex is
     	 locked to prevent PTRACE_ATTACH from obsoleting the calculation of
     	 creds that we make.

     (a) prepare_exec_creds() is then called to make a copy of the current
     	 task's credentials and prepare it.  This copy is then assigned to
     	 bprm->cred.

  	 This renders security_bprm_alloc() and security_bprm_free()
     	 unnecessary, and so they've been removed.

     (b) The determination of unsafe execution is now performed immediately
     	 after (a) rather than later on in the code.  The result is stored in
     	 bprm->unsafe for future reference.

     (c) prepare_binprm() is called, possibly multiple times.

     	 (i) This applies the result of set[ug]id binaries to the new creds
     	     attached to bprm->cred.  Personality bit clearance is recorded,
     	     but now deferred on the basis that the exec procedure may yet
     	     fail.

         (ii) This then calls the new security_bprm_set_creds().  This should
	     calculate the new LSM and capability credentials into *bprm->cred.

	     This folds together security_bprm_set() and parts of
	     security_bprm_apply_creds() (these two have been removed).
	     Anything that might fail must be done at this point.

         (iii) bprm->cred_prepared is set to 1.

	     bprm->cred_prepared is 0 on the first pass of the security
	     calculations, and 1 on all subsequent passes.  This allows SELinux
	     in (ii) to base its calculations only on the initial script and
	     not on the interpreter.

     (d) flush_old_exec() is called to commit the task to execution.  This
     	 performs the following steps with regard to credentials:

	 (i) Clear pdeath_signal and set dumpable on certain circumstances that
	     may not be covered by commit_creds().

         (ii) Clear any bits in current->personality that were deferred from
             (c.i).

     (e) install_exec_creds() [compute_creds() as was] is called to install the
     	 new credentials.  This performs the following steps with regard to
     	 credentials:

         (i) Calls security_bprm_committing_creds() to apply any security
             requirements, such as flushing unauthorised files in SELinux, that
             must be done before the credentials are changed.

	     This is made up of bits of security_bprm_apply_creds() and
	     security_bprm_post_apply_creds(), both of which have been removed.
	     This function is not allowed to fail; anything that might fail
	     must have been done in (c.ii).

         (ii) Calls commit_creds() to apply the new credentials in a single
             assignment (more or less).  Possibly pdeath_signal and dumpable
             should be part of struct creds.

	 (iii) Unlocks the task's cred_replace_mutex, thus allowing
	     PTRACE_ATTACH to take place.

         (iv) Clears The bprm->cred pointer as the credentials it was holding
             are now immutable.

         (v) Calls security_bprm_committed_creds() to apply any security
             alterations that must be done after the creds have been changed.
             SELinux uses this to flush signals and signal handlers.

     (f) If an error occurs before (d.i), bprm_free() will call abort_creds()
     	 to destroy the proposed new credentials and will then unlock
     	 cred_replace_mutex.  No changes to the credentials will have been
     	 made.

 (2) LSM interface.

     A number of functions have been changed, added or removed:

     (*) security_bprm_alloc(), ->bprm_alloc_security()
     (*) security_bprm_free(), ->bprm_free_security()

     	 Removed in favour of preparing new credentials and modifying those.

     (*) security_bprm_apply_creds(), ->bprm_apply_creds()
     (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds()

     	 Removed; split between security_bprm_set_creds(),
     	 security_bprm_committing_creds() and security_bprm_committed_creds().

     (*) security_bprm_set(), ->bprm_set_security()

     	 Removed; folded into security_bprm_set_creds().

     (*) security_bprm_set_creds(), ->bprm_set_creds()

     	 New.  The new credentials in bprm->creds should be checked and set up
     	 as appropriate.  bprm->cred_prepared is 0 on the first call, 1 on the
     	 second and subsequent calls.

     (*) security_bprm_committing_creds(), ->bprm_committing_creds()
     (*) security_bprm_committed_creds(), ->bprm_committed_creds()

     	 New.  Apply the security effects of the new credentials.  This
     	 includes closing unauthorised files in SELinux.  This function may not
     	 fail.  When the former is called, the creds haven't yet been applied
     	 to the process; when the latter is called, they have.

 	 The former may access bprm->cred, the latter may not.

 (3) SELinux.

     SELinux has a number of changes, in addition to those to support the LSM
     interface changes mentioned above:

     (a) The bprm_security_struct struct has been removed in favour of using
     	 the credentials-under-construction approach.

     (c) flush_unauthorized_files() now takes a cred pointer and passes it on
     	 to inode_has_perm(), file_has_perm() and dentry_open().

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/x86/ia32/ia32_aout.c         |   2 +-
 fs/binfmt_aout.c                  |   2 +-
 fs/binfmt_elf.c                   |   2 +-
 fs/binfmt_elf_fdpic.c             |   2 +-
 fs/binfmt_flat.c                  |   2 +-
 fs/binfmt_som.c                   |   2 +-
 fs/compat.c                       |  42 +++---
 fs/exec.c                         | 149 +++++++++++---------
 fs/internal.h                     |   6 +
 include/linux/audit.h             |  16 ---
 include/linux/binfmts.h           |  16 ++-
 include/linux/cred.h              |   3 +-
 include/linux/key.h               |   2 -
 include/linux/security.h          | 103 +++++---------
 kernel/cred.c                     |  46 ++++++-
 security/capability.c             |  19 +--
 security/commoncap.c              | 152 ++++++++++----------
 security/keys/process_keys.c      |  42 ------
 security/root_plug.c              |  13 +-
 security/security.c               |  26 ++--
 security/selinux/hooks.c          | 283 ++++++++++++++++----------------------
 security/selinux/include/objsec.h |  11 --
 security/smack/smack_lsm.c        |   3 +-
 23 files changed, 429 insertions(+), 515 deletions(-)

(limited to 'include')

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 127ec3f07214..2a4d073d2cf1 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -327,7 +327,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->cached_hole_size = 0;
 
 	current->mm->mmap = NULL;
-	compute_creds(bprm);
+	install_exec_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 
 	if (N_MAGIC(ex) == OMAGIC) {
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 204cfd1d7676..f1f3f4192a60 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -320,7 +320,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = current->mm->mmap_base;
 	current->mm->cached_hole_size = 0;
 
-	compute_creds(bprm);
+	install_exec_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
 #ifdef __sparc__
 	if (N_MAGIC(ex) == NMAGIC) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9142ff5dc8e6..f458c1217c5e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -956,7 +956,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	}
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
-	compute_creds(bprm);
+	install_exec_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 	retval = create_elf_tables(bprm, &loc->elf_ex,
 			  load_addr, interp_load_addr);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 45dabd59936f..aa5b43205e37 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -404,7 +404,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	current->mm->start_stack = current->mm->start_brk + stack_size;
 #endif
 
-	compute_creds(bprm);
+	install_exec_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 	if (create_elf_fdpic_tables(bprm, current->mm,
 				    &exec_params, &interp_params) < 0)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index ccb781a6a804..7bbd5c6b3725 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -880,7 +880,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 					(libinfo.lib_list[j].loaded)?
 						libinfo.lib_list[j].start_data:UNLOADED_LIB;
 
-	compute_creds(bprm);
+	install_exec_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
 
 	set_binfmt(&flat_format);
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 74e587a52796..08644a61616e 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -255,7 +255,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	kfree(hpuxhdr);
 
 	set_binfmt(&som_format);
-	compute_creds(bprm);
+	install_exec_creds(bprm);
 	setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
 
 	create_som_tables(bprm);
diff --git a/fs/compat.c b/fs/compat.c
index e5f49f538502..d1ece79b6411 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1393,10 +1393,20 @@ int compat_do_execve(char * filename,
 	if (!bprm)
 		goto out_ret;
 
+	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	if (retval < 0)
+		goto out_free;
+
+	retval = -ENOMEM;
+	bprm->cred = prepare_exec_creds();
+	if (!bprm->cred)
+		goto out_unlock;
+	check_unsafe_exec(bprm);
+
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
-		goto out_kfree;
+		goto out_unlock;
 
 	sched_exec();
 
@@ -1410,14 +1420,10 @@ int compat_do_execve(char * filename,
 
 	bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
 	if ((retval = bprm->argc) < 0)
-		goto out_mm;
+		goto out;
 
 	bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
 	if ((retval = bprm->envc) < 0)
-		goto out_mm;
-
-	retval = security_bprm_alloc(bprm);
-	if (retval)
 		goto out;
 
 	retval = prepare_binprm(bprm);
@@ -1438,19 +1444,16 @@ int compat_do_execve(char * filename,
 		goto out;
 
 	retval = search_binary_handler(bprm, regs);
-	if (retval >= 0) {
-		/* execve success */
-		security_bprm_free(bprm);
-		acct_update_integrals(current);
-		free_bprm(bprm);
-		return retval;
-	}
+	if (retval < 0)
+		goto out;
 
-out:
-	if (bprm->security)
-		security_bprm_free(bprm);
+	/* execve succeeded */
+	mutex_unlock(&current->cred_exec_mutex);
+	acct_update_integrals(current);
+	free_bprm(bprm);
+	return retval;
 
-out_mm:
+out:
 	if (bprm->mm)
 		mmput(bprm->mm);
 
@@ -1460,7 +1463,10 @@ out_file:
 		fput(bprm->file);
 	}
 
-out_kfree:
+out_unlock:
+	mutex_unlock(&current->cred_exec_mutex);
+
+out_free:
 	free_bprm(bprm);
 
 out_ret:
diff --git a/fs/exec.c b/fs/exec.c
index 9bd3559ddece..32f13e299417 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
+#include "internal.h"
 
 #ifdef __alpha__
 /* for /sbin/loader handling in search_binary_handler() */
@@ -1007,15 +1008,17 @@ int flush_old_exec(struct linux_binprm * bprm)
 	 */
 	current->mm->task_size = TASK_SIZE;
 
-	if (bprm->e_uid != current_euid() ||
-	    bprm->e_gid != current_egid()) {
-		set_dumpable(current->mm, suid_dumpable);
+	/* install the new credentials */
+	if (bprm->cred->uid != current_euid() ||
+	    bprm->cred->gid != current_egid()) {
 		current->pdeath_signal = 0;
 	} else if (file_permission(bprm->file, MAY_READ) ||
-			(bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+		   bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
 		set_dumpable(current->mm, suid_dumpable);
 	}
 
+	current->personality &= ~bprm->per_clear;
+
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
 
@@ -1032,13 +1035,50 @@ out:
 
 EXPORT_SYMBOL(flush_old_exec);
 
+/*
+ * install the new credentials for this executable
+ */
+void install_exec_creds(struct linux_binprm *bprm)
+{
+	security_bprm_committing_creds(bprm);
+
+	commit_creds(bprm->cred);
+	bprm->cred = NULL;
+
+	/* cred_exec_mutex must be held at least to this point to prevent
+	 * ptrace_attach() from altering our determination of the task's
+	 * credentials; any time after this it may be unlocked */
+
+	security_bprm_committed_creds(bprm);
+}
+EXPORT_SYMBOL(install_exec_creds);
+
+/*
+ * determine how safe it is to execute the proposed program
+ * - the caller must hold current->cred_exec_mutex to protect against
+ *   PTRACE_ATTACH
+ */
+void check_unsafe_exec(struct linux_binprm *bprm)
+{
+	struct task_struct *p = current;
+
+	bprm->unsafe = tracehook_unsafe_exec(p);
+
+	if (atomic_read(&p->fs->count) > 1 ||
+	    atomic_read(&p->files->count) > 1 ||
+	    atomic_read(&p->sighand->count) > 1)
+		bprm->unsafe |= LSM_UNSAFE_SHARE;
+}
+
 /* 
  * Fill the binprm structure from the inode. 
  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
+ *
+ * This may be called multiple times for binary chains (scripts for example).
  */
 int prepare_binprm(struct linux_binprm *bprm)
 {
-	int mode;
+	umode_t mode;
 	struct inode * inode = bprm->file->f_path.dentry->d_inode;
 	int retval;
 
@@ -1046,14 +1086,15 @@ int prepare_binprm(struct linux_binprm *bprm)
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
-	bprm->e_uid = current_euid();
-	bprm->e_gid = current_egid();
+	/* clear any previous set[ug]id data from a previous binary */
+	bprm->cred->euid = current_euid();
+	bprm->cred->egid = current_egid();
 
-	if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
+	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
 		/* Set-uid? */
 		if (mode & S_ISUID) {
-			current->personality &= ~PER_CLEAR_ON_SETID;
-			bprm->e_uid = inode->i_uid;
+			bprm->per_clear |= PER_CLEAR_ON_SETID;
+			bprm->cred->euid = inode->i_uid;
 		}
 
 		/* Set-gid? */
@@ -1063,50 +1104,23 @@ int prepare_binprm(struct linux_binprm *bprm)
 		 * executable.
 		 */
 		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-			current->personality &= ~PER_CLEAR_ON_SETID;
-			bprm->e_gid = inode->i_gid;
+			bprm->per_clear |= PER_CLEAR_ON_SETID;
+			bprm->cred->egid = inode->i_gid;
 		}
 	}
 
 	/* fill in binprm security blob */
-	retval = security_bprm_set(bprm);
+	retval = security_bprm_set_creds(bprm);
 	if (retval)
 		return retval;
+	bprm->cred_prepared = 1;
 
-	memset(bprm->buf,0,BINPRM_BUF_SIZE);
-	return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
+	memset(bprm->buf, 0, BINPRM_BUF_SIZE);
+	return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
 }
 
 EXPORT_SYMBOL(prepare_binprm);
 
-static int unsafe_exec(struct task_struct *p)
-{
-	int unsafe = tracehook_unsafe_exec(p);
-
-	if (atomic_read(&p->fs->count) > 1 ||
-	    atomic_read(&p->files->count) > 1 ||
-	    atomic_read(&p->sighand->count) > 1)
-		unsafe |= LSM_UNSAFE_SHARE;
-
-	return unsafe;
-}
-
-void compute_creds(struct linux_binprm *bprm)
-{
-	int unsafe;
-
-	if (bprm->e_uid != current_uid())
-		current->pdeath_signal = 0;
-	exec_keys(current);
-
-	task_lock(current);
-	unsafe = unsafe_exec(current);
-	security_bprm_apply_creds(bprm, unsafe);
-	task_unlock(current);
-	security_bprm_post_apply_creds(bprm);
-}
-EXPORT_SYMBOL(compute_creds);
-
 /*
  * Arguments are '\0' separated strings found at the location bprm->p
  * points to; chop off the first by relocating brpm->p to right after
@@ -1259,6 +1273,8 @@ EXPORT_SYMBOL(search_binary_handler);
 void free_bprm(struct linux_binprm *bprm)
 {
 	free_arg_pages(bprm);
+	if (bprm->cred)
+		abort_creds(bprm->cred);
 	kfree(bprm);
 }
 
@@ -1284,10 +1300,20 @@ int do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
+	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	if (retval < 0)
+		goto out_free;
+
+	retval = -ENOMEM;
+	bprm->cred = prepare_exec_creds();
+	if (!bprm->cred)
+		goto out_unlock;
+	check_unsafe_exec(bprm);
+
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
-		goto out_kfree;
+		goto out_unlock;
 
 	sched_exec();
 
@@ -1301,14 +1327,10 @@ int do_execve(char * filename,
 
 	bprm->argc = count(argv, MAX_ARG_STRINGS);
 	if ((retval = bprm->argc) < 0)
-		goto out_mm;
+		goto out;
 
 	bprm->envc = count(envp, MAX_ARG_STRINGS);
 	if ((retval = bprm->envc) < 0)
-		goto out_mm;
-
-	retval = security_bprm_alloc(bprm);
-	if (retval)
 		goto out;
 
 	retval = prepare_binprm(bprm);
@@ -1330,21 +1352,18 @@ int do_execve(char * filename,
 
 	current->flags &= ~PF_KTHREAD;
 	retval = search_binary_handler(bprm,regs);
-	if (retval >= 0) {
-		/* execve success */
-		security_bprm_free(bprm);
-		acct_update_integrals(current);
-		free_bprm(bprm);
-		if (displaced)
-			put_files_struct(displaced);
-		return retval;
-	}
+	if (retval < 0)
+		goto out;
 
-out:
-	if (bprm->security)
-		security_bprm_free(bprm);
+	/* execve succeeded */
+	mutex_unlock(&current->cred_exec_mutex);
+	acct_update_integrals(current);
+	free_bprm(bprm);
+	if (displaced)
+		put_files_struct(displaced);
+	return retval;
 
-out_mm:
+out:
 	if (bprm->mm)
 		mmput (bprm->mm);
 
@@ -1353,7 +1372,11 @@ out_file:
 		allow_write_access(bprm->file);
 		fput(bprm->file);
 	}
-out_kfree:
+
+out_unlock:
+	mutex_unlock(&current->cred_exec_mutex);
+
+out_free:
 	free_bprm(bprm);
 
 out_files:
diff --git a/fs/internal.h b/fs/internal.h
index 80aa9a023372..53af885f1732 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -10,6 +10,7 @@
  */
 
 struct super_block;
+struct linux_binprm;
 
 /*
  * block_dev.c
@@ -39,6 +40,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
  */
 extern void __init chrdev_init(void);
 
+/*
+ * exec.c
+ */
+extern void check_unsafe_exec(struct linux_binprm *);
+
 /*
  * namespace.c
  */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 0b2fcb698a63..e8ce2c4c7ac7 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -508,22 +508,6 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 	return 0;
 }
 
-/*
- * ieieeeeee, an audit function without a return code!
- *
- * This function might fail!  I decided that it didn't matter.  We are too late
- * to fail the syscall and the information isn't REQUIRED for any purpose.  It's
- * just nice to have.  We should be able to look at past audit logs to figure
- * out this process's current cap set along with the fcaps from the PATH record
- * and use that to come up with the final set.  Yeah, its ugly, but all the info
- * is still in the audit log.  So I'm not going to bother mentioning we failed
- * if we couldn't allocate memory.
- *
- * If someone changes their mind they could create the aux record earlier and
- * then search here and use that earlier allocation.  But I don't wanna.
- *
- * -Eric
- */
 static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				       const struct cred *new,
 				       const struct cred *old)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 7394b5b349ff..6cbfbe297180 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -35,16 +35,20 @@ struct linux_binprm{
 	struct mm_struct *mm;
 	unsigned long p; /* current top of mem */
 	unsigned int sh_bang:1,
-		     misc_bang:1;
+		misc_bang:1,
+		cred_prepared:1,/* true if creds already prepared (multiple
+				 * preps happen for interpreters) */
+		cap_effective:1;/* true if has elevated effective capabilities,
+				 * false if not; except for init which inherits
+				 * its parent's caps anyway */
 #ifdef __alpha__
 	unsigned int taso:1;
 #endif
 	unsigned int recursion_depth;
 	struct file * file;
-	int e_uid, e_gid;
-	kernel_cap_t cap_post_exec_permitted;
-	bool cap_effective;
-	void *security;
+	struct cred *cred;	/* new credentials */
+	int unsafe;		/* how unsafe this exec is (mask of LSM_UNSAFE_*) */
+	unsigned int per_clear;	/* bits to clear in current->personality */
 	int argc, envc;
 	char * filename;	/* Name of binary as seen by procps */
 	char * interp;		/* Name of the binary really executed. Most
@@ -101,7 +105,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
-extern void compute_creds(struct linux_binprm *binprm);
+extern void install_exec_creds(struct linux_binprm *bprm);
 extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
 extern int set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
diff --git a/include/linux/cred.h b/include/linux/cred.h
index eaf6fa695a04..8edb4d1d5427 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -84,8 +84,6 @@ struct thread_group_cred {
 	struct key	*process_keyring;	/* keyring private to this process */
 	struct rcu_head	rcu;			/* RCU deletion hook */
 };
-
-extern void release_tgcred(struct cred *cred);
 #endif
 
 /*
@@ -144,6 +142,7 @@ struct cred {
 extern void __put_cred(struct cred *);
 extern int copy_creds(struct task_struct *, unsigned long);
 extern struct cred *prepare_creds(void);
+extern struct cred *prepare_exec_creds(void);
 extern struct cred *prepare_usermodehelper_creds(void);
 extern int commit_creds(struct cred *);
 extern void abort_creds(struct cred *);
diff --git a/include/linux/key.h b/include/linux/key.h
index 69ecf0934b02..21d32a142c00 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -278,7 +278,6 @@ extern ctl_table key_sysctls[];
  * the userspace interface
  */
 extern int install_thread_keyring_to_cred(struct cred *cred);
-extern int exec_keys(struct task_struct *tsk);
 extern void key_fsuid_changed(struct task_struct *tsk);
 extern void key_fsgid_changed(struct task_struct *tsk);
 extern void key_init(void);
@@ -294,7 +293,6 @@ extern void key_init(void);
 #define make_key_ref(k, p)		NULL
 #define key_ref_to_ptr(k)		NULL
 #define is_key_possessed(k)		0
-#define exec_keys(t)			do { } while(0)
 #define key_fsuid_changed(t)		do { } while(0)
 #define key_fsgid_changed(t)		do { } while(0)
 #define key_init()			do { } while(0)
diff --git a/include/linux/security.h b/include/linux/security.h
index 68be11251447..56a0eed65673 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -57,8 +57,7 @@ extern int cap_capset(struct cred *new, const struct cred *old,
 		      const kernel_cap_t *effective,
 		      const kernel_cap_t *inheritable,
 		      const kernel_cap_t *permitted);
-extern int cap_bprm_set_security(struct linux_binprm *bprm);
-extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
+extern int cap_bprm_set_creds(struct linux_binprm *bprm);
 extern int cap_bprm_secureexec(struct linux_binprm *bprm);
 extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
 			      const void *value, size_t size, int flags);
@@ -110,7 +109,7 @@ extern unsigned long mmap_min_addr;
 struct sched_param;
 struct request_sock;
 
-/* bprm_apply_creds unsafe reasons */
+/* bprm->unsafe reasons */
 #define LSM_UNSAFE_SHARE	1
 #define LSM_UNSAFE_PTRACE	2
 #define LSM_UNSAFE_PTRACE_CAP	4
@@ -154,36 +153,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *
  * Security hooks for program execution operations.
  *
- * @bprm_alloc_security:
- *	Allocate and attach a security structure to the @bprm->security field.
- *	The security field is initialized to NULL when the bprm structure is
- *	allocated.
- *	@bprm contains the linux_binprm structure to be modified.
- *	Return 0 if operation was successful.
- * @bprm_free_security:
- *	@bprm contains the linux_binprm structure to be modified.
- *	Deallocate and clear the @bprm->security field.
- * @bprm_apply_creds:
- *	Compute and set the security attributes of a process being transformed
- *	by an execve operation based on the old attributes (current->security)
- *	and the information saved in @bprm->security by the set_security hook.
- *	Since this function may return an error, in which case the process will
- *      be killed.  However, it can leave the security attributes of the
- *	process unchanged if an access failure occurs at this point.
- *	bprm_apply_creds is called under task_lock.  @unsafe indicates various
- *	reasons why it may be unsafe to change security state.
- *	@bprm contains the linux_binprm structure.
- * @bprm_post_apply_creds:
- *	Runs after bprm_apply_creds with the task_lock dropped, so that
- *	functions which cannot be called safely under the task_lock can
- *	be used.  This hook is a good place to perform state changes on
- *	the process such as closing open file descriptors to which access
- *	is no longer granted if the attributes were changed.
- *	Note that a security module might need to save state between
- *	bprm_apply_creds and bprm_post_apply_creds to store the decision
- *	on whether the process may proceed.
- *	@bprm contains the linux_binprm structure.
- * @bprm_set_security:
+ * @bprm_set_creds:
  *	Save security information in the bprm->security field, typically based
  *	on information about the bprm->file, for later use by the apply_creds
  *	hook.  This hook may also optionally check permissions (e.g. for
@@ -196,15 +166,30 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@bprm contains the linux_binprm structure.
  *	Return 0 if the hook is successful and permission is granted.
  * @bprm_check_security:
- *	This hook mediates the point when a search for a binary handler	will
- *	begin.  It allows a check the @bprm->security value which is set in
- *	the preceding set_security call.  The primary difference from
- *	set_security is that the argv list and envp list are reliably
- *	available in @bprm.  This hook may be called multiple times
- *	during a single execve; and in each pass set_security is called
- *	first.
+ *	This hook mediates the point when a search for a binary handler will
+ *	begin.  It allows a check the @bprm->security value which is set in the
+ *	preceding set_creds call.  The primary difference from set_creds is
+ *	that the argv list and envp list are reliably available in @bprm.  This
+ *	hook may be called multiple times during a single execve; and in each
+ *	pass set_creds is called first.
  *	@bprm contains the linux_binprm structure.
  *	Return 0 if the hook is successful and permission is granted.
+ * @bprm_committing_creds:
+ *	Prepare to install the new security attributes of a process being
+ *	transformed by an execve operation, based on the old credentials
+ *	pointed to by @current->cred and the information set in @bprm->cred by
+ *	the bprm_set_creds hook.  @bprm points to the linux_binprm structure.
+ *	This hook is a good place to perform state changes on the process such
+ *	as closing open file descriptors to which access will no longer be
+ *	granted when the attributes are changed.  This is called immediately
+ *	before commit_creds().
+ * @bprm_committed_creds:
+ *	Tidy up after the installation of the new security attributes of a
+ *	process being transformed by an execve operation.  The new credentials
+ *	have, by this point, been set to @current->cred.  @bprm points to the
+ *	linux_binprm structure.  This hook is a good place to perform state
+ *	changes on the process such as clearing out non-inheritable signal
+ *	state.  This is called immediately after commit_creds().
  * @bprm_secureexec:
  *	Return a boolean value (0 or 1) indicating whether a "secure exec"
  *	is required.  The flag is passed in the auxiliary table
@@ -1301,13 +1286,11 @@ struct security_operations {
 	int (*settime) (struct timespec *ts, struct timezone *tz);
 	int (*vm_enough_memory) (struct mm_struct *mm, long pages);
 
-	int (*bprm_alloc_security) (struct linux_binprm *bprm);
-	void (*bprm_free_security) (struct linux_binprm *bprm);
-	int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe);
-	void (*bprm_post_apply_creds) (struct linux_binprm *bprm);
-	int (*bprm_set_security) (struct linux_binprm *bprm);
+	int (*bprm_set_creds) (struct linux_binprm *bprm);
 	int (*bprm_check_security) (struct linux_binprm *bprm);
 	int (*bprm_secureexec) (struct linux_binprm *bprm);
+	void (*bprm_committing_creds) (struct linux_binprm *bprm);
+	void (*bprm_committed_creds) (struct linux_binprm *bprm);
 
 	int (*sb_alloc_security) (struct super_block *sb);
 	void (*sb_free_security) (struct super_block *sb);
@@ -1569,12 +1552,10 @@ int security_settime(struct timespec *ts, struct timezone *tz);
 int security_vm_enough_memory(long pages);
 int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
 int security_vm_enough_memory_kern(long pages);
-int security_bprm_alloc(struct linux_binprm *bprm);
-void security_bprm_free(struct linux_binprm *bprm);
-int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
-void security_bprm_post_apply_creds(struct linux_binprm *bprm);
-int security_bprm_set(struct linux_binprm *bprm);
+int security_bprm_set_creds(struct linux_binprm *bprm);
 int security_bprm_check(struct linux_binprm *bprm);
+void security_bprm_committing_creds(struct linux_binprm *bprm);
+void security_bprm_committed_creds(struct linux_binprm *bprm);
 int security_bprm_secureexec(struct linux_binprm *bprm);
 int security_sb_alloc(struct super_block *sb);
 void security_sb_free(struct super_block *sb);
@@ -1812,32 +1793,22 @@ static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
 	return cap_vm_enough_memory(mm, pages);
 }
 
-static inline int security_bprm_alloc(struct linux_binprm *bprm)
-{
-	return 0;
-}
-
-static inline void security_bprm_free(struct linux_binprm *bprm)
-{ }
-
-static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
+static inline int security_bprm_set_creds(struct linux_binprm *bprm)
 {
-	return cap_bprm_apply_creds(bprm, unsafe);
+	return cap_bprm_set_creds(bprm);
 }
 
-static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm)
+static inline int security_bprm_check(struct linux_binprm *bprm)
 {
-	return;
+	return 0;
 }
 
-static inline int security_bprm_set(struct linux_binprm *bprm)
+static inline void security_bprm_committing_creds(struct linux_binprm *bprm)
 {
-	return cap_bprm_set_security(bprm);
 }
 
-static inline int security_bprm_check(struct linux_binprm *bprm)
+static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
 {
-	return 0;
 }
 
 static inline int security_bprm_secureexec(struct linux_binprm *bprm)
diff --git a/kernel/cred.c b/kernel/cred.c
index cb6b5eda978d..e6fcdd67b2ec 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -68,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu)
 /*
  * Release a set of thread group credentials.
  */
-void release_tgcred(struct cred *cred)
+static void release_tgcred(struct cred *cred)
 {
 #ifdef CONFIG_KEYS
 	struct thread_group_cred *tgcred = cred->tgcred;
@@ -163,6 +163,50 @@ error:
 }
 EXPORT_SYMBOL(prepare_creds);
 
+/*
+ * Prepare credentials for current to perform an execve()
+ * - The caller must hold current->cred_exec_mutex
+ */
+struct cred *prepare_exec_creds(void)
+{
+	struct thread_group_cred *tgcred = NULL;
+	struct cred *new;
+
+#ifdef CONFIG_KEYS
+	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
+	if (!tgcred)
+		return NULL;
+#endif
+
+	new = prepare_creds();
+	if (!new) {
+		kfree(tgcred);
+		return new;
+	}
+
+#ifdef CONFIG_KEYS
+	/* newly exec'd tasks don't get a thread keyring */
+	key_put(new->thread_keyring);
+	new->thread_keyring = NULL;
+
+	/* create a new per-thread-group creds for all this set of threads to
+	 * share */
+	memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
+
+	atomic_set(&tgcred->usage, 1);
+	spin_lock_init(&tgcred->lock);
+
+	/* inherit the session keyring; new process keyring */
+	key_get(tgcred->session_keyring);
+	tgcred->process_keyring = NULL;
+
+	release_tgcred(new);
+	new->tgcred = tgcred;
+#endif
+
+	return new;
+}
+
 /*
  * prepare new credentials for the usermode helper dispatcher
  */
diff --git a/security/capability.c b/security/capability.c
index efeb6d9e0e6a..185804f99ad1 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -32,24 +32,19 @@ static int cap_quota_on(struct dentry *dentry)
 	return 0;
 }
 
-static int cap_bprm_alloc_security(struct linux_binprm *bprm)
+static int cap_bprm_check_security (struct linux_binprm *bprm)
 {
 	return 0;
 }
 
-static void cap_bprm_free_security(struct linux_binprm *bprm)
+static void cap_bprm_committing_creds(struct linux_binprm *bprm)
 {
 }
 
-static void cap_bprm_post_apply_creds(struct linux_binprm *bprm)
+static void cap_bprm_committed_creds(struct linux_binprm *bprm)
 {
 }
 
-static int cap_bprm_check_security(struct linux_binprm *bprm)
-{
-	return 0;
-}
-
 static int cap_sb_alloc_security(struct super_block *sb)
 {
 	return 0;
@@ -827,11 +822,9 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, syslog);
 	set_to_cap_if_null(ops, settime);
 	set_to_cap_if_null(ops, vm_enough_memory);
-	set_to_cap_if_null(ops, bprm_alloc_security);
-	set_to_cap_if_null(ops, bprm_free_security);
-	set_to_cap_if_null(ops, bprm_apply_creds);
-	set_to_cap_if_null(ops, bprm_post_apply_creds);
-	set_to_cap_if_null(ops, bprm_set_security);
+	set_to_cap_if_null(ops, bprm_set_creds);
+	set_to_cap_if_null(ops, bprm_committing_creds);
+	set_to_cap_if_null(ops, bprm_committed_creds);
 	set_to_cap_if_null(ops, bprm_check_security);
 	set_to_cap_if_null(ops, bprm_secureexec);
 	set_to_cap_if_null(ops, sb_alloc_security);
diff --git a/security/commoncap.c b/security/commoncap.c
index b5419273f92d..51dfa11e8e56 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -167,7 +167,7 @@ int cap_capset(struct cred *new,
 
 static inline void bprm_clear_caps(struct linux_binprm *bprm)
 {
-	cap_clear(bprm->cap_post_exec_permitted);
+	cap_clear(bprm->cred->cap_permitted);
 	bprm->cap_effective = false;
 }
 
@@ -198,15 +198,15 @@ int cap_inode_killpriv(struct dentry *dentry)
 }
 
 static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
-					  struct linux_binprm *bprm)
+					  struct linux_binprm *bprm,
+					  bool *effective)
 {
+	struct cred *new = bprm->cred;
 	unsigned i;
 	int ret = 0;
 
 	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
-		bprm->cap_effective = true;
-	else
-		bprm->cap_effective = false;
+		*effective = true;
 
 	CAP_FOR_EACH_U32(i) {
 		__u32 permitted = caps->permitted.cap[i];
@@ -215,16 +215,13 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
 		/*
 		 * pP' = (X & fP) | (pI & fI)
 		 */
-		bprm->cap_post_exec_permitted.cap[i] =
-			(current->cred->cap_bset.cap[i] & permitted) |
-			(current->cred->cap_inheritable.cap[i] & inheritable);
+		new->cap_permitted.cap[i] =
+			(new->cap_bset.cap[i] & permitted) |
+			(new->cap_inheritable.cap[i] & inheritable);
 
-		if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) {
-			/*
-			 * insufficient to execute correctly
-			 */
+		if (permitted & ~new->cap_permitted.cap[i])
+			/* insufficient to execute correctly */
 			ret = -EPERM;
-		}
 	}
 
 	/*
@@ -232,7 +229,7 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
 	 * do not have enough capabilities, we return an error if they are
 	 * missing some "forced" (aka file-permitted) capabilities.
 	 */
-	return bprm->cap_effective ? ret : 0;
+	return *effective ? ret : 0;
 }
 
 int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
@@ -250,10 +247,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 
 	size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps,
 				   XATTR_CAPS_SZ);
-	if (size == -ENODATA || size == -EOPNOTSUPP) {
+	if (size == -ENODATA || size == -EOPNOTSUPP)
 		/* no data, that's ok */
 		return -ENODATA;
-	}
 	if (size < 0)
 		return size;
 
@@ -262,7 +258,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 
 	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
 
-	switch ((magic_etc & VFS_CAP_REVISION_MASK)) {
+	switch (magic_etc & VFS_CAP_REVISION_MASK) {
 	case VFS_CAP_REVISION_1:
 		if (size != XATTR_CAPS_SZ_1)
 			return -EINVAL;
@@ -283,11 +279,12 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 		cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
 		cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
 	}
+
 	return 0;
 }
 
 /* Locate any VFS capabilities: */
-static int get_file_caps(struct linux_binprm *bprm)
+static int get_file_caps(struct linux_binprm *bprm, bool *effective)
 {
 	struct dentry *dentry;
 	int rc = 0;
@@ -313,7 +310,10 @@ static int get_file_caps(struct linux_binprm *bprm)
 		goto out;
 	}
 
-	rc = bprm_caps_from_vfs_caps(&vcaps, bprm);
+	rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective);
+	if (rc == -EINVAL)
+		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
+		       __func__, rc, bprm->filename);
 
 out:
 	dput(dentry);
@@ -334,18 +334,27 @@ int cap_inode_killpriv(struct dentry *dentry)
 	return 0;
 }
 
-static inline int get_file_caps(struct linux_binprm *bprm)
+static inline int get_file_caps(struct linux_binprm *bprm, bool *effective)
 {
 	bprm_clear_caps(bprm);
 	return 0;
 }
 #endif
 
-int cap_bprm_set_security (struct linux_binprm *bprm)
+/*
+ * set up the new credentials for an exec'd task
+ */
+int cap_bprm_set_creds(struct linux_binprm *bprm)
 {
+	const struct cred *old = current_cred();
+	struct cred *new = bprm->cred;
+	bool effective;
 	int ret;
 
-	ret = get_file_caps(bprm);
+	effective = false;
+	ret = get_file_caps(bprm, &effective);
+	if (ret < 0)
+		return ret;
 
 	if (!issecure(SECURE_NOROOT)) {
 		/*
@@ -353,63 +362,47 @@ int cap_bprm_set_security (struct linux_binprm *bprm)
 		 * executables under compatibility mode, we override the
 		 * capability sets for the file.
 		 *
-		 * If only the real uid is 0, we do not set the effective
-		 * bit.
+		 * If only the real uid is 0, we do not set the effective bit.
 		 */
-		if (bprm->e_uid == 0 || current_uid() == 0) {
+		if (new->euid == 0 || new->uid == 0) {
 			/* pP' = (cap_bset & ~0) | (pI & ~0) */
-			bprm->cap_post_exec_permitted = cap_combine(
-				current->cred->cap_bset,
-				current->cred->cap_inheritable);
-			bprm->cap_effective = (bprm->e_uid == 0);
-			ret = 0;
+			new->cap_permitted = cap_combine(old->cap_bset,
+							 old->cap_inheritable);
 		}
+		if (new->euid == 0)
+			effective = true;
 	}
 
-	return ret;
-}
-
-int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
-{
-	const struct cred *old = current_cred();
-	struct cred *new;
-
-	new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
-
-	if (bprm->e_uid != old->uid || bprm->e_gid != old->gid ||
-	    !cap_issubset(bprm->cap_post_exec_permitted,
-			  old->cap_permitted)) {
-		set_dumpable(current->mm, suid_dumpable);
-		current->pdeath_signal = 0;
-
-		if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
-			if (!capable(CAP_SETUID)) {
-				bprm->e_uid = old->uid;
-				bprm->e_gid = old->gid;
-			}
-			if (cap_limit_ptraced_target()) {
-				bprm->cap_post_exec_permitted = cap_intersect(
-					bprm->cap_post_exec_permitted,
-					new->cap_permitted);
-			}
+	/* Don't let someone trace a set[ug]id/setpcap binary with the revised
+	 * credentials unless they have the appropriate permit
+	 */
+	if ((new->euid != old->uid ||
+	     new->egid != old->gid ||
+	     !cap_issubset(new->cap_permitted, old->cap_permitted)) &&
+	    bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
+		/* downgrade; they get no more than they had, and maybe less */
+		if (!capable(CAP_SETUID)) {
+			new->euid = new->uid;
+			new->egid = new->gid;
 		}
+		if (cap_limit_ptraced_target())
+			new->cap_permitted = cap_intersect(new->cap_permitted,
+							   old->cap_permitted);
 	}
 
-	new->suid = new->euid = new->fsuid = bprm->e_uid;
-	new->sgid = new->egid = new->fsgid = bprm->e_gid;
+	new->suid = new->fsuid = new->euid;
+	new->sgid = new->fsgid = new->egid;
 
-	/* For init, we want to retain the capabilities set
-	 * in the init_task struct. Thus we skip the usual
-	 * capability rules */
+	/* For init, we want to retain the capabilities set in the initial
+	 * task.  Thus we skip the usual capability rules
+	 */
 	if (!is_global_init(current)) {
-		new->cap_permitted = bprm->cap_post_exec_permitted;
-		if (bprm->cap_effective)
-			new->cap_effective = bprm->cap_post_exec_permitted;
+		if (effective)
+			new->cap_effective = new->cap_permitted;
 		else
 			cap_clear(new->cap_effective);
 	}
+	bprm->cap_effective = effective;
 
 	/*
 	 * Audit candidate if current->cap_effective is set
@@ -425,23 +418,31 @@ int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	 */
 	if (!cap_isclear(new->cap_effective)) {
 		if (!cap_issubset(CAP_FULL_SET, new->cap_effective) ||
-		    bprm->e_uid != 0 || new->uid != 0 ||
-		    issecure(SECURE_NOROOT))
-			audit_log_bprm_fcaps(bprm, new, old);
+		    new->euid != 0 || new->uid != 0 ||
+		    issecure(SECURE_NOROOT)) {
+			ret = audit_log_bprm_fcaps(bprm, new, old);
+			if (ret < 0)
+				return ret;
+		}
 	}
 
 	new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
-	return commit_creds(new);
+	return 0;
 }
 
-int cap_bprm_secureexec (struct linux_binprm *bprm)
+/*
+ * determine whether a secure execution is required
+ * - the creds have been committed at this point, and are no longer available
+ *   through bprm
+ */
+int cap_bprm_secureexec(struct linux_binprm *bprm)
 {
 	const struct cred *cred = current_cred();
 
 	if (cred->uid != 0) {
 		if (bprm->cap_effective)
 			return 1;
-		if (!cap_isclear(bprm->cap_post_exec_permitted))
+		if (!cap_isclear(cred->cap_permitted))
 			return 1;
 	}
 
@@ -477,7 +478,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
 }
 
 /* moved from kernel/sys.c. */
-/* 
+/*
  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
  * a process after a call to setuid, setreuid, or setresuid.
  *
@@ -491,10 +492,10 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
  *  capabilities are set to the permitted capabilities.
  *
- *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 
+ *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
  *  never happen.
  *
- *  -astor 
+ *  -astor
  *
  * cevans - New behaviour, Oct '99
  * A process may, via prctl(), elect to keep its capabilities when it
@@ -751,4 +752,3 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
-
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index df329f684a65..2f5d89e92b85 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -274,48 +274,6 @@ static int install_session_keyring(struct key *keyring)
 	return commit_creds(new);
 }
 
-/*****************************************************************************/
-/*
- * deal with execve()
- */
-int exec_keys(struct task_struct *tsk)
-{
-	struct thread_group_cred *tgcred = NULL;
-	struct cred *new;
-
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred)
-		return -ENOMEM;
-#endif
-
-	new = prepare_creds();
-	if (new < 0)
-		return -ENOMEM;
-
-	/* newly exec'd tasks don't get a thread keyring */
-	key_put(new->thread_keyring);
-	new->thread_keyring = NULL;
-
-	/* create a new per-thread-group creds for all this set of threads to
-	 * share */
-	memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
-
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-
-	/* inherit the session keyring; new process keyring */
-	key_get(tgcred->session_keyring);
-	tgcred->process_keyring = NULL;
-
-	release_tgcred(new);
-	new->tgcred = tgcred;
-
-	commit_creds(new);
-	return 0;
-
-} /* end exec_keys() */
-
 /*****************************************************************************/
 /*
  * the filesystem user ID changed
diff --git a/security/root_plug.c b/security/root_plug.c
index c3f68b5b372d..40fb4f15e27b 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -55,9 +55,9 @@ static int rootplug_bprm_check_security (struct linux_binprm *bprm)
 	struct usb_device *dev;
 
 	root_dbg("file %s, e_uid = %d, e_gid = %d\n",
-		 bprm->filename, bprm->e_uid, bprm->e_gid);
+		 bprm->filename, bprm->cred->euid, bprm->cred->egid);
 
-	if (bprm->e_gid == 0) {
+	if (bprm->cred->egid == 0) {
 		dev = usb_find_device(vendor_id, product_id);
 		if (!dev) {
 			root_dbg("e_gid = 0, and device not found, "
@@ -75,15 +75,12 @@ static struct security_operations rootplug_security_ops = {
 	.ptrace_may_access =		cap_ptrace_may_access,
 	.ptrace_traceme =		cap_ptrace_traceme,
 	.capget =			cap_capget,
-	.capset_check =			cap_capset_check,
-	.capset_set =			cap_capset_set,
+	.capset =			cap_capset,
 	.capable =			cap_capable,
 
-	.bprm_apply_creds =		cap_bprm_apply_creds,
-	.bprm_set_security =		cap_bprm_set_security,
+	.bprm_set_creds =		cap_bprm_set_creds,
 
-	.task_post_setuid =		cap_task_post_setuid,
-	.task_reparent_to_init =	cap_task_reparent_to_init,
+	.task_fix_setuid =		cap_task_fix_setuid,
 	.task_prctl =			cap_task_prctl,
 
 	.bprm_check_security =		rootplug_bprm_check_security,
diff --git a/security/security.c b/security/security.c
index a55d739c6864..dc5babb2d6d8 100644
--- a/security/security.c
+++ b/security/security.c
@@ -213,34 +213,24 @@ int security_vm_enough_memory_kern(long pages)
 	return security_ops->vm_enough_memory(current->mm, pages);
 }
 
-int security_bprm_alloc(struct linux_binprm *bprm)
+int security_bprm_set_creds(struct linux_binprm *bprm)
 {
-	return security_ops->bprm_alloc_security(bprm);
+	return security_ops->bprm_set_creds(bprm);
 }
 
-void security_bprm_free(struct linux_binprm *bprm)
-{
-	security_ops->bprm_free_security(bprm);
-}
-
-int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
-{
-	return security_ops->bprm_apply_creds(bprm, unsafe);
-}
-
-void security_bprm_post_apply_creds(struct linux_binprm *bprm)
+int security_bprm_check(struct linux_binprm *bprm)
 {
-	security_ops->bprm_post_apply_creds(bprm);
+	return security_ops->bprm_check_security(bprm);
 }
 
-int security_bprm_set(struct linux_binprm *bprm)
+void security_bprm_committing_creds(struct linux_binprm *bprm)
 {
-	return security_ops->bprm_set_security(bprm);
+	return security_ops->bprm_committing_creds(bprm);
 }
 
-int security_bprm_check(struct linux_binprm *bprm)
+void security_bprm_committed_creds(struct linux_binprm *bprm)
 {
-	return security_ops->bprm_check_security(bprm);
+	return security_ops->bprm_committed_creds(bprm);
 }
 
 int security_bprm_secureexec(struct linux_binprm *bprm)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c71bba78872f..21a592184633 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2029,59 +2029,45 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 
 /* binprm security operations */
 
-static int selinux_bprm_alloc_security(struct linux_binprm *bprm)
+static int selinux_bprm_set_creds(struct linux_binprm *bprm)
 {
-	struct bprm_security_struct *bsec;
-
-	bsec = kzalloc(sizeof(struct bprm_security_struct), GFP_KERNEL);
-	if (!bsec)
-		return -ENOMEM;
-
-	bsec->sid = SECINITSID_UNLABELED;
-	bsec->set = 0;
-
-	bprm->security = bsec;
-	return 0;
-}
-
-static int selinux_bprm_set_security(struct linux_binprm *bprm)
-{
-	struct task_security_struct *tsec;
-	struct inode *inode = bprm->file->f_path.dentry->d_inode;
+	const struct task_security_struct *old_tsec;
+	struct task_security_struct *new_tsec;
 	struct inode_security_struct *isec;
-	struct bprm_security_struct *bsec;
-	u32 newsid;
 	struct avc_audit_data ad;
+	struct inode *inode = bprm->file->f_path.dentry->d_inode;
 	int rc;
 
-	rc = secondary_ops->bprm_set_security(bprm);
+	rc = secondary_ops->bprm_set_creds(bprm);
 	if (rc)
 		return rc;
 
-	bsec = bprm->security;
-
-	if (bsec->set)
+	/* SELinux context only depends on initial program or script and not
+	 * the script interpreter */
+	if (bprm->cred_prepared)
 		return 0;
 
-	tsec = current_security();
+	old_tsec = current_security();
+	new_tsec = bprm->cred->security;
 	isec = inode->i_security;
 
 	/* Default to the current task SID. */
-	bsec->sid = tsec->sid;
+	new_tsec->sid = old_tsec->sid;
+	new_tsec->osid = old_tsec->sid;
 
 	/* Reset fs, key, and sock SIDs on execve. */
-	tsec->create_sid = 0;
-	tsec->keycreate_sid = 0;
-	tsec->sockcreate_sid = 0;
+	new_tsec->create_sid = 0;
+	new_tsec->keycreate_sid = 0;
+	new_tsec->sockcreate_sid = 0;
 
-	if (tsec->exec_sid) {
-		newsid = tsec->exec_sid;
+	if (old_tsec->exec_sid) {
+		new_tsec->sid = old_tsec->exec_sid;
 		/* Reset exec SID on execve. */
-		tsec->exec_sid = 0;
+		new_tsec->exec_sid = 0;
 	} else {
 		/* Check for a default transition on this program. */
-		rc = security_transition_sid(tsec->sid, isec->sid,
-					     SECCLASS_PROCESS, &newsid);
+		rc = security_transition_sid(old_tsec->sid, isec->sid,
+					     SECCLASS_PROCESS, &new_tsec->sid);
 		if (rc)
 			return rc;
 	}
@@ -2090,33 +2076,63 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm)
 	ad.u.fs.path = bprm->file->f_path;
 
 	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
-		newsid = tsec->sid;
+		new_tsec->sid = old_tsec->sid;
 
-	if (tsec->sid == newsid) {
-		rc = avc_has_perm(tsec->sid, isec->sid,
+	if (new_tsec->sid == old_tsec->sid) {
+		rc = avc_has_perm(old_tsec->sid, isec->sid,
 				  SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad);
 		if (rc)
 			return rc;
 	} else {
 		/* Check permissions for the transition. */
-		rc = avc_has_perm(tsec->sid, newsid,
+		rc = avc_has_perm(old_tsec->sid, new_tsec->sid,
 				  SECCLASS_PROCESS, PROCESS__TRANSITION, &ad);
 		if (rc)
 			return rc;
 
-		rc = avc_has_perm(newsid, isec->sid,
+		rc = avc_has_perm(new_tsec->sid, isec->sid,
 				  SECCLASS_FILE, FILE__ENTRYPOINT, &ad);
 		if (rc)
 			return rc;
 
-		/* Clear any possibly unsafe personality bits on exec: */
-		current->personality &= ~PER_CLEAR_ON_SETID;
+		/* Check for shared state */
+		if (bprm->unsafe & LSM_UNSAFE_SHARE) {
+			rc = avc_has_perm(old_tsec->sid, new_tsec->sid,
+					  SECCLASS_PROCESS, PROCESS__SHARE,
+					  NULL);
+			if (rc)
+				return -EPERM;
+		}
+
+		/* Make sure that anyone attempting to ptrace over a task that
+		 * changes its SID has the appropriate permit */
+		if (bprm->unsafe &
+		    (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+			struct task_struct *tracer;
+			struct task_security_struct *sec;
+			u32 ptsid = 0;
+
+			rcu_read_lock();
+			tracer = tracehook_tracer_task(current);
+			if (likely(tracer != NULL)) {
+				sec = __task_cred(tracer)->security;
+				ptsid = sec->sid;
+			}
+			rcu_read_unlock();
+
+			if (ptsid != 0) {
+				rc = avc_has_perm(ptsid, new_tsec->sid,
+						  SECCLASS_PROCESS,
+						  PROCESS__PTRACE, NULL);
+				if (rc)
+					return -EPERM;
+			}
+		}
 
-		/* Set the security field to the new SID. */
-		bsec->sid = newsid;
+		/* Clear any possibly unsafe personality bits on exec: */
+		bprm->per_clear |= PER_CLEAR_ON_SETID;
 	}
 
-	bsec->set = 1;
 	return 0;
 }
 
@@ -2125,7 +2141,6 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm)
 	return secondary_ops->bprm_check_security(bprm);
 }
 
-
 static int selinux_bprm_secureexec(struct linux_binprm *bprm)
 {
 	const struct cred *cred = current_cred();
@@ -2141,19 +2156,13 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm)
 		   the noatsecure permission is granted between
 		   the two SIDs, i.e. ahp returns 0. */
 		atsecure = avc_has_perm(osid, sid,
-					 SECCLASS_PROCESS,
-					 PROCESS__NOATSECURE, NULL);
+					SECCLASS_PROCESS,
+					PROCESS__NOATSECURE, NULL);
 	}
 
 	return (atsecure || secondary_ops->bprm_secureexec(bprm));
 }
 
-static void selinux_bprm_free_security(struct linux_binprm *bprm)
-{
-	kfree(bprm->security);
-	bprm->security = NULL;
-}
-
 extern struct vfsmount *selinuxfs_mount;
 extern struct dentry *selinux_null;
 
@@ -2252,108 +2261,78 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 	spin_unlock(&files->file_lock);
 }
 
-static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe)
+/*
+ * Prepare a process for imminent new credential changes due to exec
+ */
+static void selinux_bprm_committing_creds(struct linux_binprm *bprm)
 {
-	struct task_security_struct *tsec;
-	struct bprm_security_struct *bsec;
-	struct cred *new;
-	u32 sid;
-	int rc;
-
-	rc = secondary_ops->bprm_apply_creds(bprm, unsafe);
-	if (rc < 0)
-		return rc;
-
-	new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
+	struct task_security_struct *new_tsec;
+	struct rlimit *rlim, *initrlim;
+	int rc, i;
 
-	tsec = new->security;
+	secondary_ops->bprm_committing_creds(bprm);
 
-	bsec = bprm->security;
-	sid = bsec->sid;
-
-	tsec->osid = tsec->sid;
-	bsec->unsafe = 0;
-	if (tsec->sid != sid) {
-		/* Check for shared state.  If not ok, leave SID
-		   unchanged and kill. */
-		if (unsafe & LSM_UNSAFE_SHARE) {
-			rc = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS,
-					PROCESS__SHARE, NULL);
-			if (rc) {
-				bsec->unsafe = 1;
-				goto out;
-			}
-		}
+	new_tsec = bprm->cred->security;
+	if (new_tsec->sid == new_tsec->osid)
+		return;
 
-		/* Check for ptracing, and update the task SID if ok.
-		   Otherwise, leave SID unchanged and kill. */
-		if (unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
-			struct task_struct *tracer;
-			struct task_security_struct *sec;
-			u32 ptsid = 0;
+	/* Close files for which the new task SID is not authorized. */
+	flush_unauthorized_files(bprm->cred, current->files);
 
-			rcu_read_lock();
-			tracer = tracehook_tracer_task(current);
-			if (likely(tracer != NULL)) {
-				sec = __task_cred(tracer)->security;
-				ptsid = sec->sid;
-			}
-			rcu_read_unlock();
+	/* Always clear parent death signal on SID transitions. */
+	current->pdeath_signal = 0;
 
-			if (ptsid != 0) {
-				rc = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
-						  PROCESS__PTRACE, NULL);
-				if (rc) {
-					bsec->unsafe = 1;
-					goto out;
-				}
-			}
+	/* Check whether the new SID can inherit resource limits from the old
+	 * SID.  If not, reset all soft limits to the lower of the current
+	 * task's hard limit and the init task's soft limit.
+	 *
+	 * Note that the setting of hard limits (even to lower them) can be
+	 * controlled by the setrlimit check.  The inclusion of the init task's
+	 * soft limit into the computation is to avoid resetting soft limits
+	 * higher than the default soft limit for cases where the default is
+	 * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK.
+	 */
+	rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS,
+			  PROCESS__RLIMITINH, NULL);
+	if (rc) {
+		for (i = 0; i < RLIM_NLIMITS; i++) {
+			rlim = current->signal->rlim + i;
+			initrlim = init_task.signal->rlim + i;
+			rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
 		}
-		tsec->sid = sid;
+		update_rlimit_cpu(rlim->rlim_cur);
 	}
-
-out:
-	commit_creds(new);
-	return 0;
 }
 
 /*
- * called after apply_creds without the task lock held
+ * Clean up the process immediately after the installation of new credentials
+ * due to exec
  */
-static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
+static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 {
-	const struct cred *cred = current_cred();
-	struct task_security_struct *tsec;
-	struct rlimit *rlim, *initrlim;
+	const struct task_security_struct *tsec = current_security();
 	struct itimerval itimer;
-	struct bprm_security_struct *bsec;
 	struct sighand_struct *psig;
+	u32 osid, sid;
 	int rc, i;
 	unsigned long flags;
 
-	tsec = current_security();
-	bsec = bprm->security;
+	secondary_ops->bprm_committed_creds(bprm);
 
-	if (bsec->unsafe) {
-		force_sig_specific(SIGKILL, current);
-		return;
-	}
-	if (tsec->osid == tsec->sid)
+	osid = tsec->osid;
+	sid = tsec->sid;
+
+	if (sid == osid)
 		return;
 
-	/* Close files for which the new task SID is not authorized. */
-	flush_unauthorized_files(cred, current->files);
-
-	/* Check whether the new SID can inherit signal state
-	   from the old SID.  If not, clear itimers to avoid
-	   subsequent signal generation and flush and unblock
-	   signals. This must occur _after_ the task SID has
-	  been updated so that any kill done after the flush
-	  will be checked against the new SID. */
-	rc = avc_has_perm(tsec->osid, tsec->sid, SECCLASS_PROCESS,
-			  PROCESS__SIGINH, NULL);
+	/* Check whether the new SID can inherit signal state from the old SID.
+	 * If not, clear itimers to avoid subsequent signal generation and
+	 * flush and unblock signals.
+	 *
+	 * This must occur _after_ the task SID has been updated so that any
+	 * kill done after the flush will be checked against the new SID.
+	 */
+	rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL);
 	if (rc) {
 		memset(&itimer, 0, sizeof itimer);
 		for (i = 0; i < 3; i++)
@@ -2366,32 +2345,8 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 		spin_unlock_irq(&current->sighand->siglock);
 	}
 
-	/* Always clear parent death signal on SID transitions. */
-	current->pdeath_signal = 0;
-
-	/* Check whether the new SID can inherit resource limits
-	   from the old SID.  If not, reset all soft limits to
-	   the lower of the current task's hard limit and the init
-	   task's soft limit.  Note that the setting of hard limits
-	   (even to lower them) can be controlled by the setrlimit
-	   check. The inclusion of the init task's soft limit into
-	   the computation is to avoid resetting soft limits higher
-	   than the default soft limit for cases where the default
-	   is lower than the hard limit, e.g. RLIMIT_CORE or
-	   RLIMIT_STACK.*/
-	rc = avc_has_perm(tsec->osid, tsec->sid, SECCLASS_PROCESS,
-			  PROCESS__RLIMITINH, NULL);
-	if (rc) {
-		for (i = 0; i < RLIM_NLIMITS; i++) {
-			rlim = current->signal->rlim + i;
-			initrlim = init_task.signal->rlim+i;
-			rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
-		}
-		update_rlimit_cpu(rlim->rlim_cur);
-	}
-
-	/* Wake up the parent if it is waiting so that it can
-	   recheck wait permission to the new task SID. */
+	/* Wake up the parent if it is waiting so that it can recheck
+	 * wait permission to the new task SID. */
 	read_lock_irq(&tasklist_lock);
 	psig = current->parent->sighand;
 	spin_lock_irqsave(&psig->siglock, flags);
@@ -5556,12 +5511,10 @@ static struct security_operations selinux_ops = {
 	.netlink_send =			selinux_netlink_send,
 	.netlink_recv =			selinux_netlink_recv,
 
-	.bprm_alloc_security =		selinux_bprm_alloc_security,
-	.bprm_free_security =		selinux_bprm_free_security,
-	.bprm_apply_creds =		selinux_bprm_apply_creds,
-	.bprm_post_apply_creds =	selinux_bprm_post_apply_creds,
-	.bprm_set_security =		selinux_bprm_set_security,
+	.bprm_set_creds =		selinux_bprm_set_creds,
 	.bprm_check_security =		selinux_bprm_check_security,
+	.bprm_committing_creds =	selinux_bprm_committing_creds,
+	.bprm_committed_creds =		selinux_bprm_committed_creds,
 	.bprm_secureexec =		selinux_bprm_secureexec,
 
 	.sb_alloc_security =		selinux_sb_alloc_security,
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index f8be8d7fa26d..3cc45168f674 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -77,17 +77,6 @@ struct ipc_security_struct {
 	u32 sid;	/* SID of IPC resource */
 };
 
-struct bprm_security_struct {
-	u32 sid;		/* SID for transformed process */
-	unsigned char set;
-
-	/*
-	 * unsafe is used to share failure information from bprm_apply_creds()
-	 * to bprm_post_apply_creds().
-	 */
-	char unsafe;
-};
-
 struct netif_security_struct {
 	int ifindex;			/* device index */
 	u32 sid;			/* SID for this interface */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index e952b397153d..de396742abf4 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2596,8 +2596,7 @@ struct security_operations smack_ops = {
 	.settime = 			cap_settime,
 	.vm_enough_memory = 		cap_vm_enough_memory,
 
-	.bprm_apply_creds = 		cap_bprm_apply_creds,
-	.bprm_set_security = 		cap_bprm_set_security,
+	.bprm_set_creds = 		cap_bprm_set_creds,
 	.bprm_secureexec = 		cap_bprm_secureexec,
 
 	.sb_alloc_security = 		smack_sb_alloc_security,
-- 
cgit v1.2.3


From d76b0d9b2d87cfc95686e148767cbf7d0e22bdc0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:25 +1100
Subject: CRED: Use creds in file structs

Attach creds to file structs and discard f_uid/f_gid.

file_operations::open() methods (such as hppfs_open()) should use file->f_cred
rather than current_cred().  At the moment file->f_cred will be current_cred()
at this point.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/mips/kernel/vpe.c              |  4 ++--
 drivers/isdn/hysdn/hysdn_procconf.c |  6 ++++--
 fs/coda/file.c                      |  2 +-
 fs/file_table.c                     |  7 ++++---
 fs/hppfs/hppfs.c                    |  4 ++--
 include/linux/fs.h                  |  2 +-
 net/ipv4/netfilter/ipt_LOG.c        |  4 ++--
 net/ipv6/netfilter/ip6t_LOG.c       |  4 ++--
 net/netfilter/nfnetlink_log.c       |  5 +++--
 net/netfilter/xt_owner.c            | 16 ++++++++--------
 net/sched/cls_flow.c                |  4 ++--
 11 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 972b2d2b8401..09786e496375 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -1085,8 +1085,8 @@ static int vpe_open(struct inode *inode, struct file *filp)
 	v->load_addr = NULL;
 	v->len = 0;
 
-	v->uid = filp->f_uid;
-	v->gid = filp->f_gid;
+	v->uid = filp->f_cred->fsuid;
+	v->gid = filp->f_cred->fsgid;
 
 #ifdef CONFIG_MIPS_APSP_KSPD
 	/* get kspd to tell us when a syscall_exit happens */
diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c
index 484299b031f8..8f9f4912de32 100644
--- a/drivers/isdn/hysdn/hysdn_procconf.c
+++ b/drivers/isdn/hysdn/hysdn_procconf.c
@@ -246,7 +246,8 @@ hysdn_conf_open(struct inode *ino, struct file *filep)
 	}
 	if (card->debug_flags & (LOG_PROC_OPEN | LOG_PROC_ALL))
 		hysdn_addlog(card, "config open for uid=%d gid=%d mode=0x%x",
-			     filep->f_uid, filep->f_gid, filep->f_mode);
+			     filep->f_cred->fsuid, filep->f_cred->fsgid,
+			     filep->f_mode);
 
 	if ((filep->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_WRITE) {
 		/* write only access -> write boot file or conf line */
@@ -331,7 +332,8 @@ hysdn_conf_close(struct inode *ino, struct file *filep)
 	}
 	if (card->debug_flags & (LOG_PROC_OPEN | LOG_PROC_ALL))
 		hysdn_addlog(card, "config close for uid=%d gid=%d mode=0x%x",
-			     filep->f_uid, filep->f_gid, filep->f_mode);
+			     filep->f_cred->fsuid, filep->f_cred->fsgid,
+			     filep->f_mode);
 
 	if ((filep->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_WRITE) {
 		/* write only access -> write boot file or conf line */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29137ff3ca67..5a8769985494 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -174,7 +174,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 
 	err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
-			  coda_flags, coda_file->f_uid);
+			  coda_flags, coda_file->f_cred->fsuid);
 
 	host_inode = cfi->cfi_container->f_path.dentry->d_inode;
 	cii = ITOC(coda_inode);
diff --git a/fs/file_table.c b/fs/file_table.c
index bc4563fe791d..0fbcacc3ea75 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,7 +36,9 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
 static inline void file_free_rcu(struct rcu_head *head)
 {
-	struct file *f =  container_of(head, struct file, f_u.fu_rcuhead);
+	struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
+
+	put_cred(f->f_cred);
 	kmem_cache_free(filp_cachep, f);
 }
 
@@ -121,8 +123,7 @@ struct file *get_empty_filp(void)
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
-	f->f_uid = cred->fsuid;
-	f->f_gid = cred->fsgid;
+	f->f_cred = get_cred(cred);
 	eventpoll_init_file(f);
 	/* f->f_version: 0 */
 	return f;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 795e2c130ad7..b278f7f52024 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -426,7 +426,7 @@ static int file_mode(int fmode)
 
 static int hppfs_open(struct inode *inode, struct file *file)
 {
-	const struct cred *cred = current_cred();
+	const struct cred *cred = file->f_cred;
 	struct hppfs_private *data;
 	struct vfsmount *proc_mnt;
 	struct dentry *proc_dentry;
@@ -490,7 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
 
 static int hppfs_dir_open(struct inode *inode, struct file *file)
 {
-	const struct cred *cred = current_cred();
+	const struct cred *cred = file->f_cred;
 	struct hppfs_private *data;
 	struct vfsmount *proc_mnt;
 	struct dentry *proc_dentry;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3bfec1327b8d..c0fb6d81d89b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -827,7 +827,7 @@ struct file {
 	fmode_t			f_mode;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
-	unsigned int		f_uid, f_gid;
+	const struct cred	*f_cred;
 	struct file_ra_state	f_ra;
 
 	u64			f_version;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index fc6ce04a3e35..7b5dbe118c09 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -340,8 +340,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
 			printk("UID=%u GID=%u ",
-				skb->sk->sk_socket->file->f_uid,
-				skb->sk->sk_socket->file->f_gid);
+				skb->sk->sk_socket->file->f_cred->fsuid,
+				skb->sk->sk_socket->file->f_cred->fsgid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index caa441d09567..871d157cec4e 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -364,8 +364,8 @@ static void dump_packet(const struct nf_loginfo *info,
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
 			printk("UID=%u GID=%u ",
-				skb->sk->sk_socket->file->f_uid,
-				skb->sk->sk_socket->file->f_gid);
+				skb->sk->sk_socket->file->f_cred->fsuid,
+				skb->sk->sk_socket->file->f_cred->fsgid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
 	}
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 41e0105d3828..38f9efd90e8d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -474,8 +474,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 	if (skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
-			__be32 uid = htonl(skb->sk->sk_socket->file->f_uid);
-			__be32 gid = htonl(skb->sk->sk_socket->file->f_gid);
+			struct file *file = skb->sk->sk_socket->file;
+			__be32 uid = htonl(file->f_cred->fsuid);
+			__be32 gid = htonl(file->f_cred->fsgid);
 			/* need to unlock here since NLA_PUT may goto */
 			read_unlock_bh(&skb->sk->sk_callback_lock);
 			NLA_PUT_BE32(inst->skb, NFULA_UID, uid);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index f19ebd9b78f5..22b2a5e881ea 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -34,12 +34,12 @@ owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 		return false;
 
 	if (info->match & IPT_OWNER_UID)
-		if ((filp->f_uid != info->uid) ^
+		if ((filp->f_cred->fsuid != info->uid) ^
 		    !!(info->invert & IPT_OWNER_UID))
 			return false;
 
 	if (info->match & IPT_OWNER_GID)
-		if ((filp->f_gid != info->gid) ^
+		if ((filp->f_cred->fsgid != info->gid) ^
 		    !!(info->invert & IPT_OWNER_GID))
 			return false;
 
@@ -60,12 +60,12 @@ owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 		return false;
 
 	if (info->match & IP6T_OWNER_UID)
-		if ((filp->f_uid != info->uid) ^
+		if ((filp->f_cred->fsuid != info->uid) ^
 		    !!(info->invert & IP6T_OWNER_UID))
 			return false;
 
 	if (info->match & IP6T_OWNER_GID)
-		if ((filp->f_gid != info->gid) ^
+		if ((filp->f_cred->fsgid != info->gid) ^
 		    !!(info->invert & IP6T_OWNER_GID))
 			return false;
 
@@ -93,14 +93,14 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
 
 	if (info->match & XT_OWNER_UID)
-		if ((filp->f_uid >= info->uid_min &&
-		    filp->f_uid <= info->uid_max) ^
+		if ((filp->f_cred->fsuid >= info->uid_min &&
+		    filp->f_cred->fsuid <= info->uid_max) ^
 		    !(info->invert & XT_OWNER_UID))
 			return false;
 
 	if (info->match & XT_OWNER_GID)
-		if ((filp->f_gid >= info->gid_min &&
-		    filp->f_gid <= info->gid_max) ^
+		if ((filp->f_cred->fsgid >= info->gid_min &&
+		    filp->f_cred->fsgid <= info->gid_max) ^
 		    !(info->invert & XT_OWNER_GID))
 			return false;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 0ebaff637e31..0ef4e3065bcd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -260,14 +260,14 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb)
 static u32 flow_get_skuid(const struct sk_buff *skb)
 {
 	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
-		return skb->sk->sk_socket->file->f_uid;
+		return skb->sk->sk_socket->file->f_cred->fsuid;
 	return 0;
 }
 
 static u32 flow_get_skgid(const struct sk_buff *skb)
 {
 	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
-		return skb->sk->sk_socket->file->f_gid;
+		return skb->sk->sk_socket->file->f_cred->fsgid;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 98870ab0a5a3f1822aee681d2997017e1c87d026 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:26 +1100
Subject: CRED: Documentation

Document credentials and the new credentials API.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 Documentation/credentials.txt | 582 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/cred.h          |  12 +-
 kernel/cred.c                 |   2 +-
 3 files changed, 594 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/credentials.txt

(limited to 'include')

diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt
new file mode 100644
index 000000000000..df03169782ea
--- /dev/null
+++ b/Documentation/credentials.txt
@@ -0,0 +1,582 @@
+			     ====================
+			     CREDENTIALS IN LINUX
+			     ====================
+
+By: David Howells <dhowells@redhat.com>
+
+Contents:
+
+ (*) Overview.
+
+ (*) Types of credentials.
+
+ (*) File markings.
+
+ (*) Task credentials.
+
+     - Immutable credentials.
+     - Accessing task credentials.
+     - Accessing another task's credentials.
+     - Altering credentials.
+     - Managing credentials.
+
+ (*) Open file credentials.
+
+ (*) Overriding the VFS's use of credentials.
+
+
+========
+OVERVIEW
+========
+
+There are several parts to the security check performed by Linux when one
+object acts upon another:
+
+ (1) Objects.
+
+     Objects are things in the system that may be acted upon directly by
+     userspace programs.  Linux has a variety of actionable objects, including:
+
+	- Tasks
+	- Files/inodes
+	- Sockets
+	- Message queues
+	- Shared memory segments
+	- Semaphores
+	- Keys
+
+     As a part of the description of all these objects there is a set of
+     credentials.  What's in the set depends on the type of object.
+
+ (2) Object ownership.
+
+     Amongst the credentials of most objects, there will be a subset that
+     indicates the ownership of that object.  This is used for resource
+     accounting and limitation (disk quotas and task rlimits for example).
+
+     In a standard UNIX filesystem, for instance, this will be defined by the
+     UID marked on the inode.
+
+ (3) The objective context.
+
+     Also amongst the credentials of those objects, there will be a subset that
+     indicates the 'objective context' of that object.  This may or may not be
+     the same set as in (2) - in standard UNIX files, for instance, this is the
+     defined by the UID and the GID marked on the inode.
+
+     The objective context is used as part of the security calculation that is
+     carried out when an object is acted upon.
+
+ (4) Subjects.
+
+     A subject is an object that is acting upon another object.
+
+     Most of the objects in the system are inactive: they don't act on other
+     objects within the system.  Processes/tasks are the obvious exception:
+     they do stuff; they access and manipulate things.
+
+     Objects other than tasks may under some circumstances also be subjects.
+     For instance an open file may send SIGIO to a task using the UID and EUID
+     given to it by a task that called fcntl(F_SETOWN) upon it.  In this case,
+     the file struct will have a subjective context too.
+
+ (5) The subjective context.
+
+     A subject has an additional interpretation of its credentials.  A subset
+     of its credentials forms the 'subjective context'.  The subjective context
+     is used as part of the security calculation that is carried out when a
+     subject acts.
+
+     A Linux task, for example, has the FSUID, FSGID and the supplementary
+     group list for when it is acting upon a file - which are quite separate
+     from the real UID and GID that normally form the objective context of the
+     task.
+
+ (6) Actions.
+
+     Linux has a number of actions available that a subject may perform upon an
+     object.  The set of actions available depends on the nature of the subject
+     and the object.
+
+     Actions include reading, writing, creating and deleting files; forking or
+     signalling and tracing tasks.
+
+ (7) Rules, access control lists and security calculations.
+
+     When a subject acts upon an object, a security calculation is made.  This
+     involves taking the subjective context, the objective context and the
+     action, and searching one or more sets of rules to see whether the subject
+     is granted or denied permission to act in the desired manner on the
+     object, given those contexts.
+
+     There are two main sources of rules:
+
+     (a) Discretionary access control (DAC):
+
+	 Sometimes the object will include sets of rules as part of its
+	 description.  This is an 'Access Control List' or 'ACL'.  A Linux
+	 file may supply more than one ACL.
+
+	 A traditional UNIX file, for example, includes a permissions mask that
+	 is an abbreviated ACL with three fixed classes of subject ('user',
+	 'group' and 'other'), each of which may be granted certain privileges
+	 ('read', 'write' and 'execute' - whatever those map to for the object
+	 in question).  UNIX file permissions do not allow the arbitrary
+	 specification of subjects, however, and so are of limited use.
+
+	 A Linux file might also sport a POSIX ACL.  This is a list of rules
+	 that grants various permissions to arbitrary subjects.
+
+     (b) Mandatory access control (MAC):
+
+	 The system as a whole may have one or more sets of rules that get
+	 applied to all subjects and objects, regardless of their source.
+	 SELinux and Smack are examples of this.
+
+	 In the case of SELinux and Smack, each object is given a label as part
+	 of its credentials.  When an action is requested, they take the
+	 subject label, the object label and the action and look for a rule
+	 that says that this action is either granted or denied.
+
+
+====================
+TYPES OF CREDENTIALS
+====================
+
+The Linux kernel supports the following types of credentials:
+
+ (1) Traditional UNIX credentials.
+
+	Real User ID
+	Real Group ID
+
+     The UID and GID are carried by most, if not all, Linux objects, even if in
+     some cases it has to be invented (FAT or CIFS files for example, which are
+     derived from Windows).  These (mostly) define the objective context of
+     that object, with tasks being slightly different in some cases.
+
+	Effective, Saved and FS User ID
+	Effective, Saved and FS Group ID
+	Supplementary groups
+
+     These are additional credentials used by tasks only.  Usually, an
+     EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID
+     will be used as the objective.  For tasks, it should be noted that this is
+     not always true.
+
+ (2) Capabilities.
+
+	Set of permitted capabilities
+	Set of inheritable capabilities
+	Set of effective capabilities
+	Capability bounding set
+
+     These are only carried by tasks.  They indicate superior capabilities
+     granted piecemeal to a task that an ordinary task wouldn't otherwise have.
+     These are manipulated implicitly by changes to the traditional UNIX
+     credentials, but can also be manipulated directly by the capset() system
+     call.
+
+     The permitted capabilities are those caps that the process might grant
+     itself to its effective or permitted sets through capset().  This
+     inheritable set might also be so constrained.
+
+     The effective capabilities are the ones that a task is actually allowed to
+     make use of itself.
+
+     The inheritable capabilities are the ones that may get passed across
+     execve().
+
+     The bounding set limits the capabilities that may be inherited across
+     execve(), especially when a binary is executed that will execute as UID 0.
+
+ (3) Secure management flags (securebits).
+
+     These are only carried by tasks.  These govern the way the above
+     credentials are manipulated and inherited over certain operations such as
+     execve().  They aren't used directly as objective or subjective
+     credentials.
+
+ (4) Keys and keyrings.
+
+     These are only carried by tasks.  They carry and cache security tokens
+     that don't fit into the other standard UNIX credentials.  They are for
+     making such things as network filesystem keys available to the file
+     accesses performed by processes, without the necessity of ordinary
+     programs having to know about security details involved.
+
+     Keyrings are a special type of key.  They carry sets of other keys and can
+     be searched for the desired key.  Each process may subscribe to a number
+     of keyrings:
+
+	Per-thread keying
+	Per-process keyring
+	Per-session keyring
+
+     When a process accesses a key, if not already present, it will normally be
+     cached on one of these keyrings for future accesses to find.
+
+     For more information on using keys, see Documentation/keys.txt.
+
+ (5) LSM
+
+     The Linux Security Module allows extra controls to be placed over the
+     operations that a task may do.  Currently Linux supports two main
+     alternate LSM options: SELinux and Smack.
+
+     Both work by labelling the objects in a system and then applying sets of
+     rules (policies) that say what operations a task with one label may do to
+     an object with another label.
+
+ (6) AF_KEY
+
+     This is a socket-based approach to credential management for networking
+     stacks [RFC 2367].  It isn't discussed by this document as it doesn't
+     interact directly with task and file credentials; rather it keeps system
+     level credentials.
+
+
+When a file is opened, part of the opening task's subjective context is
+recorded in the file struct created.  This allows operations using that file
+struct to use those credentials instead of the subjective context of the task
+that issued the operation.  An example of this would be a file opened on a
+network filesystem where the credentials of the opened file should be presented
+to the server, regardless of who is actually doing a read or a write upon it.
+
+
+=============
+FILE MARKINGS
+=============
+
+Files on disk or obtained over the network may have annotations that form the
+objective security context of that file.  Depending on the type of filesystem,
+this may include one or more of the following:
+
+ (*) UNIX UID, GID, mode;
+
+ (*) Windows user ID;
+
+ (*) Access control list;
+
+ (*) LSM security label;
+
+ (*) UNIX exec privilege escalation bits (SUID/SGID);
+
+ (*) File capabilities exec privilege escalation bits.
+
+These are compared to the task's subjective security context, and certain
+operations allowed or disallowed as a result.  In the case of execve(), the
+privilege escalation bits come into play, and may allow the resulting process
+extra privileges, based on the annotations on the executable file.
+
+
+================
+TASK CREDENTIALS
+================
+
+In Linux, all of a task's credentials are held in (uid, gid) or through
+(groups, keys, LSM security) a refcounted structure of type 'struct cred'.
+Each task points to its credentials by a pointer called 'cred' in its
+task_struct.
+
+Once a set of credentials has been prepared and committed, it may not be
+changed, barring the following exceptions:
+
+ (1) its reference count may be changed;
+
+ (2) the reference count on the group_info struct it points to may be changed;
+
+ (3) the reference count on the security data it points to may be changed;
+
+ (4) the reference count on any keyrings it points to may be changed;
+
+ (5) any keyrings it points to may be revoked, expired or have their security
+     attributes changed; and
+
+ (6) the contents of any keyrings to which it points may be changed (the whole
+     point of keyrings being a shared set of credentials, modifiable by anyone
+     with appropriate access).
+
+To alter anything in the cred struct, the copy-and-replace principle must be
+adhered to.  First take a copy, then alter the copy and then use RCU to change
+the task pointer to make it point to the new copy.  There are wrappers to aid
+with this (see below).
+
+A task may only alter its _own_ credentials; it is no longer permitted for a
+task to alter another's credentials.  This means the capset() system call is no
+longer permitted to take any PID other than the one of the current process.
+Also keyctl_instantiate() and keyctl_negate() functions no longer permit
+attachment to process-specific keyrings in the requesting process as the
+instantiating process may need to create them.
+
+
+IMMUTABLE CREDENTIALS
+---------------------
+
+Once a set of credentials has been made public (by calling commit_creds() for
+example), it must be considered immutable, barring two exceptions:
+
+ (1) The reference count may be altered.
+
+ (2) Whilst the keyring subscriptions of a set of credentials may not be
+     changed, the keyrings subscribed to may have their contents altered.
+
+To catch accidental credential alteration at compile time, struct task_struct
+has _const_ pointers to its credential sets, as does struct file.  Furthermore,
+certain functions such as get_cred() and put_cred() operate on const pointers,
+thus rendering casts unnecessary, but require to temporarily ditch the const
+qualification to be able to alter the reference count.
+
+
+ACCESSING TASK CREDENTIALS
+--------------------------
+
+A task being able to alter only its own credentials permits the current process
+to read or replace its own credentials without the need for any form of locking
+- which simplifies things greatly.  It can just call:
+
+	const struct cred *current_cred()
+
+to get a pointer to its credentials structure, and it doesn't have to release
+it afterwards.
+
+There are convenience wrappers for retrieving specific aspects of a task's
+credentials (the value is simply returned in each case):
+
+	uid_t current_uid(void)		Current's real UID
+	gid_t current_gid(void)		Current's real GID
+	uid_t current_euid(void)	Current's effective UID
+	gid_t current_egid(void)	Current's effective GID
+	uid_t current_fsuid(void)	Current's file access UID
+	gid_t current_fsgid(void)	Current's file access GID
+	kernel_cap_t current_cap(void)	Current's effective capabilities
+	void *current_security(void)	Current's LSM security pointer
+	struct user_struct *current_user(void)  Current's user account
+
+There are also convenience wrappers for retrieving specific associated pairs of
+a task's credentials:
+
+	void current_uid_gid(uid_t *, gid_t *);
+	void current_euid_egid(uid_t *, gid_t *);
+	void current_fsuid_fsgid(uid_t *, gid_t *);
+
+which return these pairs of values through their arguments after retrieving
+them from the current task's credentials.
+
+
+In addition, there is a function for obtaining a reference on the current
+process's current set of credentials:
+
+	const struct cred *get_current_cred(void);
+
+and functions for getting references to one of the credentials that don't
+actually live in struct cred:
+
+	struct user_struct *get_current_user(void);
+	struct group_info *get_current_groups(void);
+
+which get references to the current process's user accounting structure and
+supplementary groups list respectively.
+
+Once a reference has been obtained, it must be released with put_cred(),
+free_uid() or put_group_info() as appropriate.
+
+
+ACCESSING ANOTHER TASK'S CREDENTIALS
+------------------------------------
+
+Whilst a task may access its own credentials without the need for locking, the
+same is not true of a task wanting to access another task's credentials.  It
+must use the RCU read lock and rcu_dereference().
+
+The rcu_dereference() is wrapped by:
+
+	const struct cred *__task_cred(struct task_struct *task);
+
+This should be used inside the RCU read lock, as in the following example:
+
+	void foo(struct task_struct *t, struct foo_data *f)
+	{
+		const struct cred *tcred;
+		...
+		rcu_read_lock();
+		tcred = __task_cred(t);
+		f->uid = tcred->uid;
+		f->gid = tcred->gid;
+		f->groups = get_group_info(tcred->groups);
+		rcu_read_unlock();
+		...
+	}
+
+A function need not get RCU read lock to use __task_cred() if it is holding a
+spinlock at the time as this implicitly holds the RCU read lock.
+
+Should it be necessary to hold another task's credentials for a long period of
+time, and possibly to sleep whilst doing so, then the caller should get a
+reference on them using:
+
+	const struct cred *get_task_cred(struct task_struct *task);
+
+This does all the RCU magic inside of it.  The caller must call put_cred() on
+the credentials so obtained when they're finished with.
+
+There are a couple of convenience functions to access bits of another task's
+credentials, hiding the RCU magic from the caller:
+
+	uid_t task_uid(task)		Task's real UID
+	uid_t task_euid(task)		Task's effective UID
+
+If the caller is holding a spinlock or the RCU read lock at the time anyway,
+then:
+
+	__task_cred(task)->uid
+	__task_cred(task)->euid
+
+should be used instead.  Similarly, if multiple aspects of a task's credentials
+need to be accessed, RCU read lock or a spinlock should be used, __task_cred()
+called, the result stored in a temporary pointer and then the credential
+aspects called from that before dropping the lock.  This prevents the
+potentially expensive RCU magic from being invoked multiple times.
+
+Should some other single aspect of another task's credentials need to be
+accessed, then this can be used:
+
+	task_cred_xxx(task, member)
+
+where 'member' is a non-pointer member of the cred struct.  For instance:
+
+	uid_t task_cred_xxx(task, suid);
+
+will retrieve 'struct cred::suid' from the task, doing the appropriate RCU
+magic.  This may not be used for pointer members as what they point to may
+disappear the moment the RCU read lock is dropped.
+
+
+ALTERING CREDENTIALS
+--------------------
+
+As previously mentioned, a task may only alter its own credentials, and may not
+alter those of another task.  This means that it doesn't need to use any
+locking to alter its own credentials.
+
+To alter the current process's credentials, a function should first prepare a
+new set of credentials by calling:
+
+	struct cred *prepare_creds(void);
+
+this locks current->cred_replace_mutex and then allocates and constructs a
+duplicate of the current process's credentials, returning with the mutex still
+held if successful.  It returns NULL if not successful (out of memory).
+
+The mutex prevents ptrace() from altering the ptrace state of a process whilst
+security checks on credentials construction and changing is taking place as
+the ptrace state may alter the outcome, particularly in the case of execve().
+
+The new credentials set should be altered appropriately, and any security
+checks and hooks done.  Both the current and the proposed sets of credentials
+are available for this purpose as current_cred() will return the current set
+still at this point.
+
+
+When the credential set is ready, it should be committed to the current process
+by calling:
+
+	int commit_creds(struct cred *new);
+
+This will alter various aspects of the credentials and the process, giving the
+LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually
+commit the new credentials to current->cred, it will release
+current->cred_replace_mutex to allow ptrace() to take place, and it will notify
+the scheduler and others of the changes.
+
+This function is guaranteed to return 0, so that it can be tail-called at the
+end of such functions as sys_setresuid().
+
+Note that this function consumes the caller's reference to the new credentials.
+The caller should _not_ call put_cred() on the new credentials afterwards.
+
+Furthermore, once this function has been called on a new set of credentials,
+those credentials may _not_ be changed further.
+
+
+Should the security checks fail or some other error occur after prepare_creds()
+has been called, then the following function should be invoked:
+
+	void abort_creds(struct cred *new);
+
+This releases the lock on current->cred_replace_mutex that prepare_creds() got
+and then releases the new credentials.
+
+
+A typical credentials alteration function would look something like this:
+
+	int alter_suid(uid_t suid)
+	{
+		struct cred *new;
+		int ret;
+
+		new = prepare_creds();
+		if (!new)
+			return -ENOMEM;
+
+		new->suid = suid;
+		ret = security_alter_suid(new);
+		if (ret < 0) {
+			abort_creds(new);
+			return ret;
+		}
+
+		return commit_creds(new);
+	}
+
+
+MANAGING CREDENTIALS
+--------------------
+
+There are some functions to help manage credentials:
+
+ (*) void put_cred(const struct cred *cred);
+
+     This releases a reference to the given set of credentials.  If the
+     reference count reaches zero, the credentials will be scheduled for
+     destruction by the RCU system.
+
+ (*) const struct cred *get_cred(const struct cred *cred);
+
+     This gets a reference on a live set of credentials, returning a pointer to
+     that set of credentials.
+
+ (*) struct cred *get_new_cred(struct cred *cred);
+
+     This gets a reference on a set of credentials that is under construction
+     and is thus still mutable, returning a pointer to that set of credentials.
+
+
+=====================
+OPEN FILE CREDENTIALS
+=====================
+
+When a new file is opened, a reference is obtained on the opening task's
+credentials and this is attached to the file struct as 'f_cred' in place of
+'f_uid' and 'f_gid'.  Code that used to access file->f_uid and file->f_gid
+should now access file->f_cred->fsuid and file->f_cred->fsgid.
+
+It is safe to access f_cred without the use of RCU or locking because the
+pointer will not change over the lifetime of the file struct, and nor will the
+contents of the cred struct pointed to, barring the exceptions listed above
+(see the Task Credentials section).
+
+
+=======================================
+OVERRIDING THE VFS'S USE OF CREDENTIALS
+=======================================
+
+Under some circumstances it is desirable to override the credentials used by
+the VFS, and that can be done by calling into such as vfs_mkdir() with a
+different set of credentials.  This is done in the following places:
+
+ (*) sys_faccessat().
+
+ (*) do_coredump().
+
+ (*) nfs4recover.c.
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 8edb4d1d5427..794aab5c66e5 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -1,4 +1,4 @@
-/* Credentials management
+/* Credentials management - see Documentation/credentials.txt
  *
  * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -169,6 +169,12 @@ static inline struct cred *get_new_cred(struct cred *cred)
  *
  * Get a reference on the specified set of credentials.  The caller must
  * release the reference.
+ *
+ * This is used to deal with a committed set of credentials.  Although the
+ * pointer is const, this will temporarily discard the const and increment the
+ * usage count.  The purpose of this is to attempt to catch at compile time the
+ * accidental alteration of a set of credentials that should be considered
+ * immutable.
  */
 static inline const struct cred *get_cred(const struct cred *cred)
 {
@@ -181,6 +187,10 @@ static inline const struct cred *get_cred(const struct cred *cred)
  *
  * Release a reference to a set of credentials, deleting them when the last ref
  * is released.
+ *
+ * This takes a const pointer to a set of credentials because the credentials
+ * on task_struct are attached by const pointers to prevent accidental
+ * alteration of otherwise immutable credential sets.
  */
 static inline void put_cred(const struct cred *_cred)
 {
diff --git a/kernel/cred.c b/kernel/cred.c
index e6fcdd67b2ec..b8bd2f99d8ce 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -1,4 +1,4 @@
-/* Task credentials management
+/* Task credentials management - see Documentation/credentials.txt
  *
  * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
-- 
cgit v1.2.3


From 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:26 +1100
Subject: CRED: Differentiate objective and effective subjective credentials on
 a task

Differentiate the objective and real subjective credentials from the effective
subjective credentials on a task by introducing a second credentials pointer
into the task_struct.

task_struct::real_cred then refers to the objective and apparent real
subjective credentials of a task, as perceived by the other tasks in the
system.

task_struct::cred then refers to the effective subjective credentials of a
task, as used by that task when it's actually running.  These are not visible
to the other tasks in the system.

__task_cred(task) then refers to the objective/real credentials of the task in
question.

current_cred() refers to the effective subjective credentials of the current
task.

prepare_creds() uses the objective creds as a base and commit_creds() changes
both pointers in the task_struct (indeed commit_creds() requires them to be the
same).

override_creds() and revert_creds() change the subjective creds pointer only,
and the former returns the old subjective creds.  These are used by NFSD,
faccessat() and do_coredump(), and will by used by CacheFiles.

In SELinux, current_has_perm() is provided as an alternative to
task_has_perm().  This uses the effective subjective context of current,
whereas task_has_perm() uses the objective/real context of the subject.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/nfsd/auth.c            |  5 +++-
 include/linux/cred.h      | 29 +++++++++++----------
 include/linux/init_task.h |  1 +
 include/linux/sched.h     |  5 +++-
 kernel/cred.c             | 38 ++++++++++++++++++---------
 kernel/fork.c             |  6 +++--
 security/selinux/hooks.c  | 65 ++++++++++++++++++++++++++++++-----------------
 7 files changed, 95 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 836ffa1047d9..0184fe9b514c 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 	int flags = nfsexp_flags(rqstp, exp);
 	int ret;
 
+	/* discard any old override before preparing the new set */
+	revert_creds(get_cred(current->real_cred));
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
@@ -82,7 +84,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 	else
 		new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
 							new->cap_permitted);
-	return commit_creds(new);
+	put_cred(override_creds(new));
+	return 0;
 
 oom:
 	ret = -ENOMEM;
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 794aab5c66e5..55a9c995d694 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -146,8 +146,8 @@ extern struct cred *prepare_exec_creds(void);
 extern struct cred *prepare_usermodehelper_creds(void);
 extern int commit_creds(struct cred *);
 extern void abort_creds(struct cred *);
-extern const struct cred *override_creds(const struct cred *) __deprecated;
-extern void revert_creds(const struct cred *) __deprecated;
+extern const struct cred *override_creds(const struct cred *);
+extern void revert_creds(const struct cred *);
 extern void __init cred_init(void);
 
 /**
@@ -202,32 +202,32 @@ static inline void put_cred(const struct cred *_cred)
 }
 
 /**
- * current_cred - Access the current task's credentials
+ * current_cred - Access the current task's subjective credentials
  *
- * Access the credentials of the current task.
+ * Access the subjective credentials of the current task.
  */
 #define current_cred() \
 	(current->cred)
 
 /**
- * __task_cred - Access another task's credentials
+ * __task_cred - Access a task's objective credentials
  * @task: The task to query
  *
- * Access the credentials of another task.  The caller must hold the
- * RCU readlock.
+ * Access the objective credentials of a task.  The caller must hold the RCU
+ * readlock.
  *
  * The caller must make sure task doesn't go away, either by holding a ref on
  * task or by holding tasklist_lock to prevent it from being unlinked.
  */
 #define __task_cred(task) \
-	((const struct cred *)(rcu_dereference((task)->cred)))
+	((const struct cred *)(rcu_dereference((task)->real_cred)))
 
 /**
- * get_task_cred - Get another task's credentials
+ * get_task_cred - Get another task's objective credentials
  * @task: The task to query
  *
- * Get the credentials of a task, pinning them so that they can't go away.
- * Accessing a task's credentials directly is not permitted.
+ * Get the objective credentials of a task, pinning them so that they can't go
+ * away.  Accessing a task's credentials directly is not permitted.
  *
  * The caller must make sure task doesn't go away, either by holding a ref on
  * task or by holding tasklist_lock to prevent it from being unlinked.
@@ -243,10 +243,11 @@ static inline void put_cred(const struct cred *_cred)
 })
 
 /**
- * get_current_cred - Get the current task's credentials
+ * get_current_cred - Get the current task's subjective credentials
  *
- * Get the credentials of the current task, pinning them so that they can't go
- * away.  Accessing the current task's credentials directly is not permitted.
+ * Get the subjective credentials of the current task, pinning them so that
+ * they can't go away.  Accessing the current task's credentials directly is
+ * not permitted.
  */
 #define get_current_cred()				\
 	(get_cred(current_cred()))
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 08c3b24ad9a8..2597858035cd 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,6 +149,7 @@ extern struct cred init_cred;
 	.children	= LIST_HEAD_INIT(tsk.children),			\
 	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
 	.group_leader	= &tsk,						\
+	.real_cred	= &init_cred,					\
 	.cred		= &init_cred,					\
 	.cred_exec_mutex =						\
 		 __MUTEX_INITIALIZER(tsk.cred_exec_mutex),		\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 121d655e460d..3443123b0709 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1145,7 +1145,10 @@ struct task_struct {
 	struct list_head cpu_timers[3];
 
 /* process credentials */
-	const struct cred *cred;	/* actual/objective task credentials (COW) */
+	const struct cred *real_cred;	/* objective and real subjective task
+					 * credentials (COW) */
+	const struct cred *cred;	/* effective (overridable) subjective task
+					 * credentials (COW) */
 	struct mutex cred_exec_mutex;	/* execve vs ptrace cred calculation mutex */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
diff --git a/kernel/cred.c b/kernel/cred.c
index b8bd2f99d8ce..f3ca10660617 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct thread_group_cred init_tgcred = {
  * The initial credentials for the initial task
  */
 struct cred init_cred = {
-	.usage			= ATOMIC_INIT(3),
+	.usage			= ATOMIC_INIT(4),
 	.securebits		= SECUREBITS_DEFAULT,
 	.cap_inheritable	= CAP_INIT_INH_SET,
 	.cap_permitted		= CAP_FULL_SET,
@@ -120,6 +120,8 @@ EXPORT_SYMBOL(__put_cred);
  * prepare a new copy, which the caller then modifies and then commits by
  * calling commit_creds().
  *
+ * Preparation involves making a copy of the objective creds for modification.
+ *
  * Returns a pointer to the new creds-to-be if successful, NULL otherwise.
  *
  * Call commit_creds() or abort_creds() to clean up.
@@ -130,7 +132,7 @@ struct cred *prepare_creds(void)
 	const struct cred *old;
 	struct cred *new;
 
-	BUG_ON(atomic_read(&task->cred->usage) < 1);
+	BUG_ON(atomic_read(&task->real_cred->usage) < 1);
 
 	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
 	if (!new)
@@ -262,6 +264,9 @@ error:
  *
  * We share if we can, but under some circumstances we have to generate a new
  * set.
+ *
+ * The new process gets the current process's subjective credentials as its
+ * objective and subjective credentials
  */
 int copy_creds(struct task_struct *p, unsigned long clone_flags)
 {
@@ -278,6 +283,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 #endif
 		clone_flags & CLONE_THREAD
 	    ) {
+		p->real_cred = get_cred(p->cred);
 		get_cred(p->cred);
 		atomic_inc(&p->cred->user->processes);
 		return 0;
@@ -317,7 +323,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 #endif
 
 	atomic_inc(&new->user->processes);
-	p->cred = new;
+	p->cred = p->real_cred = get_cred(new);
 	return 0;
 }
 
@@ -326,7 +332,9 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
  * @new: The credentials to be assigned
  *
  * Install a new set of credentials to the current task, using RCU to replace
- * the old set.
+ * the old set.  Both the objective and the subjective credentials pointers are
+ * updated.  This function may not be called if the subjective credentials are
+ * in an overridden state.
  *
  * This function eats the caller's reference to the new credentials.
  *
@@ -338,12 +346,15 @@ int commit_creds(struct cred *new)
 	struct task_struct *task = current;
 	const struct cred *old;
 
+	BUG_ON(task->cred != task->real_cred);
+	BUG_ON(atomic_read(&task->real_cred->usage) < 2);
 	BUG_ON(atomic_read(&new->usage) < 1);
-	BUG_ON(atomic_read(&task->cred->usage) < 1);
 
-	old = task->cred;
+	old = task->real_cred;
 	security_commit_creds(new, old);
 
+	get_cred(new); /* we will require a ref for the subj creds too */
+
 	/* dumpability changes */
 	if (old->euid != new->euid ||
 	    old->egid != new->egid ||
@@ -369,6 +380,7 @@ int commit_creds(struct cred *new)
 	 */
 	if (new->user != old->user)
 		atomic_inc(&new->user->processes);
+	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user)
 		atomic_dec(&old->user->processes);
@@ -388,6 +400,8 @@ int commit_creds(struct cred *new)
 	    new->fsgid != old->fsgid)
 		proc_id_connector(task, PROC_EVENT_GID);
 
+	/* release the old obj and subj refs both */
+	put_cred(old);
 	put_cred(old);
 	return 0;
 }
@@ -408,11 +422,11 @@ void abort_creds(struct cred *new)
 EXPORT_SYMBOL(abort_creds);
 
 /**
- * override_creds - Temporarily override the current process's credentials
+ * override_creds - Override the current process's subjective credentials
  * @new: The credentials to be assigned
  *
- * Install a set of temporary override credentials on the current process,
- * returning the old set for later reversion.
+ * Install a set of temporary override subjective credentials on the current
+ * process, returning the old set for later reversion.
  */
 const struct cred *override_creds(const struct cred *new)
 {
@@ -424,11 +438,11 @@ const struct cred *override_creds(const struct cred *new)
 EXPORT_SYMBOL(override_creds);
 
 /**
- * revert_creds - Revert a temporary credentials override
+ * revert_creds - Revert a temporary subjective credentials override
  * @old: The credentials to be restored
  *
- * Revert a temporary set of override credentials to an old set, discarding the
- * override set.
+ * Revert a temporary set of override subjective credentials to an old set,
+ * discarding the override set.
  */
 void revert_creds(const struct cred *old)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 82a7948a664e..af0d0f04585c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,6 +146,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	put_cred(tsk->real_cred);
 	put_cred(tsk->cred);
 	delayacct_tsk_free(tsk);
 
@@ -961,10 +962,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
 	retval = -EAGAIN;
-	if (atomic_read(&p->cred->user->processes) >=
+	if (atomic_read(&p->real_cred->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-		    p->cred->user != current->nsproxy->user_ns->root_user)
+		    p->real_cred->user != current->nsproxy->user_ns->root_user)
 			goto bad_fork_free;
 	}
 
@@ -1278,6 +1279,7 @@ bad_fork_cleanup_put_domain:
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
+	put_cred(p->real_cred);
 	put_cred(p->cred);
 bad_fork_free:
 	free_task(p);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 21a592184633..91b06f2aa963 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -161,7 +161,7 @@ static int selinux_secmark_enabled(void)
  */
 static void cred_init_security(void)
 {
-	struct cred *cred = (struct cred *) current->cred;
+	struct cred *cred = (struct cred *) current->real_cred;
 	struct task_security_struct *tsec;
 
 	tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL);
@@ -184,7 +184,7 @@ static inline u32 cred_sid(const struct cred *cred)
 }
 
 /*
- * get the security ID of a task
+ * get the objective security ID of a task
  */
 static inline u32 task_sid(const struct task_struct *task)
 {
@@ -197,7 +197,7 @@ static inline u32 task_sid(const struct task_struct *task)
 }
 
 /*
- * get the security ID of the current task
+ * get the subjective security ID of the current task
  */
 static inline u32 current_sid(void)
 {
@@ -1395,6 +1395,7 @@ static int cred_has_perm(const struct cred *actor,
  * Check permission between a pair of tasks, e.g. signal checks,
  * fork check, ptrace check, etc.
  * tsk1 is the actor and tsk2 is the target
+ * - this uses the default subjective creds of tsk1
  */
 static int task_has_perm(const struct task_struct *tsk1,
 			 const struct task_struct *tsk2,
@@ -1410,6 +1411,22 @@ static int task_has_perm(const struct task_struct *tsk1,
 	return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL);
 }
 
+/*
+ * Check permission between current and another task, e.g. signal checks,
+ * fork check, ptrace check, etc.
+ * current is the actor and tsk2 is the target
+ * - this uses current's subjective creds
+ */
+static int current_has_perm(const struct task_struct *tsk,
+			    u32 perms)
+{
+	u32 sid, tsid;
+
+	sid = current_sid();
+	tsid = task_sid(tsk);
+	return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL);
+}
+
 #if CAP_LAST_CAP > 63
 #error Fix SELinux to handle capabilities > 63.
 #endif
@@ -1807,7 +1824,7 @@ static int selinux_ptrace_may_access(struct task_struct *child,
 		return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL);
 	}
 
-	return task_has_perm(current, child, PROCESS__PTRACE);
+	return current_has_perm(child, PROCESS__PTRACE);
 }
 
 static int selinux_ptrace_traceme(struct task_struct *parent)
@@ -1826,7 +1843,7 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
 {
 	int error;
 
-	error = task_has_perm(current, target, PROCESS__GETCAP);
+	error = current_has_perm(target, PROCESS__GETCAP);
 	if (error)
 		return error;
 
@@ -3071,7 +3088,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
 		} else if (!vma->vm_file &&
 			   vma->vm_start <= vma->vm_mm->start_stack &&
 			   vma->vm_end >= vma->vm_mm->start_stack) {
-			rc = task_has_perm(current, current, PROCESS__EXECSTACK);
+			rc = current_has_perm(current, PROCESS__EXECSTACK);
 		} else if (vma->vm_file && vma->anon_vma) {
 			/*
 			 * We are making executable a file mapping that has
@@ -3220,7 +3237,7 @@ static int selinux_task_create(unsigned long clone_flags)
 	if (rc)
 		return rc;
 
-	return task_has_perm(current, current, PROCESS__FORK);
+	return current_has_perm(current, PROCESS__FORK);
 }
 
 /*
@@ -3285,17 +3302,17 @@ static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags)
 
 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
 {
-	return task_has_perm(current, p, PROCESS__SETPGID);
+	return current_has_perm(p, PROCESS__SETPGID);
 }
 
 static int selinux_task_getpgid(struct task_struct *p)
 {
-	return task_has_perm(current, p, PROCESS__GETPGID);
+	return current_has_perm(p, PROCESS__GETPGID);
 }
 
 static int selinux_task_getsid(struct task_struct *p)
 {
-	return task_has_perm(current, p, PROCESS__GETSESSION);
+	return current_has_perm(p, PROCESS__GETSESSION);
 }
 
 static void selinux_task_getsecid(struct task_struct *p, u32 *secid)
@@ -3317,7 +3334,7 @@ static int selinux_task_setnice(struct task_struct *p, int nice)
 	if (rc)
 		return rc;
 
-	return task_has_perm(current, p, PROCESS__SETSCHED);
+	return current_has_perm(p, PROCESS__SETSCHED);
 }
 
 static int selinux_task_setioprio(struct task_struct *p, int ioprio)
@@ -3328,12 +3345,12 @@ static int selinux_task_setioprio(struct task_struct *p, int ioprio)
 	if (rc)
 		return rc;
 
-	return task_has_perm(current, p, PROCESS__SETSCHED);
+	return current_has_perm(p, PROCESS__SETSCHED);
 }
 
 static int selinux_task_getioprio(struct task_struct *p)
 {
-	return task_has_perm(current, p, PROCESS__GETSCHED);
+	return current_has_perm(p, PROCESS__GETSCHED);
 }
 
 static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim)
@@ -3350,7 +3367,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim
 	   later be used as a safe reset point for the soft limit
 	   upon context transitions.  See selinux_bprm_committing_creds. */
 	if (old_rlim->rlim_max != new_rlim->rlim_max)
-		return task_has_perm(current, current, PROCESS__SETRLIMIT);
+		return current_has_perm(current, PROCESS__SETRLIMIT);
 
 	return 0;
 }
@@ -3363,17 +3380,17 @@ static int selinux_task_setscheduler(struct task_struct *p, int policy, struct s
 	if (rc)
 		return rc;
 
-	return task_has_perm(current, p, PROCESS__SETSCHED);
+	return current_has_perm(p, PROCESS__SETSCHED);
 }
 
 static int selinux_task_getscheduler(struct task_struct *p)
 {
-	return task_has_perm(current, p, PROCESS__GETSCHED);
+	return current_has_perm(p, PROCESS__GETSCHED);
 }
 
 static int selinux_task_movememory(struct task_struct *p)
 {
-	return task_has_perm(current, p, PROCESS__SETSCHED);
+	return current_has_perm(p, PROCESS__SETSCHED);
 }
 
 static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
@@ -3394,7 +3411,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
 		rc = avc_has_perm(secid, task_sid(p),
 				  SECCLASS_PROCESS, perm, NULL);
 	else
-		rc = task_has_perm(current, p, perm);
+		rc = current_has_perm(p, perm);
 	return rc;
 }
 
@@ -5250,7 +5267,7 @@ static int selinux_getprocattr(struct task_struct *p,
 	unsigned len;
 
 	if (current != p) {
-		error = task_has_perm(current, p, PROCESS__GETATTR);
+		error = current_has_perm(p, PROCESS__GETATTR);
 		if (error)
 			return error;
 	}
@@ -5309,15 +5326,15 @@ static int selinux_setprocattr(struct task_struct *p,
 	 * above restriction is ever removed.
 	 */
 	if (!strcmp(name, "exec"))
-		error = task_has_perm(current, p, PROCESS__SETEXEC);
+		error = current_has_perm(p, PROCESS__SETEXEC);
 	else if (!strcmp(name, "fscreate"))
-		error = task_has_perm(current, p, PROCESS__SETFSCREATE);
+		error = current_has_perm(p, PROCESS__SETFSCREATE);
 	else if (!strcmp(name, "keycreate"))
-		error = task_has_perm(current, p, PROCESS__SETKEYCREATE);
+		error = current_has_perm(p, PROCESS__SETKEYCREATE);
 	else if (!strcmp(name, "sockcreate"))
-		error = task_has_perm(current, p, PROCESS__SETSOCKCREATE);
+		error = current_has_perm(p, PROCESS__SETSOCKCREATE);
 	else if (!strcmp(name, "current"))
-		error = task_has_perm(current, p, PROCESS__SETCURRENT);
+		error = current_has_perm(p, PROCESS__SETCURRENT);
 	else
 		error = -EINVAL;
 	if (error)
-- 
cgit v1.2.3


From 3a3b7ce9336952ea7b9564d976d068a238976c9d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Nov 2008 10:39:28 +1100
Subject: CRED: Allow kernel services to override LSM settings for task actions

Allow kernel services to override LSM settings appropriate to the actions
performed by a task by duplicating a set of credentials, modifying it and then
using task_struct::cred to point to it when performing operations on behalf of
a task.

This is used, for example, by CacheFiles which has to transparently access the
cache on behalf of a process that thinks it is doing, say, NFS accesses with a
potentially inappropriate (with respect to accessing the cache) set of
credentials.

This patch provides two LSM hooks for modifying a task security record:

 (*) security_kernel_act_as() which allows modification of the security datum
     with which a task acts on other objects (most notably files).

 (*) security_kernel_create_files_as() which allows modification of the
     security datum that is used to initialise the security data on a file that
     a task creates.

The patch also provides four new credentials handling functions, which wrap the
LSM functions:

 (1) prepare_kernel_cred()

     Prepare a set of credentials for a kernel service to use, based either on
     a daemon's credentials or on init_cred.  All the keyrings are cleared.

 (2) set_security_override()

     Set the LSM security ID in a set of credentials to a specific security
     context, assuming permission from the LSM policy.

 (3) set_security_override_from_ctx()

     As (2), but takes the security context as a string.

 (4) set_create_files_as()

     Set the file creation LSM security ID in a set of credentials to be the
     same as that on a particular inode.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com> [Smack changes]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/cred.h       |   6 +++
 include/linux/security.h   |  28 +++++++++++
 kernel/cred.c              | 113 +++++++++++++++++++++++++++++++++++++++++++++
 security/capability.c      |  12 +++++
 security/security.c        |  10 ++++
 security/selinux/hooks.c   |  46 ++++++++++++++++++
 security/smack/smack_lsm.c |  37 +++++++++++++++
 7 files changed, 252 insertions(+)

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 55a9c995d694..26c1ab179946 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -18,6 +18,7 @@
 
 struct user_struct;
 struct cred;
+struct inode;
 
 /*
  * COW Supplementary groups list
@@ -148,6 +149,11 @@ extern int commit_creds(struct cred *);
 extern void abort_creds(struct cred *);
 extern const struct cred *override_creds(const struct cred *);
 extern void revert_creds(const struct cred *);
+extern struct cred *prepare_kernel_cred(struct task_struct *);
+extern int change_create_files_as(struct cred *, struct inode *);
+extern int set_security_override(struct cred *, u32);
+extern int set_security_override_from_ctx(struct cred *, const char *);
+extern int set_create_files_as(struct cred *, struct inode *);
 extern void __init cred_init(void);
 
 /**
diff --git a/include/linux/security.h b/include/linux/security.h
index 56a0eed65673..59a11e19b617 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -587,6 +587,19 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@new points to the new credentials.
  *	@old points to the original credentials.
  *	Install a new set of credentials.
+ * @kernel_act_as:
+ *	Set the credentials for a kernel service to act as (subjective context).
+ *	@new points to the credentials to be modified.
+ *	@secid specifies the security ID to be set
+ *	The current task must be the one that nominated @secid.
+ *	Return 0 if successful.
+ * @kernel_create_files_as:
+ *	Set the file creation context in a set of credentials to be the same as
+ *	the objective context of the specified inode.
+ *	@new points to the credentials to be modified.
+ *	@inode points to the inode to use as a reference.
+ *	The current task must be the one that nominated @inode.
+ *	Return 0 if successful.
  * @task_setuid:
  *	Check permission before setting one or more of the user identity
  *	attributes of the current process.  The @flags parameter indicates
@@ -1381,6 +1394,8 @@ struct security_operations {
 	int (*cred_prepare)(struct cred *new, const struct cred *old,
 			    gfp_t gfp);
 	void (*cred_commit)(struct cred *new, const struct cred *old);
+	int (*kernel_act_as)(struct cred *new, u32 secid);
+	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
@@ -1632,6 +1647,8 @@ int security_task_create(unsigned long clone_flags);
 void security_cred_free(struct cred *cred);
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
 void security_commit_creds(struct cred *new, const struct cred *old);
+int security_kernel_act_as(struct cred *new, u32 secid);
+int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
@@ -2151,6 +2168,17 @@ static inline void security_commit_creds(struct cred *new,
 {
 }
 
+static inline int security_kernel_act_as(struct cred *cred, u32 secid)
+{
+	return 0;
+}
+
+static inline int security_kernel_create_files_as(struct cred *cred,
+						  struct inode *inode)
+{
+	return 0;
+}
+
 static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
 				       int flags)
 {
diff --git a/kernel/cred.c b/kernel/cred.c
index f3ca10660617..13697ca2bb38 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -462,3 +462,116 @@ void __init cred_init(void)
 	cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
 				     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 }
+
+/**
+ * prepare_kernel_cred - Prepare a set of credentials for a kernel service
+ * @daemon: A userspace daemon to be used as a reference
+ *
+ * Prepare a set of credentials for a kernel service.  This can then be used to
+ * override a task's own credentials so that work can be done on behalf of that
+ * task that requires a different subjective context.
+ *
+ * @daemon is used to provide a base for the security record, but can be NULL.
+ * If @daemon is supplied, then the security data will be derived from that;
+ * otherwise they'll be set to 0 and no groups, full capabilities and no keys.
+ *
+ * The caller may change these controls afterwards if desired.
+ *
+ * Returns the new credentials or NULL if out of memory.
+ *
+ * Does not take, and does not return holding current->cred_replace_mutex.
+ */
+struct cred *prepare_kernel_cred(struct task_struct *daemon)
+{
+	const struct cred *old;
+	struct cred *new;
+
+	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	if (daemon)
+		old = get_task_cred(daemon);
+	else
+		old = get_cred(&init_cred);
+
+	get_uid(new->user);
+	get_group_info(new->group_info);
+
+#ifdef CONFIG_KEYS
+	atomic_inc(&init_tgcred.usage);
+	new->tgcred = &init_tgcred;
+	new->request_key_auth = NULL;
+	new->thread_keyring = NULL;
+	new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+#endif
+
+#ifdef CONFIG_SECURITY
+	new->security = NULL;
+#endif
+	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+		goto error;
+
+	atomic_set(&new->usage, 1);
+	put_cred(old);
+	return new;
+
+error:
+	put_cred(new);
+	return NULL;
+}
+EXPORT_SYMBOL(prepare_kernel_cred);
+
+/**
+ * set_security_override - Set the security ID in a set of credentials
+ * @new: The credentials to alter
+ * @secid: The LSM security ID to set
+ *
+ * Set the LSM security ID in a set of credentials so that the subjective
+ * security is overridden when an alternative set of credentials is used.
+ */
+int set_security_override(struct cred *new, u32 secid)
+{
+	return security_kernel_act_as(new, secid);
+}
+EXPORT_SYMBOL(set_security_override);
+
+/**
+ * set_security_override_from_ctx - Set the security ID in a set of credentials
+ * @new: The credentials to alter
+ * @secctx: The LSM security context to generate the security ID from.
+ *
+ * Set the LSM security ID in a set of credentials so that the subjective
+ * security is overridden when an alternative set of credentials is used.  The
+ * security ID is specified in string form as a security context to be
+ * interpreted by the LSM.
+ */
+int set_security_override_from_ctx(struct cred *new, const char *secctx)
+{
+	u32 secid;
+	int ret;
+
+	ret = security_secctx_to_secid(secctx, strlen(secctx), &secid);
+	if (ret < 0)
+		return ret;
+
+	return set_security_override(new, secid);
+}
+EXPORT_SYMBOL(set_security_override_from_ctx);
+
+/**
+ * set_create_files_as - Set the LSM file create context in a set of credentials
+ * @new: The credentials to alter
+ * @inode: The inode to take the context from
+ *
+ * Change the LSM file creation context in a set of credentials to be the same
+ * as the object context of the specified inode, so that the new inodes have
+ * the same MAC context as that inode.
+ */
+int set_create_files_as(struct cred *new, struct inode *inode)
+{
+	new->fsuid = inode->i_uid;
+	new->fsgid = inode->i_gid;
+	return security_kernel_create_files_as(new, inode);
+}
+EXPORT_SYMBOL(set_create_files_as);
diff --git a/security/capability.c b/security/capability.c
index 185804f99ad1..b9e391425e6f 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -348,6 +348,16 @@ static void cap_cred_commit(struct cred *new, const struct cred *old)
 {
 }
 
+static int cap_kernel_act_as(struct cred *new, u32 secid)
+{
+	return 0;
+}
+
+static int cap_kernel_create_files_as(struct cred *new, struct inode *inode)
+{
+	return 0;
+}
+
 static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
 	return 0;
@@ -889,6 +899,8 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, cred_free);
 	set_to_cap_if_null(ops, cred_prepare);
 	set_to_cap_if_null(ops, cred_commit);
+	set_to_cap_if_null(ops, kernel_act_as);
+	set_to_cap_if_null(ops, kernel_create_files_as);
 	set_to_cap_if_null(ops, task_setuid);
 	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setgid);
diff --git a/security/security.c b/security/security.c
index dc5babb2d6d8..038ef04b2c7f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -616,6 +616,16 @@ void security_commit_creds(struct cred *new, const struct cred *old)
 	return security_ops->cred_commit(new, old);
 }
 
+int security_kernel_act_as(struct cred *new, u32 secid)
+{
+	return security_ops->kernel_act_as(new, secid);
+}
+
+int security_kernel_create_files_as(struct cred *new, struct inode *inode)
+{
+	return security_ops->kernel_create_files_as(new, inode);
+}
+
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
 	return security_ops->task_setuid(id0, id1, id2, flags);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 91b06f2aa963..520f82ab3fbf 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3277,6 +3277,50 @@ static void selinux_cred_commit(struct cred *new, const struct cred *old)
 	secondary_ops->cred_commit(new, old);
 }
 
+/*
+ * set the security data for a kernel service
+ * - all the creation contexts are set to unlabelled
+ */
+static int selinux_kernel_act_as(struct cred *new, u32 secid)
+{
+	struct task_security_struct *tsec = new->security;
+	u32 sid = current_sid();
+	int ret;
+
+	ret = avc_has_perm(sid, secid,
+			   SECCLASS_KERNEL_SERVICE,
+			   KERNEL_SERVICE__USE_AS_OVERRIDE,
+			   NULL);
+	if (ret == 0) {
+		tsec->sid = secid;
+		tsec->create_sid = 0;
+		tsec->keycreate_sid = 0;
+		tsec->sockcreate_sid = 0;
+	}
+	return ret;
+}
+
+/*
+ * set the file creation context in a security record to the same as the
+ * objective context of the specified inode
+ */
+static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
+{
+	struct inode_security_struct *isec = inode->i_security;
+	struct task_security_struct *tsec = new->security;
+	u32 sid = current_sid();
+	int ret;
+
+	ret = avc_has_perm(sid, isec->sid,
+			   SECCLASS_KERNEL_SERVICE,
+			   KERNEL_SERVICE__CREATE_FILES_AS,
+			   NULL);
+
+	if (ret == 0)
+		tsec->create_sid = isec->sid;
+	return 0;
+}
+
 static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
 	/* Since setuid only affects the current process, and
@@ -5593,6 +5637,8 @@ static struct security_operations selinux_ops = {
 	.cred_free =			selinux_cred_free,
 	.cred_prepare =			selinux_cred_prepare,
 	.cred_commit =			selinux_cred_commit,
+	.kernel_act_as =		selinux_kernel_act_as,
+	.kernel_create_files_as =	selinux_kernel_create_files_as,
 	.task_setuid =			selinux_task_setuid,
 	.task_fix_setuid =		selinux_task_fix_setuid,
 	.task_setgid =			selinux_task_setgid,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index de396742abf4..8ad48161cef5 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1011,6 +1011,41 @@ static void smack_cred_commit(struct cred *new, const struct cred *old)
 {
 }
 
+/**
+ * smack_kernel_act_as - Set the subjective context in a set of credentials
+ * @new points to the set of credentials to be modified.
+ * @secid specifies the security ID to be set
+ *
+ * Set the security data for a kernel service.
+ */
+static int smack_kernel_act_as(struct cred *new, u32 secid)
+{
+	char *smack = smack_from_secid(secid);
+
+	if (smack == NULL)
+		return -EINVAL;
+
+	new->security = smack;
+	return 0;
+}
+
+/**
+ * smack_kernel_create_files_as - Set the file creation label in a set of creds
+ * @new points to the set of credentials to be modified
+ * @inode points to the inode to use as a reference
+ *
+ * Set the file creation context in a set of credentials to the same
+ * as the objective context of the specified inode
+ */
+static int smack_kernel_create_files_as(struct cred *new,
+					struct inode *inode)
+{
+	struct inode_smack *isp = inode->i_security;
+
+	new->security = isp->smk_inode;
+	return 0;
+}
+
 /**
  * smack_task_setpgid - Smack check on setting pgid
  * @p: the task object
@@ -2641,6 +2676,8 @@ struct security_operations smack_ops = {
 	.cred_free =			smack_cred_free,
 	.cred_prepare =			smack_cred_prepare,
 	.cred_commit =			smack_cred_commit,
+	.kernel_act_as =		smack_kernel_act_as,
+	.kernel_create_files_as =	smack_kernel_create_files_as,
 	.task_fix_setuid =		cap_task_fix_setuid,
 	.task_setpgid = 		smack_task_setpgid,
 	.task_getpgid = 		smack_task_getpgid,
-- 
cgit v1.2.3


From 38a7ddffa4b79d7b1fbc9bf2fa82b21b72622858 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <ptesarik@suse.cz>
Date: Thu, 13 Nov 2008 22:44:11 -0800
Subject: tcp: remove an unnecessary field in struct tcp_skb_cb

The urg_ptr field is not used anywhere and is merely confusing.

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 438014d57610..8f26b28fb407 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -590,7 +590,6 @@ struct tcp_skb_cb {
 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
 
-	__u16		urg_ptr;	/* Valid w/URG flags is set.	*/
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
 };
 
-- 
cgit v1.2.3


From f30ab418a1d3c5a8b83493e7d70d6876a74aa0ce Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 13 Nov 2008 22:56:30 -0800
Subject: pkt_sched: Remove qdisc->ops->requeue() etc.

After implementing qdisc->ops->peek() and changing sch_netem into
classless qdisc there are no more qdisc->ops->requeue() users. This
patch removes this method with its wrappers (qdisc_requeue()), and
also unused qdisc->requeue structure. There are a few minor fixes of
warnings (htb_enqueue()) and comments btw.

The idea to kill ->requeue() and a similar patch were first developed
by David S. Miller.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 17 -------------
 net/sched/sch_api.c       |  7 ------
 net/sched/sch_atm.c       | 20 +--------------
 net/sched/sch_cbq.c       | 35 --------------------------
 net/sched/sch_dsmark.c    | 21 ----------------
 net/sched/sch_fifo.c      |  2 --
 net/sched/sch_generic.c   | 23 -----------------
 net/sched/sch_gred.c      | 21 ----------------
 net/sched/sch_hfsc.c      | 19 --------------
 net/sched/sch_htb.c       | 44 +--------------------------------
 net/sched/sch_multiq.c    | 39 ++---------------------------
 net/sched/sch_netem.c     | 16 ------------
 net/sched/sch_prio.c      | 28 ---------------------
 net/sched/sch_red.c       | 18 --------------
 net/sched/sch_sfq.c       | 63 -----------------------------------------------
 net/sched/sch_tbf.c       | 14 -----------
 net/sched/sch_teql.c      | 11 ---------
 17 files changed, 4 insertions(+), 394 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 64ae1ba9f554..f8c47429044a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -53,7 +53,6 @@ struct Qdisc
 	atomic_t		refcnt;
 	unsigned long		state;
 	struct sk_buff		*gso_skb;
-	struct sk_buff_head	requeue;
 	struct sk_buff_head	q;
 	struct netdev_queue	*dev_queue;
 	struct Qdisc		*next_sched;
@@ -112,7 +111,6 @@ struct Qdisc_ops
 	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
 	struct sk_buff *	(*dequeue)(struct Qdisc *);
 	struct sk_buff *	(*peek)(struct Qdisc *);
-	int 			(*requeue)(struct sk_buff *, struct Qdisc *);
 	unsigned int		(*drop)(struct Qdisc *);
 
 	int			(*init)(struct Qdisc *, struct nlattr *arg);
@@ -467,21 +465,6 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 	return skb;
 }
 
-static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch,
-				  struct sk_buff_head *list)
-{
-	__skb_queue_head(list, skb);
-	sch->qstats.backlog += qdisc_pkt_len(skb);
-	sch->qstats.requeues++;
-
-	return NET_XMIT_SUCCESS;
-}
-
-static inline int qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	return __qdisc_requeue(skb, sch, &sch->q);
-}
-
 static inline void __qdisc_reset_queue(struct Qdisc *sch,
 				       struct sk_buff_head *list)
 {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e5646614e88d..5bcef13408c8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -97,11 +97,6 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 
    Auxiliary routines:
 
-   ---requeue
-
-   requeues once dequeued packet. It is used for non-standard or
-   just buggy devices, which can defer output even if netif_queue_stopped()=0.
-
    ---peek
 
    like dequeue but without removing a packet from the queue
@@ -151,8 +146,6 @@ int register_qdisc(struct Qdisc_ops *qops)
 
 	if (qops->enqueue == NULL)
 		qops->enqueue = noop_qdisc_ops.enqueue;
-	if (qops->requeue == NULL)
-		qops->requeue = noop_qdisc_ops.requeue;
 	if (qops->peek == NULL) {
 		if (qops->dequeue == NULL) {
 			qops->peek = noop_qdisc_ops.peek;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6eb9a650b63d..ca90f6e59aee 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -62,7 +62,7 @@ struct atm_qdisc_data {
 	struct atm_flow_data	link;		/* unclassified skbs go here */
 	struct atm_flow_data	*flows;		/* NB: "link" is also on this
 						   list */
-	struct tasklet_struct	task;		/* requeue tasklet */
+	struct tasklet_struct	task;		/* dequeue tasklet */
 };
 
 /* ------------------------- Class/flow operations ------------------------- */
@@ -534,23 +534,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
 	return p->link.q->ops->peek(p->link.q);
 }
 
-static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	int ret;
-
-	pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-	ret = p->link.q->ops->requeue(skb, p->link.q);
-	if (!ret) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	} else if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
-		p->link.qstats.drops++;
-	}
-	return ret;
-}
-
 static unsigned int atm_tc_drop(struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -707,7 +690,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
 	.enqueue	= atm_tc_enqueue,
 	.dequeue	= atm_tc_dequeue,
 	.peek		= atm_tc_peek,
-	.requeue	= atm_tc_requeue,
 	.drop		= atm_tc_drop,
 	.init		= atm_tc_init,
 	.reset		= atm_tc_reset,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 63efa70abbea..a99e37e9e6f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-static int
-cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl;
-	int ret;
-
-	if ((cl = q->tx_class) == NULL) {
-		kfree_skb(skb);
-		sch->qstats.drops++;
-		return NET_XMIT_CN;
-	}
-	q->tx_class = NULL;
-
-	cbq_mark_toplevel(q, cl);
-
-#ifdef CONFIG_NET_CLS_ACT
-	q->rx_class = cl;
-	cl->q->__parent = sch;
-#endif
-	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		if (!cl->next_alive)
-			cbq_activate_class(cl);
-		return 0;
-	}
-	if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
-	}
-	return ret;
-}
-
 /* Overlimit actions */
 
 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
@@ -2067,7 +2033,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
 	.enqueue	=	cbq_enqueue,
 	.dequeue	=	cbq_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	cbq_requeue,
 	.drop		=	cbq_drop,
 	.init		=	cbq_init,
 	.reset		=	cbq_reset,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3e491479ea86..3f9427a4b757 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -322,26 +322,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
 	return p->q->ops->peek(p->q);
 }
 
-static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	int err;
-
-	pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-
-	err = p->q->ops->requeue(skb, p->q);
-	if (err != NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(err))
-			sch->qstats.drops++;
-		return err;
-	}
-
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-
-	return NET_XMIT_SUCCESS;
-}
-
 static unsigned int dsmark_drop(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -506,7 +486,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
 	.enqueue	=	dsmark_enqueue,
 	.dequeue	=	dsmark_dequeue,
 	.peek		=	dsmark_peek,
-	.requeue	=	dsmark_requeue,
 	.drop		=	dsmark_drop,
 	.init		=	dsmark_init,
 	.reset		=	dsmark_reset,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 8825e8806f41..92cfc9d7e3b9 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -84,7 +84,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
 	.reset		=	qdisc_reset_queue,
@@ -100,7 +99,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
 	.reset		=	qdisc_reset_queue,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1192da229835..80c8f3dbbea1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -306,22 +306,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
 	return NULL;
 }
 
-static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
-{
-	if (net_ratelimit())
-		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
-		       skb->dev->name);
-	kfree_skb(skb);
-	return NET_XMIT_CN;
-}
-
 struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 	.id		=	"noop",
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
 	.peek		=	noop_dequeue,
-	.requeue	=	noop_requeue,
 	.owner		=	THIS_MODULE,
 };
 
@@ -336,7 +326,6 @@ struct Qdisc noop_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
-	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 };
@@ -348,7 +337,6 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
 	.peek		=	noop_dequeue,
-	.requeue	=	noop_requeue,
 	.owner		=	THIS_MODULE,
 };
 
@@ -364,7 +352,6 @@ static struct Qdisc noqueue_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noqueue_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
-	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.dev_queue	=	&noqueue_netdev_queue,
 };
@@ -426,12 +413,6 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
-{
-	qdisc->q.qlen++;
-	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
-}
-
 static void pfifo_fast_reset(struct Qdisc* qdisc)
 {
 	int prio;
@@ -473,7 +454,6 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.enqueue	=	pfifo_fast_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
 	.peek		=	pfifo_fast_peek,
-	.requeue	=	pfifo_fast_requeue,
 	.init		=	pfifo_fast_init,
 	.reset		=	pfifo_fast_reset,
 	.dump		=	pfifo_fast_dump,
@@ -499,7 +479,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
-	skb_queue_head_init(&sch->requeue);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
@@ -571,8 +550,6 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	dev_put(qdisc_dev(qdisc));
 
 	kfree_skb(qdisc->gso_skb);
-	__skb_queue_purge(&qdisc->requeue);
-
 	kfree((char *) qdisc - qdisc->padded);
 }
 EXPORT_SYMBOL(qdisc_destroy);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index cb20ee3b9fc2..40408d595c08 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -240,26 +240,6 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct gred_sched *t = qdisc_priv(sch);
-	struct gred_sched_data *q;
-	u16 dp = tc_index_to_dp(skb);
-
-	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
-			       "for requeue, screwing up backlog.\n",
-			       tc_index_to_dp(skb));
-	} else {
-		if (red_is_idling(&q->parms))
-			red_end_of_idle_period(&q->parms);
-		q->backlog += qdisc_pkt_len(skb);
-	}
-
-	return qdisc_requeue(skb, sch);
-}
-
 static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
@@ -603,7 +583,6 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
 	.enqueue	=	gred_enqueue,
 	.dequeue	=	gred_dequeue,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	gred_requeue,
 	.drop		=	gred_drop,
 	.init		=	gred_init,
 	.reset		=	gred_reset,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d90b1652f2af..071c4749a12b 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -184,7 +184,6 @@ struct hfsc_sched
 	struct rb_root eligible;		/* eligible tree */
 	struct list_head droplist;		/* active leaf class list (for
 						   dropping) */
-	struct sk_buff_head requeue;		/* requeued packet */
 	struct qdisc_watchdog watchdog;		/* watchdog timer */
 };
 
@@ -1432,7 +1431,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
-	skb_queue_head_init(&q->requeue);
 
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
@@ -1517,7 +1515,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
 			hfsc_reset_class(cl);
 	}
-	__skb_queue_purge(&q->requeue);
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
 	qdisc_watchdog_cancel(&q->watchdog);
@@ -1542,7 +1539,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 			hfsc_destroy_class(sch, cl);
 	}
 	qdisc_class_hash_destroy(&q->clhash);
-	__skb_queue_purge(&q->requeue);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -1609,8 +1605,6 @@ hfsc_dequeue(struct Qdisc *sch)
 
 	if (sch->q.qlen == 0)
 		return NULL;
-	if ((skb = __skb_dequeue(&q->requeue)))
-		goto out;
 
 	cur_time = psched_get_time();
 
@@ -1659,24 +1653,12 @@ hfsc_dequeue(struct Qdisc *sch)
 		set_passive(cl);
 	}
 
- out:
 	sch->flags &= ~TCQ_F_THROTTLED;
 	sch->q.qlen--;
 
 	return skb;
 }
 
-static int
-hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct hfsc_sched *q = qdisc_priv(sch);
-
-	__skb_queue_head(&q->requeue, skb);
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-	return NET_XMIT_SUCCESS;
-}
-
 static unsigned int
 hfsc_drop(struct Qdisc *sch)
 {
@@ -1728,7 +1710,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
 	.enqueue	= hfsc_enqueue,
 	.dequeue	= hfsc_dequeue,
 	.peek		= qdisc_peek_dequeued,
-	.requeue	= hfsc_requeue,
 	.drop		= hfsc_drop,
 	.cl_ops		= &hfsc_class_ops,
 	.priv_size	= sizeof(struct hfsc_sched),
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3fda8199713d..83f5e69243c1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -551,7 +551,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	int ret;
+	int uninitialized_var(ret);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = htb_classify(skb, sch, &ret);
 
@@ -591,47 +591,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	int ret;
-	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = htb_classify(skb, sch, &ret);
-	struct sk_buff *tskb;
-
-	if (cl == HTB_DIRECT) {
-		/* enqueue to helper queue */
-		if (q->direct_queue.qlen < q->direct_qlen) {
-			__skb_queue_head(&q->direct_queue, skb);
-		} else {
-			__skb_queue_head(&q->direct_queue, skb);
-			tskb = __skb_dequeue_tail(&q->direct_queue);
-			kfree_skb(tskb);
-			sch->qstats.drops++;
-			return NET_XMIT_CN;
-		}
-#ifdef CONFIG_NET_CLS_ACT
-	} else if (!cl) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-#endif
-	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
-		   NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(ret)) {
-			sch->qstats.drops++;
-			cl->qstats.drops++;
-		}
-		return ret;
-	} else
-		htb_activate(q, cl);
-
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-	return NET_XMIT_SUCCESS;
-}
-
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
  *
@@ -1566,7 +1525,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.enqueue	=	htb_enqueue,
 	.dequeue	=	htb_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	htb_requeue,
 	.drop		=	htb_drop,
 	.init		=	htb_init,
 	.reset		=	htb_reset,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 155648d23b7c..f645ac55a1a1 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-
-static int
-multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct Qdisc *qdisc;
-	struct multiq_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	qdisc = multiq_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
-	if (qdisc == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-#endif
-
-	ret = qdisc->ops->requeue(skb, qdisc);
-	if (ret == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		if (q->curband)
-			q->curband--;
-		else
-			q->curband = q->bands - 1;
-		return NET_XMIT_SUCCESS;
-	}
-	if (net_xmit_drop_count(ret))
-		sch->qstats.drops++;
-	return ret;
-}
-
-
 static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
@@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 			q->curband = 0;
 
 		/* Check that target subqueue is available before
-		 * pulling an skb to avoid excessive requeues
+		 * pulling an skb to avoid head-of-line blocking.
 		 */
 		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
 			qdisc = q->queues[q->curband];
@@ -170,7 +136,7 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
 			curband = 0;
 
 		/* Check that target subqueue is available before
-		 * pulling an skb to avoid excessive requeues
+		 * pulling an skb to avoid head-of-line blocking.
 		 */
 		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
 			qdisc = q->queues[curband];
@@ -480,7 +446,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
 	.enqueue	=	multiq_enqueue,
 	.dequeue	=	multiq_dequeue,
 	.peek		=	multiq_peek,
-	.requeue	=	multiq_requeue,
 	.drop		=	multiq_drop,
 	.init		=	multiq_init,
 	.reset		=	multiq_reset,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f69698ff88d9..3cbc3ff7b5bc 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -252,20 +252,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-/* Requeue packets but don't change time stamp */
-static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	}
-
-	return ret;
-}
-
 static unsigned int netem_drop(struct Qdisc* sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -531,7 +517,6 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
 	.enqueue	=	tfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	tfifo_init,
 	.reset		=	qdisc_reset_queue,
@@ -620,7 +605,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	netem_requeue,
 	.drop		=	netem_drop,
 	.init		=	netem_init,
 	.reset		=	netem_reset,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3651da3e2802..ea65a87ec22c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -93,33 +93,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-
-static int
-prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct Qdisc *qdisc;
-	int ret;
-
-	qdisc = prio_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
-	if (qdisc == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-#endif
-
-	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		return NET_XMIT_SUCCESS;
-	}
-	if (net_xmit_drop_count(ret))
-		sch->qstats.drops++;
-	return ret;
-}
-
 static struct sk_buff *prio_peek(struct Qdisc *sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
@@ -435,7 +408,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.enqueue	=	prio_enqueue,
 	.dequeue	=	prio_dequeue,
 	.peek		=	prio_peek,
-	.requeue	=	prio_requeue,
 	.drop		=	prio_drop,
 	.init		=	prio_init,
 	.reset		=	prio_reset,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7abc51454c2d..6a0371c22643 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -108,23 +108,6 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct red_sched_data *q = qdisc_priv(sch);
-	struct Qdisc *child = q->qdisc;
-	int ret;
-
-	if (red_is_idling(&q->parms))
-		red_end_of_idle_period(&q->parms);
-
-	ret = child->ops->requeue(skb, child);
-	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->qstats.requeues++;
-		sch->q.qlen++;
-	}
-	return ret;
-}
-
 static struct sk_buff * red_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
@@ -370,7 +353,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
 	.enqueue	=	red_enqueue,
 	.dequeue	=	red_dequeue,
 	.peek		=	red_peek,
-	.requeue	=	red_requeue,
 	.drop		=	red_drop,
 	.init		=	red_init,
 	.reset		=	red_reset,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 198b83d42ba8..ab8cfee3c9ce 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -329,68 +329,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_CN;
 }
 
-static int
-sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned int hash;
-	sfq_index x;
-	int ret;
-
-	hash = sfq_classify(skb, sch, &ret);
-	if (hash == 0) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-	hash--;
-
-	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
-	}
-
-	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_head(&q->qs[x], skb);
-	/* If selected queue has length q->limit+1, this means that
-	 * all another queues are empty and we do simple tail drop.
-	 * This packet is still requeued at head of queue, tail packet
-	 * is dropped.
-	 */
-	if (q->qs[x].qlen > q->limit) {
-		skb = q->qs[x].prev;
-		__skb_unlink(skb, &q->qs[x]);
-		sch->qstats.drops++;
-		sch->qstats.backlog -= qdisc_pkt_len(skb);
-		kfree_skb(skb);
-		return NET_XMIT_CN;
-	}
-
-	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
-			q->next[x] = x;
-			q->allot[x] = q->quantum;
-		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
-			q->tail = x;
-		}
-	}
-
-	if (++sch->q.qlen <= q->limit) {
-		sch->qstats.requeues++;
-		return 0;
-	}
-
-	sch->qstats.drops++;
-	sfq_drop(sch);
-	return NET_XMIT_CN;
-}
-
 static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
@@ -636,7 +574,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
 	.peek		=	sfq_peek,
-	.requeue	=	sfq_requeue,
 	.drop		=	sfq_drop,
 	.init		=	sfq_init,
 	.reset		=	sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 435076cf620e..bb7783d584bb 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return 0;
 }
 
-static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct tbf_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	}
-
-	return ret;
-}
-
 static unsigned int tbf_drop(struct Qdisc* sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -468,7 +455,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
 	.enqueue	=	tbf_enqueue,
 	.dequeue	=	tbf_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	tbf_requeue,
 	.drop		=	tbf_drop,
 	.init		=	tbf_init,
 	.reset		=	tbf_reset,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index bf03e7fa1849..cfc8e7caba62 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return NET_XMIT_DROP;
 }
 
-static int
-teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct teql_sched_data *q = qdisc_priv(sch);
-
-	__skb_queue_head(&q->q, skb);
-	sch->qstats.requeues++;
-	return 0;
-}
-
 static struct sk_buff *
 teql_dequeue(struct Qdisc* sch)
 {
@@ -441,7 +431,6 @@ static __init void teql_master_setup(struct net_device *dev)
 	ops->enqueue	=	teql_enqueue;
 	ops->dequeue	=	teql_dequeue;
 	ops->peek	=	teql_peek;
-	ops->requeue	=	teql_requeue;
 	ops->init	=	teql_qdisc_init;
 	ops->reset	=	teql_reset;
 	ops->destroy	=	teql_destroy;
-- 
cgit v1.2.3


From 31e889098a80ceb3e9e3c555d522b2686a6663c6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 14 Nov 2008 16:21:19 -0800
Subject: ftrace: pass module struct to arch dynamic ftrace functions

Impact: allow archs more flexibility on dynamic ftrace implementations

Dynamic ftrace has largly been developed on x86. Since x86 does not
have the same limitations as other architectures, the ftrace interaction
between the generic code and the architecture specific code was not
flexible enough to handle some of the issues that other architectures
have.

Most notably, module trampolines. Due to the limited branch distance
that archs make in calling kernel core code from modules, the module
load code must create a trampoline to jump to what will make the
larger jump into core kernel code.

The problem arises when this happens to a call to mcount. Ftrace checks
all code before modifying it and makes sure the current code is what
it expects. Right now, there is not enough information to handle modifying
module trampolines.

This patch changes the API between generic dynamic ftrace code and
the arch dependent code. There is now two functions for modifying code:

  ftrace_make_nop(mod, rec, addr) - convert the code at rec->ip into
       a nop, where the original text is calling addr. (mod is the
       module struct if called by module init)

  ftrace_make_caller(rec, addr) - convert the code rec->ip that should
       be a nop into a caller to addr.

The record "rec" now has a new field called "arch" where the architecture
can add any special attributes to each call site record.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ftrace.h |  8 ++++++
 arch/x86/kernel/ftrace.c      | 29 +++++++++++++++++---
 include/linux/ftrace.h        | 53 +++++++++++++++++++++++++++---------
 kernel/module.c               |  2 +-
 kernel/trace/ftrace.c         | 62 +++++++++++++++++--------------------------
 5 files changed, 100 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9b6a1fa19e70..2bb43b433e07 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,6 +17,14 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 	 */
 	return addr - 1;
 }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+struct dyn_arch_ftrace {
+	/* No extra data needed for x86 */
+};
+
+#endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index fe832738e1e2..762222ad1387 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -166,7 +166,7 @@ static int ftrace_calc_offset(long ip, long addr)
 	return (int)(addr - ip);
 }
 
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
 	static union ftrace_code_union calc;
 
@@ -311,12 +311,12 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 
 static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
 
-unsigned char *ftrace_nop_replace(void)
+static unsigned char *ftrace_nop_replace(void)
 {
 	return ftrace_nop;
 }
 
-int
+static int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
@@ -349,6 +349,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	return 0;
 }
 
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char *new, *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_call_replace(ip, addr);
+	new = ftrace_nop_replace();
+
+	return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char *new, *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+
+	return ftrace_modify_code(rec->ip, old, new);
+}
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4fbc4a8b86a5..166a2070ef65 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,6 +74,9 @@ static inline void ftrace_start(void) { }
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
+#include <asm/ftrace.h>
+
 enum {
 	FTRACE_FL_FREE		= (1 << 0),
 	FTRACE_FL_FAILED	= (1 << 1),
@@ -88,6 +91,7 @@ struct dyn_ftrace {
 	struct list_head	list;
 	unsigned long		ip; /* address of mcount call-site */
 	unsigned long		flags;
+	struct dyn_arch_ftrace	arch;
 };
 
 int ftrace_force_update(void);
@@ -95,22 +99,40 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset);
 
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
-extern unsigned char *ftrace_nop_replace(void);
-extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
 extern int ftrace_dyn_arch_init(void *data);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
 
-/* May be defined in arch */
-extern int ftrace_arch_read_dyn_info(char *buf, int size);
+/**
+ * ftrace_make_nop - convert code into top
+ * @mod: module structure if called by module load initialization
+ * @rec: the mcount call site record
+ * @addr: the address that the call site should be calling
+ *
+ * This is a very sensitive operation and great care needs
+ * to be taken by the arch.  The operation should carefully
+ * read the location, check to see if what is read is indeed
+ * what we expect it to be, and then on success of the compare,
+ * it should write to the location.
+ *
+ * The code segment at @rec->ip should be a caller to @addr
+ *
+ * Return must be:
+ *  0 on success
+ *  -EFAULT on error reading the location
+ *  -EINVAL on a failed compare of the contents
+ *  -EPERM  on error writing to the location
+ * Any other value will be considered a failure.
+ */
+extern int ftrace_make_nop(struct module *mod,
+			   struct dyn_ftrace *rec, unsigned long addr);
 
 /**
- * ftrace_modify_code - modify code segment
- * @ip: the address of the code segment
- * @old_code: the contents of what is expected to be there
- * @new_code: the code to patch in
+ * ftrace_make_call - convert a nop call site into a call to addr
+ * @rec: the mcount call site record
+ * @addr: the address that the call site should call
  *
  * This is a very sensitive operation and great care needs
  * to be taken by the arch.  The operation should carefully
@@ -118,6 +140,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
  * what we expect it to be, and then on success of the compare,
  * it should write to the location.
  *
+ * The code segment at @rec->ip should be a nop
+ *
  * Return must be:
  *  0 on success
  *  -EFAULT on error reading the location
@@ -125,8 +149,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
  *  -EPERM  on error writing to the location
  * Any other value will be considered a failure.
  */
-extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
-			      unsigned char *new_code);
+extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
+
+
+/* May be defined in arch */
+extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
 
@@ -259,11 +286,13 @@ static inline void ftrace_dump(void) { }
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+extern void ftrace_init_module(struct module *mod,
+			       unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
 static inline void
-ftrace_init_module(unsigned long *start, unsigned long *end) { }
+ftrace_init_module(struct module *mod,
+		   unsigned long *start, unsigned long *end) { }
 #endif
 
 
diff --git a/kernel/module.c b/kernel/module.c
index 1f4cc00e0c20..69791274e899 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2201,7 +2201,7 @@ static noinline struct module *load_module(void __user *umod,
 	/* sechdrs[0].sh_size is always zero */
 	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
 			    sizeof(*mseg), &num_mcount);
-	ftrace_init_module(mseg, mseg + num_mcount);
+	ftrace_init_module(mod, mseg, mseg + num_mcount);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3940c71ac2a2..e9a5fbfce08e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -358,9 +358,7 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
 		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static void ftrace_bug(int failed, unsigned long ip,
-		       unsigned char *expected,
-		       unsigned char *replace)
+static void ftrace_bug(int failed, unsigned long ip)
 {
 	switch (failed) {
 	case -EFAULT:
@@ -372,9 +370,7 @@ static void ftrace_bug(int failed, unsigned long ip,
 		FTRACE_WARN_ON_ONCE(1);
 		pr_info("ftrace failed to modify ");
 		print_ip_sym(ip);
-		print_ip_ins(" expected: ", expected);
 		print_ip_ins(" actual: ", (unsigned char *)ip);
-		print_ip_ins(" replace: ", replace);
 		printk(KERN_CONT "\n");
 		break;
 	case -EPERM:
@@ -392,8 +388,7 @@ static void ftrace_bug(int failed, unsigned long ip,
 #define FTRACE_ADDR ((long)(ftrace_caller))
 
 static int
-__ftrace_replace_code(struct dyn_ftrace *rec,
-		      unsigned char *old, unsigned char *new, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ip, fl;
 
@@ -435,12 +430,10 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
 		 * otherwise enable it!
 		 */
 		if (fl & FTRACE_FL_ENABLED) {
-			/* swap new and old */
-			new = old;
-			old = ftrace_call_replace(ip, FTRACE_ADDR);
+			enable = 0;
 			rec->flags &= ~FTRACE_FL_ENABLED;
 		} else {
-			new = ftrace_call_replace(ip, FTRACE_ADDR);
+			enable = 1;
 			rec->flags |= FTRACE_FL_ENABLED;
 		}
 	} else {
@@ -453,10 +446,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
 			fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
 			if (fl == FTRACE_FL_NOTRACE)
 				return 0;
-
-			new = ftrace_call_replace(ip, FTRACE_ADDR);
-		} else
-			old = ftrace_call_replace(ip, FTRACE_ADDR);
+		}
 
 		if (enable) {
 			if (rec->flags & FTRACE_FL_ENABLED)
@@ -469,21 +459,18 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
 		}
 	}
 
-	return ftrace_modify_code(ip, old, new);
+	if (enable)
+		return ftrace_make_call(rec, FTRACE_ADDR);
+	else
+		return ftrace_make_nop(NULL, rec, FTRACE_ADDR);
 }
 
 static void ftrace_replace_code(int enable)
 {
 	int i, failed;
-	unsigned char *new = NULL, *old = NULL;
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 
-	if (enable)
-		old = ftrace_nop_replace();
-	else
-		new = ftrace_nop_replace();
-
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
 		for (i = 0; i < pg->index; i++) {
 			rec = &pg->records[i];
@@ -504,34 +491,30 @@ static void ftrace_replace_code(int enable)
 				unfreeze_record(rec);
 			}
 
-			failed = __ftrace_replace_code(rec, old, new, enable);
+			failed = __ftrace_replace_code(rec, enable);
 			if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
 				rec->flags |= FTRACE_FL_FAILED;
 				if ((system_state == SYSTEM_BOOTING) ||
 				    !core_kernel_text(rec->ip)) {
 					ftrace_free_rec(rec);
 				} else
-					ftrace_bug(failed, rec->ip, old, new);
+					ftrace_bug(failed, rec->ip);
 			}
 		}
 	}
 }
 
 static int
-ftrace_code_disable(struct dyn_ftrace *rec)
+ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 {
 	unsigned long ip;
-	unsigned char *nop, *call;
 	int ret;
 
 	ip = rec->ip;
 
-	nop = ftrace_nop_replace();
-	call = ftrace_call_replace(ip, mcount_addr);
-
-	ret = ftrace_modify_code(ip, call, nop);
+	ret = ftrace_make_nop(mod, rec, mcount_addr);
 	if (ret) {
-		ftrace_bug(ret, ip, call, nop);
+		ftrace_bug(ret, ip);
 		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
 	}
@@ -650,7 +633,7 @@ static cycle_t		ftrace_update_time;
 static unsigned long	ftrace_update_cnt;
 unsigned long		ftrace_update_tot_cnt;
 
-static int ftrace_update_code(void)
+static int ftrace_update_code(struct module *mod)
 {
 	struct dyn_ftrace *p, *t;
 	cycle_t start, stop;
@@ -667,7 +650,7 @@ static int ftrace_update_code(void)
 		list_del_init(&p->list);
 
 		/* convert record (i.e, patch mcount-call with NOP) */
-		if (ftrace_code_disable(p)) {
+		if (ftrace_code_disable(mod, p)) {
 			p->flags |= FTRACE_FL_CONVERTED;
 			ftrace_update_cnt++;
 		} else
@@ -1309,7 +1292,8 @@ static __init int ftrace_init_debugfs(void)
 
 fs_initcall(ftrace_init_debugfs);
 
-static int ftrace_convert_nops(unsigned long *start,
+static int ftrace_convert_nops(struct module *mod,
+			       unsigned long *start,
 			       unsigned long *end)
 {
 	unsigned long *p;
@@ -1325,18 +1309,19 @@ static int ftrace_convert_nops(unsigned long *start,
 
 	/* disable interrupts to prevent kstop machine */
 	local_irq_save(flags);
-	ftrace_update_code();
+	ftrace_update_code(mod);
 	local_irq_restore(flags);
 	mutex_unlock(&ftrace_start_lock);
 
 	return 0;
 }
 
-void ftrace_init_module(unsigned long *start, unsigned long *end)
+void ftrace_init_module(struct module *mod,
+			unsigned long *start, unsigned long *end)
 {
 	if (ftrace_disabled || start == end)
 		return;
-	ftrace_convert_nops(start, end);
+	ftrace_convert_nops(mod, start, end);
 }
 
 extern unsigned long __start_mcount_loc[];
@@ -1366,7 +1351,8 @@ void __init ftrace_init(void)
 
 	last_ftrace_enabled = ftrace_enabled = 1;
 
-	ret = ftrace_convert_nops(__start_mcount_loc,
+	ret = ftrace_convert_nops(NULL,
+				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
 	return;
-- 
cgit v1.2.3


From e7d3737ea1b102030f44e96c97754101e41515f0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 16 Nov 2008 06:02:06 +0100
Subject: tracing/function-return-tracer: support for dynamic ftrace on
 function return tracer

This patch adds the support for dynamic tracing on the function return tracer.
The whole difference with normal dynamic function tracing is that we don't need
to hook on a particular callback. The only pro that we want is to nop or set
dynamically the calls to ftrace_caller (which is ftrace_return_caller here).

Some security checks ensure that we are not trying to launch dynamic tracing for
return tracing while normal function tracing is already running.

An example of trace with getnstimeofday set as a filter:

ktime_get_ts+0x22/0x50 -> getnstimeofday (2283 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1396 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1825 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1426 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1524 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1434 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1502 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1404 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1397 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1051 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1314 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1344 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1163 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1390 ns)
ktime_get_ts+0x22/0x50 -> getnstimeofday (1374 ns)

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S            |  18 ++-
 arch/x86/kernel/ftrace.c              | 258 +++++++++++++++++-----------------
 include/linux/ftrace.h                |  16 ++-
 kernel/trace/Kconfig                  |   1 -
 kernel/trace/ftrace.c                 |  58 +++++++-
 kernel/trace/trace_functions_return.c |  15 +-
 6 files changed, 211 insertions(+), 155 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f97621149839..74defe21ba42 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1190,7 +1190,7 @@ ENTRY(mcount)
 	jnz trace
 #ifdef CONFIG_FUNCTION_RET_TRACER
 	cmpl $ftrace_stub, ftrace_function_return
-	jnz trace_return
+	jnz ftrace_return_caller
 #endif
 .globl ftrace_stub
 ftrace_stub:
@@ -1211,9 +1211,15 @@ trace:
 	popl %ecx
 	popl %eax
 	jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
-trace_return:
+ENTRY(ftrace_return_caller)
+	cmpl $0, function_trace_stop
+	jne ftrace_stub
+
 	pushl %eax
 	pushl %ecx
 	pushl %edx
@@ -1223,7 +1229,8 @@ trace_return:
 	popl %edx
 	popl %ecx
 	popl %eax
-	jmp ftrace_stub
+	ret
+END(ftrace_return_caller)
 
 .globl return_to_handler
 return_to_handler:
@@ -1237,10 +1244,7 @@ return_to_handler:
 	popl %ecx
 	popl %eax
 	ret
-#endif /* CONFIG_FUNCTION_RET_TRACER */
-END(mcount)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
+#endif
 
 .section .rodata,"a"
 #include "syscall_table_32.S"
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d98b5a8ecf4c..924153edd973 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,134 +24,6 @@
 #include <asm/nmi.h>
 
 
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
-	atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
-	atomic_dec(&in_nmi);
-}
-
-/* Add a function return address to the trace stack on thread info.*/
-static int push_return_trace(unsigned long ret, unsigned long long time,
-				unsigned long func)
-{
-	int index;
-	struct thread_info *ti = current_thread_info();
-
-	/* The return trace stack is full */
-	if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
-		return -EBUSY;
-
-	index = ++ti->curr_ret_stack;
-	barrier();
-	ti->ret_stack[index].ret = ret;
-	ti->ret_stack[index].func = func;
-	ti->ret_stack[index].calltime = time;
-
-	return 0;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(unsigned long *ret, unsigned long long *time,
-				unsigned long *func)
-{
-	int index;
-
-	struct thread_info *ti = current_thread_info();
-	index = ti->curr_ret_stack;
-	*ret = ti->ret_stack[index].ret;
-	*func = ti->ret_stack[index].func;
-	*time = ti->ret_stack[index].calltime;
-	ti->curr_ret_stack--;
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(void)
-{
-	struct ftrace_retfunc trace;
-	pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
-	trace.rettime = cpu_clock(raw_smp_processor_id());
-	ftrace_function_return(&trace);
-
-	return trace.ret;
-}
-
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info.
- */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
-{
-	unsigned long old;
-	unsigned long long calltime;
-	int faulted;
-	unsigned long return_hooker = (unsigned long)
-				&return_to_handler;
-
-	/* Nmi's are currently unsupported */
-	if (atomic_read(&in_nmi))
-		return;
-
-	/*
-	 * Protect against fault, even if it shouldn't
-	 * happen. This tool is too much intrusive to
-	 * ignore such a protection.
-	 */
-	asm volatile(
-		"1: movl (%[parent_old]), %[old]\n"
-		"2: movl %[return_hooker], (%[parent_replaced])\n"
-		"   movl $0, %[faulted]\n"
-
-		".section .fixup, \"ax\"\n"
-		"3: movl $1, %[faulted]\n"
-		".previous\n"
-
-		".section __ex_table, \"a\"\n"
-		"   .long 1b, 3b\n"
-		"   .long 2b, 3b\n"
-		".previous\n"
-
-		: [parent_replaced] "=r" (parent), [old] "=r" (old),
-		  [faulted] "=r" (faulted)
-		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
-		: "memory"
-	);
-
-	if (WARN_ON(faulted)) {
-		unregister_ftrace_return();
-		return;
-	}
-
-	if (WARN_ON(!__kernel_text_address(old))) {
-		unregister_ftrace_return();
-		*parent = old;
-		return;
-	}
-
-	calltime = cpu_clock(raw_smp_processor_id());
-
-	if (push_return_trace(old, calltime, self_addr) == -EBUSY)
-		*parent = old;
-}
-
-#endif
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 union ftrace_code_union {
@@ -450,3 +322,133 @@ int __init ftrace_dyn_arch_init(void *data)
 	return 0;
 }
 #endif
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+	atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+	atomic_dec(&in_nmi);
+}
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+				unsigned long func)
+{
+	int index;
+	struct thread_info *ti = current_thread_info();
+
+	/* The return trace stack is full */
+	if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
+		return -EBUSY;
+
+	index = ++ti->curr_ret_stack;
+	barrier();
+	ti->ret_stack[index].ret = ret;
+	ti->ret_stack[index].func = func;
+	ti->ret_stack[index].calltime = time;
+
+	return 0;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void pop_return_trace(unsigned long *ret, unsigned long long *time,
+				unsigned long *func)
+{
+	int index;
+
+	struct thread_info *ti = current_thread_info();
+	index = ti->curr_ret_stack;
+	*ret = ti->ret_stack[index].ret;
+	*func = ti->ret_stack[index].func;
+	*time = ti->ret_stack[index].calltime;
+	ti->curr_ret_stack--;
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+	struct ftrace_retfunc trace;
+	pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+	trace.rettime = cpu_clock(raw_smp_processor_id());
+	ftrace_function_return(&trace);
+
+	return trace.ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+	unsigned long old;
+	unsigned long long calltime;
+	int faulted;
+	unsigned long return_hooker = (unsigned long)
+				&return_to_handler;
+
+	/* Nmi's are currently unsupported */
+	if (atomic_read(&in_nmi))
+		return;
+
+	/*
+	 * Protect against fault, even if it shouldn't
+	 * happen. This tool is too much intrusive to
+	 * ignore such a protection.
+	 */
+	asm volatile(
+		"1: movl (%[parent_old]), %[old]\n"
+		"2: movl %[return_hooker], (%[parent_replaced])\n"
+		"   movl $0, %[faulted]\n"
+
+		".section .fixup, \"ax\"\n"
+		"3: movl $1, %[faulted]\n"
+		".previous\n"
+
+		".section __ex_table, \"a\"\n"
+		"   .long 1b, 3b\n"
+		"   .long 2b, 3b\n"
+		".previous\n"
+
+		: [parent_replaced] "=r" (parent), [old] "=r" (old),
+		  [faulted] "=r" (faulted)
+		: [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+		: "memory"
+	);
+
+	if (WARN_ON(faulted)) {
+		unregister_ftrace_return();
+		return;
+	}
+
+	if (WARN_ON(!__kernel_text_address(old))) {
+		unregister_ftrace_return();
+		*parent = old;
+		return;
+	}
+
+	calltime = cpu_clock(raw_smp_processor_id());
+
+	if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+		*parent = old;
+}
+
+#endif /* CONFIG_FUNCTION_RET_TRACER */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 166a2070ef65..f1af1aab00e6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -25,6 +25,17 @@ struct ftrace_ops {
 
 extern int function_trace_stop;
 
+/*
+ * Type of the current tracing.
+ */
+enum ftrace_tracing_type_t {
+	FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */
+	FTRACE_TYPE_RETURN,	/* Hook the return of the function */
+};
+
+/* Current tracing type, default is FTRACE_TYPE_ENTER */
+extern enum ftrace_tracing_type_t ftrace_tracing_type;
+
 /**
  * ftrace_stop - stop function tracer.
  *
@@ -104,6 +115,9 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
+#ifdef CONFIG_FUNCTION_RET_TRACER
+extern void ftrace_return_caller(void);
+#endif
 
 /**
  * ftrace_make_nop - convert code into top
@@ -310,7 +324,7 @@ struct ftrace_retfunc {
 /* Type of a callback handler of tracing return function */
 typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
 
-extern void register_ftrace_return(trace_function_return_t func);
+extern int register_ftrace_return(trace_function_return_t func);
 /* The current handler in use */
 extern trace_function_return_t ftrace_function_return;
 extern void unregister_ftrace_return(void);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 9c89526b6b7c..b8378fad29a3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -59,7 +59,6 @@ config FUNCTION_TRACER
 
 config FUNCTION_RET_TRACER
 	bool "Kernel Function return Tracer"
-	depends on !DYNAMIC_FTRACE
 	depends on HAVE_FUNCTION_RET_TRACER
 	depends on FUNCTION_TRACER
 	help
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b42ec1de546b..2f78a45aac14 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -50,6 +50,9 @@ static int last_ftrace_enabled;
 /* Quick disabling of function tracer. */
 int function_trace_stop;
 
+/* By default, current tracing type is normal tracing. */
+enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER;
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -385,12 +388,21 @@ static void ftrace_bug(int failed, unsigned long ip)
 	}
 }
 
-#define FTRACE_ADDR ((long)(ftrace_caller))
 
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ip, fl;
+	unsigned long ftrace_addr;
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
+		ftrace_addr = (unsigned long)ftrace_caller;
+	else
+		ftrace_addr = (unsigned long)ftrace_return_caller;
+#else
+	ftrace_addr = (unsigned long)ftrace_caller;
+#endif
 
 	ip = rec->ip;
 
@@ -450,9 +462,9 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 	}
 
 	if (rec->flags & FTRACE_FL_ENABLED)
-		return ftrace_make_call(rec, FTRACE_ADDR);
+		return ftrace_make_call(rec, ftrace_addr);
 	else
-		return ftrace_make_nop(NULL, rec, FTRACE_ADDR);
+		return ftrace_make_nop(NULL, rec, ftrace_addr);
 }
 
 static void ftrace_replace_code(int enable)
@@ -1405,10 +1417,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
 		return -1;
 
 	mutex_lock(&ftrace_sysctl_lock);
+
+	if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
+		ret = -EBUSY;
+		goto out;
+	}
+
 	ret = __register_ftrace_function(ops);
 	ftrace_startup();
-	mutex_unlock(&ftrace_sysctl_lock);
 
+out:
+	mutex_unlock(&ftrace_sysctl_lock);
 	return ret;
 }
 
@@ -1474,16 +1493,45 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 }
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
+
+/* The callback that hooks the return of a function */
 trace_function_return_t ftrace_function_return =
 			(trace_function_return_t)ftrace_stub;
-void register_ftrace_return(trace_function_return_t func)
+
+int register_ftrace_return(trace_function_return_t func)
 {
+	int ret = 0;
+
+	mutex_lock(&ftrace_sysctl_lock);
+
+	/*
+	 * Don't launch return tracing if normal function
+	 * tracing is already running.
+	 */
+	if (ftrace_trace_function != ftrace_stub) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ftrace_tracing_type = FTRACE_TYPE_RETURN;
 	ftrace_function_return = func;
+	ftrace_startup();
+
+out:
+	mutex_unlock(&ftrace_sysctl_lock);
+	return ret;
 }
 
 void unregister_ftrace_return(void)
 {
+	mutex_lock(&ftrace_sysctl_lock);
+
 	ftrace_function_return = (trace_function_return_t)ftrace_stub;
+	ftrace_shutdown();
+	/* Restore normal tracing type */
+	ftrace_tracing_type = FTRACE_TYPE_ENTER;
+
+	mutex_unlock(&ftrace_sysctl_lock);
 }
 #endif
 
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
index 61185f756a13..a68564af022b 100644
--- a/kernel/trace/trace_functions_return.c
+++ b/kernel/trace/trace_functions_return.c
@@ -14,29 +14,18 @@
 #include "trace.h"
 
 
-static void start_return_trace(struct trace_array *tr)
-{
-	register_ftrace_return(&trace_function_return);
-}
-
-static void stop_return_trace(struct trace_array *tr)
-{
-	unregister_ftrace_return();
-}
-
 static int return_trace_init(struct trace_array *tr)
 {
 	int cpu;
 	for_each_online_cpu(cpu)
 		tracing_reset(tr, cpu);
 
-	start_return_trace(tr);
-	return 0;
+	return register_ftrace_return(&trace_function_return);
 }
 
 static void return_trace_reset(struct trace_array *tr)
 {
-		stop_return_trace(tr);
+		unregister_ftrace_return();
 }
 
 
-- 
cgit v1.2.3


From 954e100d2275cb2f150f2b18d5cddcdf67b956ac Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:34 -0500
Subject: rcu: add rcu_read_*_sched_notrace()

Impact: new API, useful for tracepoints and markers.

Add _notrace version to rcu_read_*_sched().

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Reviewed-by: Paul E McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e33..895dc9c1088c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -142,6 +142,7 @@ struct rcu_head {
  * on the write-side to insure proper synchronization.
  */
 #define rcu_read_lock_sched() preempt_disable()
+#define rcu_read_lock_sched_notrace() preempt_disable_notrace()
 
 /*
  * rcu_read_unlock_sched - marks the end of a RCU-classic critical section
@@ -149,6 +150,7 @@ struct rcu_head {
  * See rcu_read_lock_sched for more information.
  */
 #define rcu_read_unlock_sched() preempt_enable()
+#define rcu_read_unlock_sched_notrace() preempt_enable_notrace()
 
 
-- 
cgit v1.2.3


From e3f8c4b9117d70127a8cab480af83bbfd048a28b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 14 Nov 2008 17:47:36 -0500
Subject: markers: add missing stdargs.h include, needed due to va_list usage

Impact: build fix (for future changes)

That seemed to cause built issue when marker.h is included early, even
though stdargs.h is included in kernel.h.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 4cf45472d9f5..05ec0df37089 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -12,6 +12,7 @@
  * See the file COPYING for more details.
  */
 
+#include <stdarg.h>
 #include <linux/types.h>
 
 struct module;
-- 
cgit v1.2.3


From c1df1bd2c4d4b20c83755a0f41956b57aec4842a Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:39 -0500
Subject: markers: auto enable tracepoints (new API : trace_mark_tp())

Impact: new API

Add a new API trace_mark_tp(), which declares a marker within a
tracepoint probe. When the marker is activated, the tracepoint is
automatically enabled.

No branch test is used at the marker site, because it would be a
duplicate of the branch already present in the tracepoint.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/marker.h | 45 +++++++++++++++++++++++++++++++++++++++++-
 init/Kconfig           |  1 +
 kernel/marker.c        | 53 ++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 96 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 05ec0df37089..57a307018ceb 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -49,6 +49,8 @@ struct marker {
 	void (*call)(const struct marker *mdata, void *call_private, ...);
 	struct marker_probe_closure single;
 	struct marker_probe_closure *multi;
+	const char *tp_name;	/* Optional tracepoint name */
+	void *tp_cb;		/* Optional tracepoint callback */
 } __attribute__((aligned(8)));
 
 #ifdef CONFIG_MARKERS
@@ -73,7 +75,7 @@ struct marker {
 		__attribute__((section("__markers"), aligned(8))) =	\
 		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
 		0, 0, marker_probe_cb,					\
-		{ __mark_empty_function, NULL}, NULL };			\
+		{ __mark_empty_function, NULL}, NULL, NULL, NULL };	\
 		__mark_check_format(format, ## args);			\
 		if (unlikely(__mark_##name.state)) {			\
 			(*__mark_##name.call)				\
@@ -81,11 +83,38 @@ struct marker {
 		}							\
 	} while (0)
 
+#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
+	do {								\
+		void __check_tp_type(void)				\
+		{							\
+			register_trace_##tp_name(tp_cb);		\
+		}							\
+		static const char __mstrtab_##name[]			\
+		__attribute__((section("__markers_strings")))		\
+		= #name "\0" format;					\
+		static struct marker __mark_##name			\
+		__attribute__((section("__markers"), aligned(8))) =	\
+		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
+		0, 0, marker_probe_cb,					\
+		{ __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\
+		__mark_check_format(format, ## args);			\
+		(*__mark_##name.call)(&__mark_##name, call_private,	\
+					## args);			\
+	} while (0)
+
 extern void marker_update_probe_range(struct marker *begin,
 	struct marker *end);
 #else /* !CONFIG_MARKERS */
 #define __trace_mark(generic, name, call_private, format, args...) \
 		__mark_check_format(format, ## args)
+#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
+	do {								\
+		void __check_tp_type(void)				\
+		{							\
+			register_trace_##tp_name(tp_cb);		\
+		}							\
+		__mark_check_format(format, ## args);			\
+	} while (0)
 static inline void marker_update_probe_range(struct marker *begin,
 	struct marker *end)
 { }
@@ -117,6 +146,20 @@ static inline void marker_update_probe_range(struct marker *begin,
 #define _trace_mark(name, format, args...) \
 	__trace_mark(1, name, NULL, format, ## args)
 
+/**
+ * trace_mark_tp - Marker in a tracepoint callback
+ * @name: marker name, not quoted.
+ * @tp_name: tracepoint name, not quoted.
+ * @tp_cb: tracepoint callback. Should have an associated global symbol so it
+ *         is not optimized away by the compiler (should not be static).
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker in a tracepoint callback.
+ */
+#define trace_mark_tp(name, tp_name, tp_cb, format, args...)	\
+	__trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args)
+
 /**
  * MARK_NOARGS - Format string for a marker with no argument.
  */
diff --git a/init/Kconfig b/init/Kconfig
index 86b00c53fade..f5bacb438711 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -808,6 +808,7 @@ config TRACEPOINTS
 
 config MARKERS
 	bool "Activate markers"
+	depends on TRACEPOINTS
 	help
 	  Place an empty function call at each marker site. Can be
 	  dynamically changed for a probe function.
diff --git a/kernel/marker.c b/kernel/marker.c
index 348e70cc355a..c14ec26a9b9f 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -479,7 +479,7 @@ static int marker_set_format(struct marker_entry *entry, const char *format)
 static int set_marker(struct marker_entry *entry, struct marker *elem,
 		int active)
 {
-	int ret;
+	int ret = 0;
 	WARN_ON(strcmp(entry->name, elem->name) != 0);
 
 	if (entry->format) {
@@ -531,9 +531,40 @@ static int set_marker(struct marker_entry *entry, struct marker *elem,
 	 */
 	smp_wmb();
 	elem->ptype = entry->ptype;
+
+	if (elem->tp_name && (active ^ elem->state)) {
+		WARN_ON(!elem->tp_cb);
+		/*
+		 * It is ok to directly call the probe registration because type
+		 * checking has been done in the __trace_mark_tp() macro.
+		 */
+
+		if (active) {
+			/*
+			 * try_module_get should always succeed because we hold
+			 * lock_module() to get the tp_cb address.
+			 */
+			ret = try_module_get(__module_text_address(
+				(unsigned long)elem->tp_cb));
+			BUG_ON(!ret);
+			ret = tracepoint_probe_register_noupdate(
+				elem->tp_name,
+				elem->tp_cb);
+		} else {
+			ret = tracepoint_probe_unregister_noupdate(
+				elem->tp_name,
+				elem->tp_cb);
+			/*
+			 * tracepoint_probe_update_all() must be called
+			 * before the module containing tp_cb is unloaded.
+			 */
+			module_put(__module_text_address(
+				(unsigned long)elem->tp_cb));
+		}
+	}
 	elem->state = active;
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -544,7 +575,24 @@ static int set_marker(struct marker_entry *entry, struct marker *elem,
  */
 static void disable_marker(struct marker *elem)
 {
+	int ret;
+
 	/* leave "call" as is. It is known statically. */
+	if (elem->tp_name && elem->state) {
+		WARN_ON(!elem->tp_cb);
+		/*
+		 * It is ok to directly call the probe registration because type
+		 * checking has been done in the __trace_mark_tp() macro.
+		 */
+		ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
+			elem->tp_cb);
+		WARN_ON(ret);
+		/*
+		 * tracepoint_probe_update_all() must be called
+		 * before the module containing tp_cb is unloaded.
+		 */
+		module_put(__module_text_address((unsigned long)elem->tp_cb));
+	}
 	elem->state = 0;
 	elem->single.func = __mark_empty_function;
 	/* Update the function before setting the ptype */
@@ -608,6 +656,7 @@ static void marker_update_probes(void)
 	marker_update_probe_range(__start___markers, __stop___markers);
 	/* Markers in modules. */
 	module_update_markers();
+	tracepoint_probe_update_all();
 }
 
 /**
-- 
cgit v1.2.3


From a0bca6a59ebc052751eed6e3b182c153495672d8 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:40 -0500
Subject: markers: create DEFINE_MARKER and GET_MARKER (new API)

Impact: new API.

Allow markers to be used only for declaration, without function call
associated. Useful to create specialized probes.

The problem we had is that two function calls were required when one
wanted to put a marker in a tracepoint probe. Now the marker can be used
simply for trace data type declaration, leaving the trace write work
within the tracepoint probe without any additional function call.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/markers.txt | 14 ++++++++++++++
 include/linux/marker.h    | 39 +++++++++++++++++++++++----------------
 2 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 089f6138fcd9..6d275e4ef385 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -70,6 +70,20 @@ a printk warning which identifies the inconsistency:
 
 "Format mismatch for probe probe_name (format), marker (format)"
 
+Another way to use markers is to simply define the marker without generating any
+function call to actually call into the marker. This is useful in combination
+with tracepoint probes in a scheme like this :
+
+void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
+
+DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name,
+	"arg1 %u pid %d");
+
+notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
+{
+	struct marker *marker = &GET_MARKER(kernel_irq_entry);
+	/* write data to trace buffers ... */
+}
 
 * Probe / marker example
 
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 57a307018ceb..34c14bc957f5 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -55,6 +55,22 @@ struct marker {
 
 #ifdef CONFIG_MARKERS
 
+#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format)		\
+		static const char __mstrtab_##name[]			\
+		__attribute__((section("__markers_strings")))		\
+		= #name "\0" format;					\
+		static struct marker __mark_##name			\
+		__attribute__((section("__markers"), aligned(8))) =	\
+		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
+		  0, 0, marker_probe_cb, { __mark_empty_function, NULL},\
+		  NULL, tp_name_str, tp_cb }
+
+#define DEFINE_MARKER(name, format)					\
+		_DEFINE_MARKER(name, NULL, NULL, format)
+
+#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format)			\
+		_DEFINE_MARKER(name, #tp_name, tp_cb, format)
+
 /*
  * Note : the empty asm volatile with read constraint is used here instead of a
  * "used" attribute to fix a gcc 4.1.x bug.
@@ -68,14 +84,7 @@ struct marker {
  */
 #define __trace_mark(generic, name, call_private, format, args...)	\
 	do {								\
-		static const char __mstrtab_##name[]			\
-		__attribute__((section("__markers_strings")))		\
-		= #name "\0" format;					\
-		static struct marker __mark_##name			\
-		__attribute__((section("__markers"), aligned(8))) =	\
-		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
-		0, 0, marker_probe_cb,					\
-		{ __mark_empty_function, NULL}, NULL, NULL, NULL };	\
+		DEFINE_MARKER(name, format);				\
 		__mark_check_format(format, ## args);			\
 		if (unlikely(__mark_##name.state)) {			\
 			(*__mark_##name.call)				\
@@ -89,14 +98,7 @@ struct marker {
 		{							\
 			register_trace_##tp_name(tp_cb);		\
 		}							\
-		static const char __mstrtab_##name[]			\
-		__attribute__((section("__markers_strings")))		\
-		= #name "\0" format;					\
-		static struct marker __mark_##name			\
-		__attribute__((section("__markers"), aligned(8))) =	\
-		{ __mstrtab_##name, &__mstrtab_##name[sizeof(#name)],	\
-		0, 0, marker_probe_cb,					\
-		{ __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\
+		DEFINE_MARKER_TP(name, tp_name, tp_cb, format);		\
 		__mark_check_format(format, ## args);			\
 		(*__mark_##name.call)(&__mark_##name, call_private,	\
 					## args);			\
@@ -104,7 +106,11 @@ struct marker {
 
 extern void marker_update_probe_range(struct marker *begin,
 	struct marker *end);
+
+#define GET_MARKER(name)	(__mark_##name)
+
 #else /* !CONFIG_MARKERS */
+#define DEFINE_MARKER(name, tp_name, tp_cb, format)
 #define __trace_mark(generic, name, call_private, format, args...) \
 		__mark_check_format(format, ## args)
 #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
@@ -118,6 +124,7 @@ extern void marker_update_probe_range(struct marker *begin,
 static inline void marker_update_probe_range(struct marker *begin,
 	struct marker *end)
 { }
+#define GET_MARKER(name)
 #endif /* CONFIG_MARKERS */
 
 /**
-- 
cgit v1.2.3


From da7b3eab167091693ad215ad7692f7d0d24d1356 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:43 -0500
Subject: tracepoints: use rcu_*_sched_notrace

Make sure tracepoints can be called within ftrace callbacks.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 63064e9403f2..69648c54a326 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -40,14 +40,14 @@ struct tracepoint {
 	do {								\
 		void **it_func;						\
 									\
-		rcu_read_lock_sched();					\
+		rcu_read_lock_sched_notrace();				\
 		it_func = rcu_dereference((tp)->funcs);			\
 		if (it_func) {						\
 			do {						\
 				((void(*)(proto))(*it_func))(args);	\
 			} while (*(++it_func));				\
 		}							\
-		rcu_read_unlock_sched();				\
+		rcu_read_unlock_sched_notrace();			\
 	} while (0)
 
 /*
-- 
cgit v1.2.3


From c420970ef476d7d68df119711700666224001f43 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:44 -0500
Subject: tracepoints: use unregister return value

Impact: bugfix.

Unregistering a tracepoint can fail. Return the error value.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 69648c54a326..c60a791f8874 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -73,9 +73,9 @@ struct tracepoint {
 		return tracepoint_probe_register(#name ":" #proto,	\
 			(void *)probe);					\
 	}								\
-	static inline void unregister_trace_##name(void (*probe)(proto))\
+	static inline int unregister_trace_##name(void (*probe)(proto))	\
 	{								\
-		tracepoint_probe_unregister(#name ":" #proto,		\
+		return tracepoint_probe_unregister(#name ":" #proto,	\
 			(void *)probe);					\
 	}
 
@@ -92,8 +92,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
 	{								\
 		return -ENOSYS;						\
 	}								\
-	static inline void unregister_trace_##name(void (*probe)(proto))\
-	{ }
+	static inline int unregister_trace_##name(void (*probe)(proto))	\
+	{								\
+		return -ENOSYS;						\
+	}
 
 static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 	struct tracepoint *end)
-- 
cgit v1.2.3


From 5f382671def7cb9c0f4b75d586dc5f60dca5e1c3 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:45 -0500
Subject: tracepoints: do not put arguments in name

Impact: cleanup

That's overkill, takes space. We have a global tracepoint registery in
header files anyway.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c60a791f8874..7e9b42aeae0e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -60,7 +60,7 @@ struct tracepoint {
 	{								\
 		static const char __tpstrtab_##name[]			\
 		__attribute__((section("__tracepoints_strings")))	\
-		= #name ":" #proto;					\
+		= #name;						\
 		static struct tracepoint __tracepoint_##name		\
 		__attribute__((section("__tracepoints"), aligned(8))) =	\
 		{ __tpstrtab_##name, 0, NULL };				\
@@ -70,13 +70,11 @@ struct tracepoint {
 	}								\
 	static inline int register_trace_##name(void (*probe)(proto))	\
 	{								\
-		return tracepoint_probe_register(#name ":" #proto,	\
-			(void *)probe);					\
+		return tracepoint_probe_register(#name, (void *)probe);	\
 	}								\
 	static inline int unregister_trace_##name(void (*probe)(proto))	\
 	{								\
-		return tracepoint_probe_unregister(#name ":" #proto,	\
-			(void *)probe);					\
+		return tracepoint_probe_unregister(#name, (void *)probe);\
 	}
 
 extern void tracepoint_update_probe_range(struct tracepoint *begin,
-- 
cgit v1.2.3


From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Fri, 14 Nov 2008 17:47:47 -0500
Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE()

Impact: API *CHANGE*. Must update all tracepoint users.

Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint
structure in a single spot for all the kernel. It helps reducing memory
consumption, especially when declaring a lot of tracepoints, e.g. for
kmalloc tracing.

*API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for
tracepoint declarations rather than DEFINE_TRACE(). This is the sane way
to do it. The name previously used was misleading.

Updates scheduler instrumentation to follow this API change.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/tracepoints.txt           |  7 ++++++-
 include/asm-generic/vmlinux.lds.h       |  1 +
 include/linux/tracepoint.h              | 35 +++++++++++++++++++++++----------
 include/trace/sched.h                   | 24 +++++++++++-----------
 kernel/exit.c                           |  4 ++++
 kernel/fork.c                           |  2 ++
 kernel/kthread.c                        |  3 +++
 kernel/sched.c                          |  6 ++++++
 kernel/signal.c                         |  2 ++
 samples/tracepoints/tp-samples-trace.h  |  4 ++--
 samples/tracepoints/tracepoint-sample.c |  3 +++
 11 files changed, 66 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 5d354e167494..e8ad47b437f3 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -42,7 +42,7 @@ In include/trace/subsys.h :
 
 #include <linux/tracepoint.h>
 
-DEFINE_TRACE(subsys_eventname,
+DECLARE_TRACE(subsys_eventname,
 	TPPTOTO(int firstarg, struct task_struct *p),
 	TPARGS(firstarg, p));
 
@@ -50,6 +50,8 @@ In subsys/file.c (where the tracing statement must be added) :
 
 #include <trace/subsys.h>
 
+DEFINE_TRACE(subsys_eventname);
+
 void somefct(void)
 {
 	...
@@ -86,6 +88,9 @@ to limit collisions. Tracepoint names are global to the kernel: they are
 considered as being the same whether they are in the core kernel image or in
 modules.
 
+If the tracepoint has to be used in kernel modules, an
+EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to
+export the defined tracepoints.
 
 * Probe / tracepoint example
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a5e4ed9baec8..3b46ae464933 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -71,6 +71,7 @@
 	VMLINUX_SYMBOL(__start___markers) = .;				\
 	*(__markers)							\
 	VMLINUX_SYMBOL(__stop___markers) = .;				\
+	. = ALIGN(32);							\
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	*(__tracepoints)						\
 	VMLINUX_SYMBOL(__stop___tracepoints) = .;			\
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7e9b42aeae0e..757005458366 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -24,8 +24,12 @@ struct tracepoint {
 	const char *name;		/* Tracepoint name */
 	int state;			/* State. */
 	void **funcs;
-} __attribute__((aligned(8)));
-
+} __attribute__((aligned(32)));		/*
+					 * Aligned on 32 bytes because it is
+					 * globally visible and gcc happily
+					 * align these on the structure size.
+					 * Keep in sync with vmlinux.lds.h.
+					 */
 
 #define TPPROTO(args...)	args
 #define TPARGS(args...)		args
@@ -55,15 +59,10 @@ struct tracepoint {
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define DEFINE_TRACE(name, proto, args)					\
+#define DECLARE_TRACE(name, proto, args)				\
+	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	{								\
-		static const char __tpstrtab_##name[]			\
-		__attribute__((section("__tracepoints_strings")))	\
-		= #name;						\
-		static struct tracepoint __tracepoint_##name		\
-		__attribute__((section("__tracepoints"), aligned(8))) =	\
-		{ __tpstrtab_##name, 0, NULL };				\
 		if (unlikely(__tracepoint_##name.state))		\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TPPROTO(proto), TPARGS(args));		\
@@ -77,11 +76,23 @@ struct tracepoint {
 		return tracepoint_probe_unregister(#name, (void *)probe);\
 	}
 
+#define DEFINE_TRACE(name)						\
+	static const char __tpstrtab_##name[]				\
+	__attribute__((section("__tracepoints_strings"))) = #name;	\
+	struct tracepoint __tracepoint_##name				\
+	__attribute__((section("__tracepoints"), aligned(32))) =	\
+		{ __tpstrtab_##name, 0, NULL }
+
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
+	EXPORT_SYMBOL_GPL(__tracepoint_##name)
+#define EXPORT_TRACEPOINT_SYMBOL(name)					\
+	EXPORT_SYMBOL(__tracepoint_##name)
+
 extern void tracepoint_update_probe_range(struct tracepoint *begin,
 	struct tracepoint *end);
 
 #else /* !CONFIG_TRACEPOINTS */
-#define DEFINE_TRACE(name, proto, args)			\
+#define DECLARE_TRACE(name, proto, args)				\
 	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
 	{ }								\
 	static inline void trace_##name(proto)				\
@@ -95,6 +106,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
 		return -ENOSYS;						\
 	}
 
+#define DEFINE_TRACE(name)
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
+#define EXPORT_TRACEPOINT_SYMBOL(name)
+
 static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 	struct tracepoint *end)
 { }
diff --git a/include/trace/sched.h b/include/trace/sched.h
index ad47369d01b5..9b2854abf7e2 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -4,52 +4,52 @@
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
-DEFINE_TRACE(sched_kthread_stop,
+DECLARE_TRACE(sched_kthread_stop,
 	TPPROTO(struct task_struct *t),
 		TPARGS(t));
 
-DEFINE_TRACE(sched_kthread_stop_ret,
+DECLARE_TRACE(sched_kthread_stop_ret,
 	TPPROTO(int ret),
 		TPARGS(ret));
 
-DEFINE_TRACE(sched_wait_task,
+DECLARE_TRACE(sched_wait_task,
 	TPPROTO(struct rq *rq, struct task_struct *p),
 		TPARGS(rq, p));
 
-DEFINE_TRACE(sched_wakeup,
+DECLARE_TRACE(sched_wakeup,
 	TPPROTO(struct rq *rq, struct task_struct *p),
 		TPARGS(rq, p));
 
-DEFINE_TRACE(sched_wakeup_new,
+DECLARE_TRACE(sched_wakeup_new,
 	TPPROTO(struct rq *rq, struct task_struct *p),
 		TPARGS(rq, p));
 
-DEFINE_TRACE(sched_switch,
+DECLARE_TRACE(sched_switch,
 	TPPROTO(struct rq *rq, struct task_struct *prev,
 		struct task_struct *next),
 		TPARGS(rq, prev, next));
 
-DEFINE_TRACE(sched_migrate_task,
+DECLARE_TRACE(sched_migrate_task,
 	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
 		TPARGS(rq, p, dest_cpu));
 
-DEFINE_TRACE(sched_process_free,
+DECLARE_TRACE(sched_process_free,
 	TPPROTO(struct task_struct *p),
 		TPARGS(p));
 
-DEFINE_TRACE(sched_process_exit,
+DECLARE_TRACE(sched_process_exit,
 	TPPROTO(struct task_struct *p),
 		TPARGS(p));
 
-DEFINE_TRACE(sched_process_wait,
+DECLARE_TRACE(sched_process_wait,
 	TPPROTO(struct pid *pid),
 		TPARGS(pid));
 
-DEFINE_TRACE(sched_process_fork,
+DECLARE_TRACE(sched_process_fork,
 	TPPROTO(struct task_struct *parent, struct task_struct *child),
 		TPARGS(parent, child));
 
-DEFINE_TRACE(sched_signal_send,
+DECLARE_TRACE(sched_signal_send,
 	TPPROTO(int sig, struct task_struct *p),
 		TPARGS(sig, p));
 
diff --git a/kernel/exit.c b/kernel/exit.c
index ae2b92be5fae..f995d2418668 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -54,6 +54,10 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+DEFINE_TRACE(sched_process_free);
+DEFINE_TRACE(sched_process_exit);
+DEFINE_TRACE(sched_process_wait);
+
 static void exit_mm(struct task_struct * tsk);
 
 static inline int task_detached(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe0..0837d0deee5f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -79,6 +79,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+DEFINE_TRACE(sched_process_fork);
+
 int nr_processes(void)
 {
 	int cpu;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e7a7ce3ed0a..4fbc456f393d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
+DEFINE_TRACE(sched_kthread_stop);
+DEFINE_TRACE(sched_kthread_stop_ret);
+
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/sched.c b/kernel/sched.c
index 50a21f964679..327f91c63c99 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
+DEFINE_TRACE(sched_wait_task);
+DEFINE_TRACE(sched_wakeup);
+DEFINE_TRACE(sched_wakeup_new);
+DEFINE_TRACE(sched_switch);
+DEFINE_TRACE(sched_migrate_task);
+
 #ifdef CONFIG_SMP
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc654455..e9afe63da24b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,6 +41,8 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
+DEFINE_TRACE(sched_signal_send);
+
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
 	return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
index 0216b55bd640..01724e04c556 100644
--- a/samples/tracepoints/tp-samples-trace.h
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -4,10 +4,10 @@
 #include <linux/proc_fs.h>	/* for struct inode and struct file */
 #include <linux/tracepoint.h>
 
-DEFINE_TRACE(subsys_event,
+DECLARE_TRACE(subsys_event,
 	TPPROTO(struct inode *inode, struct file *file),
 	TPARGS(inode, file));
-DEFINE_TRACE(subsys_eventb,
+DECLARE_TRACE(subsys_eventb,
 	TPPROTO(void),
 	TPARGS());
 #endif
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 4ae4b7fcc043..00d169792a3e 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -13,6 +13,9 @@
 #include <linux/proc_fs.h>
 #include "tp-samples-trace.h"
 
+DEFINE_TRACE(subsys_event);
+DEFINE_TRACE(subsys_eventb);
+
 struct proc_dir_entry *pentry_example;
 
 static int my_open(struct inode *inode, struct file *file)
-- 
cgit v1.2.3


From f004f3ea34209d8b836426b26ade3dc502631b18 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Date: Fri, 14 Nov 2008 00:24:34 +0000
Subject: phylib: make mdio-gpio work without OF (v4)

make mdio-gpio work with non OpenFirmware gpio implementation.

Aditional changes to mdio-gpio:
- use gpio_request() and gpio_free()
- place irq[] array in struct mdio_gpio_info
- add module description, author and license
- add note about compiling this driver as module
- rename mdc and mdio function (were ugly names)
- change MII to MDIO in bus name
- add __init __exit to module (un)loading functions
- probe fails if no phys added to the bus
- kzalloc bitbang with sizeof(*bitbang)

Changes since v3:
- keep bus naming "%x" to be compatible with existing drivers.

Changes since v2:
- more #ifdefs reduction
- platform driver will be registered on OF platforms also
- unified platform and OF bus_id to phy%i

Changes since v1:
- removed NO_IRQ
- reduced #idefs

Laurent, please test this driver under OF.

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig     |   5 +-
 drivers/net/phy/mdio-gpio.c | 232 +++++++++++++++++++++++++++++++-------------
 include/linux/mdio-gpio.h   |  25 +++++
 3 files changed, 191 insertions(+), 71 deletions(-)
 create mode 100644 include/linux/mdio-gpio.h

(limited to 'include')

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 031807751991..c4c5a2f8ec74 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -86,8 +86,11 @@ config MDIO_BITBANG
 
 config MDIO_GPIO
 	tristate "Support for GPIO lib-based bitbanged MDIO buses"
-	depends on MDIO_BITBANG && OF_GPIO
+	depends on MDIO_BITBANG && GENERIC_GPIO
 	---help---
 	  Supports GPIO lib-based MDIO busses.
 
+	  To compile this driver as a module, choose M here: the module
+	  will be called mdio-gpio.
+
 endif # PHYLIB
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 2ff97754e574..a439ebeb4319 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -1,9 +1,12 @@
 /*
- * OpenFirmware GPIO based MDIO bitbang driver.
+ * GPIO based MDIO bitbang driver.
+ * Supports OpenFirmware.
  *
  * Copyright (c) 2008 CSE Semaphore Belgium.
  *  by Laurent Pinchart <laurentp@cse-semaphore.com>
  *
+ * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
  * Based on earlier work by
  *
  * Copyright (c) 2003 Intracom S.A.
@@ -21,9 +24,14 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/mdio-bitbang.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/mdio-gpio.h>
+
+#ifdef CONFIG_OF_GPIO
 #include <linux/of_gpio.h>
 #include <linux/of_platform.h>
+#endif
 
 struct mdio_gpio_info {
 	struct mdiobb_ctrl ctrl;
@@ -41,7 +49,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
 		gpio_direction_input(bitbang->mdio);
 }
 
-static int mdio_read(struct mdiobb_ctrl *ctrl)
+static int mdio_get(struct mdiobb_ctrl *ctrl)
 {
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
@@ -49,7 +57,7 @@ static int mdio_read(struct mdiobb_ctrl *ctrl)
 	return gpio_get_value(bitbang->mdio);
 }
 
-static void mdio(struct mdiobb_ctrl *ctrl, int what)
+static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
 {
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
@@ -57,7 +65,7 @@ static void mdio(struct mdiobb_ctrl *ctrl, int what)
 	gpio_set_value(bitbang->mdio, what);
 }
 
-static void mdc(struct mdiobb_ctrl *ctrl, int what)
+static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
 {
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
@@ -67,93 +75,69 @@ static void mdc(struct mdiobb_ctrl *ctrl, int what)
 
 static struct mdiobb_ops mdio_gpio_ops = {
 	.owner = THIS_MODULE,
-	.set_mdc = mdc,
+	.set_mdc = mdc_set,
 	.set_mdio_dir = mdio_dir,
-	.set_mdio_data = mdio,
-	.get_mdio_data = mdio_read,
+	.set_mdio_data = mdio_set,
+	.get_mdio_data = mdio_get,
 };
 
-static int __devinit mdio_ofgpio_bitbang_init(struct mii_bus *bus,
-                                         struct device_node *np)
-{
-	struct mdio_gpio_info *bitbang = bus->priv;
-
-	bitbang->mdc = of_get_gpio(np, 0);
-	bitbang->mdio = of_get_gpio(np, 1);
-
-	if (bitbang->mdc < 0 || bitbang->mdio < 0)
-		return -ENODEV;
-
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%x", bitbang->mdc);
-	return 0;
-}
-
-static void __devinit add_phy(struct mii_bus *bus, struct device_node *np)
+static int __devinit mdio_gpio_bus_init(struct device *dev,
+					struct mdio_gpio_platform_data *pdata,
+					int bus_id)
 {
-	const u32 *data;
-	int len, id, irq;
-
-	data = of_get_property(np, "reg", &len);
-	if (!data || len != 4)
-		return;
-
-	id = *data;
-	bus->phy_mask &= ~(1 << id);
-
-	irq = of_irq_to_resource(np, 0, NULL);
-	if (irq != NO_IRQ)
-		bus->irq[id] = irq;
-}
-
-static int __devinit mdio_ofgpio_probe(struct of_device *ofdev,
-                                        const struct of_device_id *match)
-{
-	struct device_node *np = NULL;
 	struct mii_bus *new_bus;
 	struct mdio_gpio_info *bitbang;
 	int ret = -ENOMEM;
 	int i;
 
-	bitbang = kzalloc(sizeof(struct mdio_gpio_info), GFP_KERNEL);
+	bitbang = kzalloc(sizeof(*bitbang), GFP_KERNEL);
 	if (!bitbang)
 		goto out;
 
 	bitbang->ctrl.ops = &mdio_gpio_ops;
+	bitbang->mdc = pdata->mdc;
+	bitbang->mdio = pdata->mdio;
 
 	new_bus = alloc_mdio_bitbang(&bitbang->ctrl);
 	if (!new_bus)
 		goto out_free_bitbang;
 
-	new_bus->name = "GPIO Bitbanged MII",
+	new_bus->name = "GPIO Bitbanged MDIO",
 
-	ret = mdio_ofgpio_bitbang_init(new_bus, ofdev->node);
-	if (ret)
-		goto out_free_bus;
+	ret = -ENODEV;
 
-	new_bus->phy_mask = ~0;
-	new_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
-	if (!new_bus->irq)
+	new_bus->phy_mask = pdata->phy_mask;
+	new_bus->irq = pdata->irqs;
+	new_bus->parent = dev;
+
+	if (new_bus->phy_mask == ~0)
 		goto out_free_bus;
 
 	for (i = 0; i < PHY_MAX_ADDR; i++)
-		new_bus->irq[i] = -1;
+		if (!new_bus->irq[i])
+			new_bus->irq[i] = PHY_POLL;
 
-	while ((np = of_get_next_child(ofdev->node, np)))
-		if (!strcmp(np->type, "ethernet-phy"))
-			add_phy(new_bus, np);
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", bus_id);
+
+	if (gpio_request(bitbang->mdc, "mdc"))
+		goto out_free_bus;
 
-	new_bus->parent = &ofdev->dev;
-	dev_set_drvdata(&ofdev->dev, new_bus);
+	if (gpio_request(bitbang->mdio, "mdio"))
+		goto out_free_mdc;
+
+	dev_set_drvdata(dev, new_bus);
 
 	ret = mdiobus_register(new_bus);
 	if (ret)
-		goto out_free_irqs;
+		goto out_free_all;
 
 	return 0;
 
-out_free_irqs:
-	dev_set_drvdata(&ofdev->dev, NULL);
-	kfree(new_bus->irq);
+out_free_all:
+	dev_set_drvdata(dev, NULL);
+	gpio_free(bitbang->mdio);
+out_free_mdc:
+	gpio_free(bitbang->mdc);
 out_free_bus:
 	free_mdio_bitbang(new_bus);
 out_free_bitbang:
@@ -162,16 +146,86 @@ out:
 	return ret;
 }
 
-static int mdio_ofgpio_remove(struct of_device *ofdev)
+static void __devexit mdio_gpio_bus_destroy(struct device *dev)
 {
-	struct mii_bus *bus = dev_get_drvdata(&ofdev->dev);
+	struct mii_bus *bus = dev_get_drvdata(dev);
 	struct mdio_gpio_info *bitbang = bus->priv;
 
 	mdiobus_unregister(bus);
-	kfree(bus->irq);
 	free_mdio_bitbang(bus);
-	dev_set_drvdata(&ofdev->dev, NULL);
+	dev_set_drvdata(dev, NULL);
+	gpio_free(bitbang->mdc);
+	gpio_free(bitbang->mdio);
 	kfree(bitbang);
+}
+
+static int __devinit mdio_gpio_probe(struct platform_device *pdev)
+{
+	struct mdio_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!pdata)
+		return -ENODEV;
+
+	return mdio_gpio_bus_init(&pdev->dev, pdata, pdev->id);
+}
+
+static int __devexit mdio_gpio_remove(struct platform_device *pdev)
+{
+	mdio_gpio_bus_destroy(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF_GPIO
+static void __devinit add_phy(struct mdio_gpio_platform_data *pdata,
+			      struct device_node *np)
+{
+	const u32 *data;
+	int len, id, irq;
+
+	data = of_get_property(np, "reg", &len);
+	if (!data || len != 4)
+		return;
+
+	id = *data;
+	pdata->phy_mask &= ~(1 << id);
+
+	irq = of_irq_to_resource(np, 0, NULL);
+	if (irq)
+		pdata->irqs[id] = irq;
+}
+
+static int __devinit mdio_ofgpio_probe(struct of_device *ofdev,
+                                        const struct of_device_id *match)
+{
+	struct device_node *np = NULL;
+	struct mdio_gpio_platform_data *pdata;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->mdc = of_get_gpio(ofdev->node, 0);
+	pdata->mdio = of_get_gpio(ofdev->node, 1);
+
+	if (pdata->mdc < 0 || pdata->mdio < 0)
+		goto out_free;
+
+	while ((np = of_get_next_child(ofdev->node, np)))
+		if (!strcmp(np->type, "ethernet-phy"))
+			add_phy(pdata, np);
+
+	return mdio_gpio_bus_init(&ofdev->dev, pdata, pdata->mdc);
+
+out_free:
+	kfree(pdata);
+	return -ENODEV;
+}
+
+static int __devexit mdio_ofgpio_remove(struct of_device *ofdev)
+{
+	mdio_gpio_bus_destroy(&ofdev->dev);
+	kfree(ofdev->dev.platform_data);
 
 	return 0;
 }
@@ -187,18 +241,56 @@ static struct of_platform_driver mdio_ofgpio_driver = {
 	.name = "mdio-gpio",
 	.match_table = mdio_ofgpio_match,
 	.probe = mdio_ofgpio_probe,
-	.remove = mdio_ofgpio_remove,
+	.remove = __devexit_p(mdio_ofgpio_remove),
 };
 
-static int mdio_ofgpio_init(void)
+static inline int __init mdio_ofgpio_init(void)
 {
 	return of_register_platform_driver(&mdio_ofgpio_driver);
 }
 
-static void mdio_ofgpio_exit(void)
+static inline void __exit mdio_ofgpio_exit(void)
 {
 	of_unregister_platform_driver(&mdio_ofgpio_driver);
 }
+#else
+static inline int __init mdio_ofgpio_init(void) { return 0; }
+static inline void __exit mdio_ofgpio_exit(void) { }
+#endif /* CONFIG_OF_GPIO */
+
+static struct platform_driver mdio_gpio_driver = {
+	.probe = mdio_gpio_probe,
+	.remove = __devexit_p(mdio_gpio_remove),
+	.driver		= {
+		.name	= "mdio-gpio",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init mdio_gpio_init(void)
+{
+	int ret;
+
+	ret = mdio_ofgpio_init();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&mdio_gpio_driver);
+	if (ret)
+		mdio_ofgpio_exit();
+
+	return ret;
+}
+module_init(mdio_gpio_init);
+
+static void __exit mdio_gpio_exit(void)
+{
+	platform_driver_unregister(&mdio_gpio_driver);
+	mdio_ofgpio_exit();
+}
+module_exit(mdio_gpio_exit);
 
-module_init(mdio_ofgpio_init);
-module_exit(mdio_ofgpio_exit);
+MODULE_ALIAS("platform:mdio-gpio");
+MODULE_AUTHOR("Laurent Pinchart, Paulius Zaleckas");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic driver for MDIO bus emulation using GPIO");
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
new file mode 100644
index 000000000000..e9d3fdfe41d7
--- /dev/null
+++ b/include/linux/mdio-gpio.h
@@ -0,0 +1,25 @@
+/*
+ * MDIO-GPIO bus platform data structures
+ *
+ * Copyright (C) 2008, Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __LINUX_MDIO_GPIO_H
+#define __LINUX_MDIO_GPIO_H
+
+#include <linux/mdio-bitbang.h>
+
+struct mdio_gpio_platform_data {
+	/* GPIO numbers for bus pins */
+	unsigned int mdc;
+	unsigned int mdio;
+
+	unsigned int phy_mask;
+	int irqs[PHY_MAX_ADDR];
+};
+
+#endif /* __LINUX_MDIO_GPIO_H */
-- 
cgit v1.2.3


From e8b2dfe9b4501ed0047459b2756ba26e5a940a69 Mon Sep 17 00:00:00 2001
From: Balazs Scheidler <bazsi@balabit.hu>
Date: Sun, 16 Nov 2008 19:32:39 -0800
Subject: TPROXY: implemented IP_RECVORIGDSTADDR socket option

In case UDP traffic is redirected to a local UDP socket,
the originally addressed destination address/port
cannot be recovered with the in-kernel tproxy.

This patch adds an IP_RECVORIGDSTADDR sockopt that enables
a IP_ORIGDSTADDR ancillary message in recvmsg(). This
ancillary message contains the original destination address/port
of the packet being received.

Signed-off-by: Balazs Scheidler <bazsi@balabit.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h     |  4 ++++
 net/ipv4/ip_sockglue.c | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/in.h b/include/linux/in.h
index db458beef19d..d60122a3a088 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -80,6 +80,10 @@ struct in_addr {
 /* BSD compatibility */
 #define IP_RECVRETOPTS	IP_RETOPTS
 
+/* TProxy original addresses */
+#define IP_ORIGDSTADDR       20
+#define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
+
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
 #define IP_PMTUDISC_WANT		1	/* Use per route hints	*/
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e976efeb1456..624b534201e2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -48,6 +48,7 @@
 #define IP_CMSG_RECVOPTS	8
 #define IP_CMSG_RETOPTS		16
 #define IP_CMSG_PASSSEC		32
+#define IP_CMSG_ORIGDSTADDR     64
 
 /*
  *	SOL_IP control messages.
@@ -126,6 +127,27 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 	security_release_secctx(secdata, seclen);
 }
 
+void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct sockaddr_in sin;
+	struct iphdr *iph = ip_hdr(skb);
+	u16 *ports = (u16 *) skb_transport_header(skb);
+
+	if (skb_transport_offset(skb) + 4 > skb->len)
+		return;
+
+	/* All current transport protocols have the port numbers in the
+	 * first four bytes of the transport header and this function is
+	 * written with this assumption in mind.
+	 */
+
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = iph->daddr;
+	sin.sin_port = ports[1];
+	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
+}
 
 void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 {
@@ -160,6 +182,12 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 
 	if (flags & 1)
 		ip_cmsg_recv_security(msg, skb);
+
+	if ((flags>>=1) == 0)
+		return;
+	if (flags & 1)
+		ip_cmsg_recv_dstaddr(msg, skb);
+
 }
 
 int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
@@ -421,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) ||
 	    optname == IP_MULTICAST_TTL ||
-	    optname == IP_MULTICAST_LOOP) {
+	    optname == IP_MULTICAST_LOOP ||
+	    optname == IP_RECVORIGDSTADDR) {
 		if (optlen >= sizeof(int)) {
 			if (get_user(val, (int __user *) optval))
 				return -EFAULT;
@@ -509,6 +538,12 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		else
 			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
 		break;
+	case IP_RECVORIGDSTADDR:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
+		break;
 	case IP_TOS:	/* This sets both TOS and Precedence */
 		if (sk->sk_type == SOCK_STREAM) {
 			val &= ~3;
@@ -1022,6 +1057,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_PASSSEC:
 		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
 		break;
+	case IP_RECVORIGDSTADDR:
+		val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
+		break;
 	case IP_TOS:
 		val = inet->tos;
 		break;
-- 
cgit v1.2.3


From bbaffaca4810de1a25e32ecaf836eeaacc7a3d11 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 16 Nov 2008 19:37:55 -0800
Subject: rcu: Introduce hlist_nulls variant of hlist

hlist uses NULL value to finish a chain.

hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker.

This allows to store many different end markers, so that some RCU lockless
algos (used in TCP/UDP stack for example) can save some memory barriers in
fast paths.

Two new files are added :

include/linux/list_nulls.h
  - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant

include/linux/rculist_nulls.h
  - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant

   Only four helpers are declared for the moment :

     hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
     hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()

prefetches() were removed, since an end of list is not anymore NULL value.
prefetches() could trigger useless (and possibly dangerous) memory transactions.

Example of use (extracted from __udp4_lib_lookup())

	struct sock *sk, *result;
        struct hlist_nulls_node *node;
        unsigned short hnum = ntohs(dport);
        unsigned int hash = udp_hashfn(net, hnum);
        struct udp_hslot *hslot = &udptable->hash[hash];
        int score, badness;

        rcu_read_lock();
begin:
        result = NULL;
        badness = -1;
        sk_nulls_for_each_rcu(sk, node, &hslot->head) {
                score = compute_score(sk, net, saddr, hnum, sport,
                                      daddr, dport, dif);
                if (score > badness) {
                        result = sk;
                        badness = score;
                }
        }
        /*
         * if the nulls value we got at the end of this lookup is
         * not the expected one, we must restart lookup.
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(node) != hash)
                goto begin;

        if (result) {
                if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
                        result = NULL;
                else if (unlikely(compute_score(result, net, saddr, hnum, sport,
                                  daddr, dport, dif) < badness)) {
                        sock_put(result);
                        goto begin;
                }
        }
        rcu_read_unlock();
        return result;

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list_nulls.h    |  94 ++++++++++++++++++++++++++++++++++++
 include/linux/rculist_nulls.h | 110 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 include/linux/list_nulls.h
 create mode 100644 include/linux/rculist_nulls.h

(limited to 'include')

diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
new file mode 100644
index 000000000000..93150ecf3ea4
--- /dev/null
+++ b/include/linux/list_nulls.h
@@ -0,0 +1,94 @@
+#ifndef _LINUX_LIST_NULLS_H
+#define _LINUX_LIST_NULLS_H
+
+/*
+ * Special version of lists, where end of list is not a NULL pointer,
+ * but a 'nulls' marker, which can have many different values.
+ * (up to 2^31 different values guaranteed on all platforms)
+ *
+ * In the standard hlist, termination of a list is the NULL pointer.
+ * In this special 'nulls' variant, we use the fact that objects stored in
+ * a list are aligned on a word (4 or 8 bytes alignment).
+ * We therefore use the last significant bit of 'ptr' :
+ * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
+ * Set to 0 : This is a pointer to some object (ptr)
+ */
+
+struct hlist_nulls_head {
+	struct hlist_nulls_node *first;
+};
+
+struct hlist_nulls_node {
+	struct hlist_nulls_node *next, **pprev;
+};
+#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
+	((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1)))
+
+#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+/**
+ * ptr_is_a_nulls - Test if a ptr is a nulls
+ * @ptr: ptr to be tested
+ *
+ */
+static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
+{
+	return ((unsigned long)ptr & 1);
+}
+
+/**
+ * get_nulls_value - Get the 'nulls' value of the end of chain
+ * @ptr: end of chain
+ *
+ * Should be called only if is_a_nulls(ptr);
+ */
+static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
+{
+	return ((unsigned long)ptr) >> 1;
+}
+
+static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
+{
+	return is_a_nulls(h->first);
+}
+
+static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
+{
+	struct hlist_nulls_node *next = n->next;
+	struct hlist_nulls_node **pprev = n->pprev;
+	*pprev = next;
+	if (!is_a_nulls(next))
+		next->pprev = pprev;
+}
+
+/**
+ * hlist_nulls_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry(tpos, pos, head, member)		       \
+	for (pos = (head)->first;					       \
+	     (!is_a_nulls(pos)) &&					       \
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry_from(tpos, pos, member)	\
+	for (; (!is_a_nulls(pos)) && 				\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+#endif
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
new file mode 100644
index 000000000000..f9ddd03961a8
--- /dev/null
+++ b/include/linux/rculist_nulls.h
@@ -0,0 +1,110 @@
+#ifndef _LINUX_RCULIST_NULLS_H
+#define _LINUX_RCULIST_NULLS_H
+
+#ifdef __KERNEL__
+
+/*
+ * RCU-protected list version
+ */
+#include <linux/list_nulls.h>
+#include <linux/rcupdate.h>
+
+/**
+ * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_nulls_unhashed() on the node return true after this. It is
+ * useful for RCU based read lockfree traversal if the writer side
+ * must know if the list entry is still hashed or already unhashed.
+ *
+ * In particular, it means that we can not poison the forward pointers
+ * that may still be used for walking the hash list and we can only
+ * zero the pprev pointer so list_unhashed() will return true after
+ * this.
+ *
+ * The caller must take whatever precautions are necessary (such as
+ * holding appropriate locks) to avoid racing with another
+ * list-mutation primitive, such as hlist_nulls_add_head_rcu() or
+ * hlist_nulls_del_rcu(), running on this same list.  However, it is
+ * perfectly legal to run concurrently with the _rcu list-traversal
+ * primitives, such as hlist_nulls_for_each_entry_rcu().
+ */
+static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
+{
+	if (!hlist_nulls_unhashed(n)) {
+		__hlist_nulls_del(n);
+		n->pprev = NULL;
+	}
+}
+
+/**
+ * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_nulls_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+ * or hlist_nulls_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_nulls_for_each_entry().
+ */
+static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
+{
+	__hlist_nulls_del(n);
+	n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_nulls_add_head_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist_nulls,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+ * or hlist_nulls_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.  Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
+					struct hlist_nulls_head *h)
+{
+	struct hlist_nulls_node *first = h->first;
+
+	n->next = first;
+	n->pprev = &h->first;
+	rcu_assign_pointer(h->first, n);
+	if (!is_a_nulls(first))
+		first->pprev = &n->next;
+}
+/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_nulls_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
+	for (pos = rcu_dereference((head)->first);			 \
+		(!is_a_nulls(pos)) && 			\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
+		pos = rcu_dereference(pos->next))
+
+#endif
+#endif
-- 
cgit v1.2.3


From 88ab1932eac721c6e7336708558fa5ed02c85c80 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 16 Nov 2008 19:39:21 -0800
Subject: udp: Use hlist_nulls in UDP RCU code

This is a straightforward patch, using hlist_nulls infrastructure.

RCUification already done on UDP two weeks ago.

Using hlist_nulls permits us to avoid some memory barriers, both
at lookup time and delete time.

Patch is large because it adds new macros to include/net/sock.h.
These macros will be used by TCP & DCCP in next patch.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist.h | 17 ---------------
 include/net/sock.h      | 57 +++++++++++++++++++++++++++++++++++++++----------
 include/net/udp.h       |  2 +-
 net/ipv4/udp.c          | 47 +++++++++++++++++++---------------------
 net/ipv6/udp.c          | 26 +++++++++++-----------
 5 files changed, 83 insertions(+), 66 deletions(-)

(limited to 'include')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 3ba2998b22ba..e649bd3f2c97 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -383,22 +383,5 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference(pos->next))
 
-/**
- * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
- * @head:	the head for your list.
- * @member:	the name of the hlist_node within the struct.
- * @next:       the &struct hlist_node to use as a next cursor
- *
- * Special version of hlist_for_each_entry_rcu that make sure
- * each next pointer is fetched before each iteration.
- */
-#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \
-	for (pos = rcu_dereference((head)->first);			 \
-		pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&	\
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
-		pos = rcu_dereference(next))
-
 #endif	/* __KERNEL__ */
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 8b2b82131b67..0a638948868d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -42,6 +42,7 @@
 
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/list_nulls.h>
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/module.h>
@@ -52,6 +53,7 @@
 #include <linux/security.h>
 
 #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
 
 #include <asm/atomic.h>
 #include <net/dst.h>
@@ -106,6 +108,7 @@ struct net;
  *	@skc_reuse: %SO_REUSEADDR setting
  *	@skc_bound_dev_if: bound device index if != 0
  *	@skc_node: main hash linkage for various protocol lookup tables
+ *	@skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol
  *	@skc_bind_node: bind hash linkage for various protocol lookup tables
  *	@skc_refcnt: reference count
  *	@skc_hash: hash value used with various protocol lookup tables
@@ -120,7 +123,10 @@ struct sock_common {
 	volatile unsigned char	skc_state;
 	unsigned char		skc_reuse;
 	int			skc_bound_dev_if;
-	struct hlist_node	skc_node;
+	union {
+		struct hlist_node	skc_node;
+		struct hlist_nulls_node skc_nulls_node;
+	};
 	struct hlist_node	skc_bind_node;
 	atomic_t		skc_refcnt;
 	unsigned int		skc_hash;
@@ -206,6 +212,7 @@ struct sock {
 #define sk_reuse		__sk_common.skc_reuse
 #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
 #define sk_node			__sk_common.skc_node
+#define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_hash			__sk_common.skc_hash
@@ -300,12 +307,30 @@ static inline struct sock *sk_head(const struct hlist_head *head)
 	return hlist_empty(head) ? NULL : __sk_head(head);
 }
 
+static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head)
+{
+	return hlist_nulls_entry(head->first, struct sock, sk_nulls_node);
+}
+
+static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head)
+{
+	return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head);
+}
+
 static inline struct sock *sk_next(const struct sock *sk)
 {
 	return sk->sk_node.next ?
 		hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
 }
 
+static inline struct sock *sk_nulls_next(const struct sock *sk)
+{
+	return (!is_a_nulls(sk->sk_nulls_node.next)) ?
+		hlist_nulls_entry(sk->sk_nulls_node.next,
+				  struct sock, sk_nulls_node) :
+		NULL;
+}
+
 static inline int sk_unhashed(const struct sock *sk)
 {
 	return hlist_unhashed(&sk->sk_node);
@@ -321,6 +346,11 @@ static __inline__ void sk_node_init(struct hlist_node *node)
 	node->pprev = NULL;
 }
 
+static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node)
+{
+	node->pprev = NULL;
+}
+
 static __inline__ void __sk_del_node(struct sock *sk)
 {
 	__hlist_del(&sk->sk_node);
@@ -367,18 +397,18 @@ static __inline__ int sk_del_node_init(struct sock *sk)
 	return rc;
 }
 
-static __inline__ int __sk_del_node_init_rcu(struct sock *sk)
+static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk)
 {
 	if (sk_hashed(sk)) {
-		hlist_del_init_rcu(&sk->sk_node);
+		hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
 		return 1;
 	}
 	return 0;
 }
 
-static __inline__ int sk_del_node_init_rcu(struct sock *sk)
+static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk)
 {
-	int rc = __sk_del_node_init_rcu(sk);
+	int rc = __sk_nulls_del_node_init_rcu(sk);
 
 	if (rc) {
 		/* paranoid for a while -acme */
@@ -399,15 +429,15 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list)
 	__sk_add_node(sk, list);
 }
 
-static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
-	hlist_add_head_rcu(&sk->sk_node, list);
+	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
 }
 
-static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
 	sock_hold(sk);
-	__sk_add_node_rcu(sk, list);
+	__sk_nulls_add_node_rcu(sk, list);
 }
 
 static __inline__ void __sk_del_bind_node(struct sock *sk)
@@ -423,11 +453,16 @@ static __inline__ void sk_add_bind_node(struct sock *sk,
 
 #define sk_for_each(__sk, node, list) \
 	hlist_for_each_entry(__sk, node, list, sk_node)
-#define sk_for_each_rcu_safenext(__sk, node, list, next) \
-	hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next)
+#define sk_nulls_for_each(__sk, node, list) \
+	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
+#define sk_nulls_for_each_rcu(__sk, node, list) \
+	hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
 #define sk_for_each_from(__sk, node) \
 	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
 		hlist_for_each_entry_from(__sk, node, sk_node)
+#define sk_nulls_for_each_from(__sk, node) \
+	if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
+		hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
 #define sk_for_each_continue(__sk, node) \
 	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
 		hlist_for_each_entry_continue(__sk, node, sk_node)
diff --git a/include/net/udp.h b/include/net/udp.h
index df2bfe545374..90e6ce56be65 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -51,7 +51,7 @@ struct udp_skb_cb {
 #define UDP_SKB_CB(__skb)	((struct udp_skb_cb *)((__skb)->cb))
 
 struct udp_hslot {
-	struct hlist_head	head;
+	struct hlist_nulls_head	head;
 	spinlock_t		lock;
 } __attribute__((aligned(2 * sizeof(long))));
 struct udp_table {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 54badc9a019d..fea2d873dd41 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -127,9 +127,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 						 const struct sock *sk2))
 {
 	struct sock *sk2;
-	struct hlist_node *node;
+	struct hlist_nulls_node *node;
 
-	sk_for_each(sk2, node, &hslot->head)
+	sk_nulls_for_each(sk2, node, &hslot->head)
 		if (net_eq(sock_net(sk2), net)			&&
 		    sk2 != sk					&&
 		    sk2->sk_hash == num				&&
@@ -189,12 +189,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		/*
-		 * We need that previous write to sk->sk_hash committed
-		 * before write to sk->next done in following add_node() variant
-		 */
-		smp_wmb();
-		sk_add_node_rcu(sk, &hslot->head);
+		sk_nulls_add_node_rcu(sk, &hslot->head);
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	}
 	error = 0;
@@ -261,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 		int dif, struct udp_table *udptable)
 {
 	struct sock *sk, *result;
-	struct hlist_node *node, *next;
+	struct hlist_nulls_node *node;
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash = udp_hashfn(net, hnum);
 	struct udp_hslot *hslot = &udptable->hash[hash];
@@ -271,13 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 begin:
 	result = NULL;
 	badness = -1;
-	sk_for_each_rcu_safenext(sk, node, &hslot->head, next) {
-		/*
-		 * lockless reader, and SLAB_DESTROY_BY_RCU items:
-		 * We must check this item was not moved to another chain
-		 */
-		if (udp_hashfn(net, sk->sk_hash) != hash)
-			goto begin;
+	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
 		score = compute_score(sk, net, saddr, hnum, sport,
 				      daddr, dport, dif);
 		if (score > badness) {
@@ -285,6 +274,14 @@ begin:
 			badness = score;
 		}
 	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != hash)
+		goto begin;
+
 	if (result) {
 		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
 			result = NULL;
@@ -325,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
 					     __be16 rmt_port, __be32 rmt_addr,
 					     int dif)
 {
-	struct hlist_node *node;
+	struct hlist_nulls_node *node;
 	struct sock *s = sk;
 	unsigned short hnum = ntohs(loc_port);
 
-	sk_for_each_from(s, node) {
+	sk_nulls_for_each_from(s, node) {
 		struct inet_sock *inet = inet_sk(s);
 
 		if (!net_eq(sock_net(s), net)				||
@@ -977,7 +974,7 @@ void udp_lib_unhash(struct sock *sk)
 	struct udp_hslot *hslot = &udptable->hash[hash];
 
 	spin_lock_bh(&hslot->lock);
-	if (sk_del_node_init_rcu(sk)) {
+	if (sk_nulls_del_node_init_rcu(sk)) {
 		inet_sk(sk)->num = 0;
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	}
@@ -1130,7 +1127,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	int dif;
 
 	spin_lock(&hslot->lock);
-	sk = sk_head(&hslot->head);
+	sk = sk_nulls_head(&hslot->head);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
@@ -1139,7 +1136,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		do {
 			struct sk_buff *skb1 = skb;
 
-			sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest,
+			sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
 						   daddr, uh->source, saddr,
 						   dif);
 			if (sknext)
@@ -1560,10 +1557,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 	struct net *net = seq_file_net(seq);
 
 	for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
-		struct hlist_node *node;
+		struct hlist_nulls_node *node;
 		struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
 		spin_lock_bh(&hslot->lock);
-		sk_for_each(sk, node, &hslot->head) {
+		sk_nulls_for_each(sk, node, &hslot->head) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
 			if (sk->sk_family == state->family)
@@ -1582,7 +1579,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 	struct net *net = seq_file_net(seq);
 
 	do {
-		sk = sk_next(sk);
+		sk = sk_nulls_next(sk);
 	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
 
 	if (!sk) {
@@ -1753,7 +1750,7 @@ void __init udp_table_init(struct udp_table *table)
 	int i;
 
 	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
-		INIT_HLIST_HEAD(&table->hash[i].head);
+		INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
 		spin_lock_init(&table->hash[i].lock);
 	}
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8dafa36b1ba5..fd2d9ad4a8a3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 				      int dif, struct udp_table *udptable)
 {
 	struct sock *sk, *result;
-	struct hlist_node *node, *next;
+	struct hlist_nulls_node *node;
 	unsigned short hnum = ntohs(dport);
 	unsigned int hash = udp_hashfn(net, hnum);
 	struct udp_hslot *hslot = &udptable->hash[hash];
@@ -108,19 +108,21 @@ static struct sock *__udp6_lib_lookup(struct net *net,
 begin:
 	result = NULL;
 	badness = -1;
-	sk_for_each_rcu_safenext(sk, node, &hslot->head, next) {
-		/*
-		 * lockless reader, and SLAB_DESTROY_BY_RCU items:
-		 * We must check this item was not moved to another chain
-		 */
-		if (udp_hashfn(net, sk->sk_hash) != hash)
-			goto begin;
+	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
 		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
 		if (score > badness) {
 			result = sk;
 			badness = score;
 		}
 	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != hash)
+		goto begin;
+
 	if (result) {
 		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
 			result = NULL;
@@ -374,11 +376,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
 				      __be16 rmt_port, struct in6_addr *rmt_addr,
 				      int dif)
 {
-	struct hlist_node *node;
+	struct hlist_nulls_node *node;
 	struct sock *s = sk;
 	unsigned short num = ntohs(loc_port);
 
-	sk_for_each_from(s, node) {
+	sk_nulls_for_each_from(s, node) {
 		struct inet_sock *inet = inet_sk(s);
 
 		if (!net_eq(sock_net(s), net))
@@ -423,7 +425,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	int dif;
 
 	spin_lock(&hslot->lock);
-	sk = sk_head(&hslot->head);
+	sk = sk_nulls_head(&hslot->head);
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (!sk) {
@@ -432,7 +434,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	}
 
 	sk2 = sk;
-	while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr,
+	while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr,
 					uh->source, saddr, dif))) {
 		struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
 		if (buff) {
-- 
cgit v1.2.3


From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 16 Nov 2008 19:40:17 -0800
Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls

RCU was added to UDP lookups, using a fast infrastructure :
- sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the
  price of call_rcu() at freeing time.
- hlist_nulls permits to use few memory barriers.

This patch uses same infrastructure for TCP/DCCP established
and timewait sockets.

Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications
using short lived TCP connections. A followup patch, converting
rwlocks to spinlocks will even speedup this case.

__inet_lookup_established() is pretty fast now we dont have to
dirty a contended cache line (read_lock/read_unlock)

Only established and timewait hashtable are converted to RCU
(bind table and listen table are still using traditional locking)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h    |  4 +--
 include/net/inet_timewait_sock.h | 10 +++---
 net/core/sock.c                  |  4 ++-
 net/dccp/ipv4.c                  |  1 +
 net/dccp/ipv6.c                  |  1 +
 net/dccp/proto.c                 |  4 +--
 net/ipv4/inet_diag.c             |  7 ++--
 net/ipv4/inet_hashtables.c       | 78 ++++++++++++++++++++++++++++------------
 net/ipv4/inet_timewait_sock.c    | 26 ++++++++------
 net/ipv4/tcp.c                   |  4 +--
 net/ipv4/tcp_ipv4.c              | 25 ++++++-------
 net/ipv6/inet6_hashtables.c      | 70 ++++++++++++++++++++++++------------
 net/ipv6/tcp_ipv6.c              |  1 +
 13 files changed, 151 insertions(+), 84 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index cb31fbf8ae2a..481896045111 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -41,8 +41,8 @@
  * I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
-	struct hlist_head chain;
-	struct hlist_head twchain;
+	struct hlist_nulls_head chain;
+	struct hlist_nulls_head twchain;
 };
 
 /* There are a few simple rules, which allow for local port reuse by
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 80e4977631b8..4b8ece22b8e9 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -110,7 +110,7 @@ struct inet_timewait_sock {
 #define tw_state		__tw_common.skc_state
 #define tw_reuse		__tw_common.skc_reuse
 #define tw_bound_dev_if		__tw_common.skc_bound_dev_if
-#define tw_node			__tw_common.skc_node
+#define tw_node			__tw_common.skc_nulls_node
 #define tw_bind_node		__tw_common.skc_bind_node
 #define tw_refcnt		__tw_common.skc_refcnt
 #define tw_hash			__tw_common.skc_hash
@@ -137,10 +137,10 @@ struct inet_timewait_sock {
 	struct hlist_node	tw_death_node;
 };
 
-static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
-				      struct hlist_head *list)
+static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+				      struct hlist_nulls_head *list)
 {
-	hlist_add_head(&tw->tw_node, list);
+	hlist_nulls_add_head_rcu(&tw->tw_node, list);
 }
 
 static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -175,7 +175,7 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
 }
 
 #define inet_twsk_for_each(tw, node, head) \
-	hlist_for_each_entry(tw, node, head, tw_node)
+	hlist_nulls_for_each_entry(tw, node, head, tw_node)
 
 #define inet_twsk_for_each_inmate(tw, node, jail) \
 	hlist_for_each_entry(tw, node, jail, tw_death_node)
diff --git a/net/core/sock.c b/net/core/sock.c
index ded1eb5d2fd4..38de9c3f563b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2082,7 +2082,9 @@ int proto_register(struct proto *prot, int alloc_slab)
 			prot->twsk_prot->twsk_slab =
 				kmem_cache_create(timewait_sock_slab_name,
 						  prot->twsk_prot->twsk_obj_size,
-						  0, SLAB_HWCACHE_ALIGN,
+						  0,
+						  SLAB_HWCACHE_ALIGN |
+							prot->slab_flags,
 						  NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
 				goto out_free_timewait_sock_slab_name;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 528baa2e5be4..d1dd95289b89 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = {
 	.orphan_count		= &dccp_orphan_count,
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp_sock),
+	.slab_flags		= SLAB_DESTROY_BY_RCU,
 	.rsk_prot		= &dccp_request_sock_ops,
 	.twsk_prot		= &dccp_timewait_sock_ops,
 	.h.hashinfo		= &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4aa1148cdb20..f033e845bb07 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1140,6 +1140,7 @@ static struct proto dccp_v6_prot = {
 	.orphan_count	   = &dccp_orphan_count,
 	.max_header	   = MAX_DCCP_HEADER,
 	.obj_size	   = sizeof(struct dccp6_sock),
+	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.rsk_prot	   = &dccp6_request_sock_ops,
 	.twsk_prot	   = &dccp6_timewait_sock_ops,
 	.h.hashinfo	   = &dccp_hashinfo,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 46cb3490d48e..1117d4d8c8f1 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1090,8 +1090,8 @@ static int __init dccp_init(void)
 	}
 
 	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
-		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
-		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
+		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
+		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
 	}
 
 	if (inet_ehash_locks_alloc(&dccp_hashinfo))
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 564230dabcb8..41b36720e977 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -778,18 +778,19 @@ skip_listen_ht:
 		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
 		rwlock_t *lock = inet_ehash_lockp(hashinfo, i);
 		struct sock *sk;
-		struct hlist_node *node;
+		struct hlist_nulls_node *node;
 
 		num = 0;
 
-		if (hlist_empty(&head->chain) && hlist_empty(&head->twchain))
+		if (hlist_nulls_empty(&head->chain) &&
+			hlist_nulls_empty(&head->twchain))
 			continue;
 
 		if (i > s_i)
 			s_num = 0;
 
 		read_lock_bh(lock);
-		sk_for_each(sk, node, &head->chain) {
+		sk_nulls_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
 			if (num < s_num)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index be41ebbec4eb..fd269cfef0ec 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net,
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	struct sock *sk;
-	const struct hlist_node *node;
+	const struct hlist_nulls_node *node;
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
 	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
+	unsigned int slot = hash & (hashinfo->ehash_size - 1);
+	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
 
-	prefetch(head->chain.first);
-	read_lock(lock);
-	sk_for_each(sk, node, &head->chain) {
+	rcu_read_lock();
+begin:
+	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (INET_MATCH(sk, net, hash, acookie,
-					saddr, daddr, ports, dif))
-			goto hit; /* You sunk my battleship! */
+					saddr, daddr, ports, dif)) {
+			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+				goto begintw;
+			if (unlikely(!INET_MATCH(sk, net, hash, acookie,
+				saddr, daddr, ports, dif))) {
+				sock_put(sk);
+				goto begin;
+			}
+			goto out;
+		}
 	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot)
+		goto begin;
 
+begintw:
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
-	sk_for_each(sk, node, &head->twchain) {
+	sk_nulls_for_each_rcu(sk, node, &head->twchain) {
 		if (INET_TW_MATCH(sk, net, hash, acookie,
-					saddr, daddr, ports, dif))
-			goto hit;
+					saddr, daddr, ports, dif)) {
+			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
+				sk = NULL;
+				goto out;
+			}
+			if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie,
+				 saddr, daddr, ports, dif))) {
+				sock_put(sk);
+				goto begintw;
+			}
+			goto out;
+		}
 	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != slot)
+		goto begintw;
 	sk = NULL;
 out:
-	read_unlock(lock);
+	rcu_read_unlock();
 	return sk;
-hit:
-	sock_hold(sk);
-	goto out;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_established);
 
@@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
-	const struct hlist_node *node;
+	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
 	prefetch(head->chain.first);
 	write_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &head->twchain) {
+	sk_nulls_for_each(sk2, node, &head->twchain) {
 		tw = inet_twsk(sk2);
 
 		if (INET_TW_MATCH(sk2, net, hash, acookie,
@@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	tw = NULL;
 
 	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
+	sk_nulls_for_each(sk2, node, &head->chain) {
 		if (INET_MATCH(sk2, net, hash, acookie,
 					saddr, daddr, ports, dif))
 			goto not_unique;
@@ -306,7 +336,7 @@ unique:
 	inet->sport = htons(lport);
 	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
+	__sk_nulls_add_node_rcu(sk, &head->chain);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	write_unlock(lock);
 
@@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
 void __inet_hash_nolisten(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct hlist_head *list;
+	struct hlist_nulls_head *list;
 	rwlock_t *lock;
 	struct inet_ehash_bucket *head;
 
@@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk)
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
 	write_lock(lock);
-	__sk_add_node(sk, list);
+	__sk_nulls_add_node_rcu(sk, list);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	write_unlock(lock);
 }
@@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk)
 		local_bh_disable();
 		inet_listen_wlock(hashinfo);
 		lock = &hashinfo->lhash_lock;
+		if (__sk_del_node_init(sk))
+			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	} else {
 		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 		write_lock_bh(lock);
+		if (__sk_nulls_del_node_init_rcu(sk))
+			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	}
 
-	if (__sk_del_node_init(sk))
-		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	write_unlock_bh(lock);
 out:
 	if (sk->sk_state == TCP_LISTEN)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 1c5fd38f8824..60689951ecdb 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -23,12 +23,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
 
 	write_lock(lock);
-	if (hlist_unhashed(&tw->tw_node)) {
+	if (hlist_nulls_unhashed(&tw->tw_node)) {
 		write_unlock(lock);
 		return;
 	}
-	__hlist_del(&tw->tw_node);
-	sk_node_init(&tw->tw_node);
+	hlist_nulls_del_rcu(&tw->tw_node);
+	sk_nulls_node_init(&tw->tw_node);
 	write_unlock(lock);
 
 	/* Disassociate with bind bucket. */
@@ -92,13 +92,17 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 
 	write_lock(lock);
 
-	/* Step 2: Remove SK from established hash. */
-	if (__sk_del_node_init(sk))
-		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-
-	/* Step 3: Hash TW into TIMEWAIT chain. */
-	inet_twsk_add_node(tw, &ehead->twchain);
+	/*
+	 * Step 2: Hash TW into TIMEWAIT chain.
+	 * Should be done before removing sk from established chain
+	 * because readers are lockless and search established first.
+	 */
 	atomic_inc(&tw->tw_refcnt);
+	inet_twsk_add_node_rcu(tw, &ehead->twchain);
+
+	/* Step 3: Remove SK from established hash. */
+	if (__sk_nulls_del_node_init_rcu(sk))
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
 	write_unlock(lock);
 }
@@ -416,7 +420,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
 {
 	struct inet_timewait_sock *tw;
 	struct sock *sk;
-	struct hlist_node *node;
+	struct hlist_nulls_node *node;
 	int h;
 
 	local_bh_disable();
@@ -426,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
 		rwlock_t *lock = inet_ehash_lockp(hashinfo, h);
 restart:
 		write_lock(lock);
-		sk_for_each(sk, node, &head->twchain) {
+		sk_nulls_for_each(sk, node, &head->twchain) {
 
 			tw = inet_twsk(sk);
 			if (!net_eq(twsk_net(tw), net) ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f60a5917e54d..044224a341eb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2707,8 +2707,8 @@ void __init tcp_init(void)
 					thash_entries ? 0 : 512 * 1024);
 	tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size;
 	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
-		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
-		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain);
+		INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
+		INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
 	}
 	if (inet_ehash_locks_alloc(&tcp_hashinfo))
 		panic("TCP: failed to alloc ehash_locks");
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d49233f409b5..b2e3ab2287ba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1857,16 +1857,16 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
+static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
 {
-	return hlist_empty(head) ? NULL :
+	return hlist_nulls_empty(head) ? NULL :
 		list_entry(head->first, struct inet_timewait_sock, tw_node);
 }
 
 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
 {
-	return tw->tw_node.next ?
-		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
+	return !is_a_nulls(tw->tw_node.next) ?
+		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
 }
 
 static void *listening_get_next(struct seq_file *seq, void *cur)
@@ -1954,8 +1954,8 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 
 static inline int empty_bucket(struct tcp_iter_state *st)
 {
-	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
-		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
+		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
 }
 
 static void *established_get_first(struct seq_file *seq)
@@ -1966,7 +1966,7 @@ static void *established_get_first(struct seq_file *seq)
 
 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
 		struct sock *sk;
-		struct hlist_node *node;
+		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
@@ -1975,7 +1975,7 @@ static void *established_get_first(struct seq_file *seq)
 			continue;
 
 		read_lock_bh(lock);
-		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
 			    !net_eq(sock_net(sk), net)) {
 				continue;
@@ -2004,7 +2004,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 {
 	struct sock *sk = cur;
 	struct inet_timewait_sock *tw;
-	struct hlist_node *node;
+	struct hlist_nulls_node *node;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 
@@ -2032,11 +2032,11 @@ get_tw:
 			return NULL;
 
 		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
-		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
+		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
-		sk = sk_next(sk);
+		sk = sk_nulls_next(sk);
 
-	sk_for_each_from(sk, node) {
+	sk_nulls_for_each_from(sk, node) {
 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
 			goto found;
 	}
@@ -2375,6 +2375,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
+	.slab_flags		= SLAB_DESTROY_BY_RCU,
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1646a5658255..c1b4d401fd95 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -25,24 +25,28 @@
 void __inet6_hash(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct hlist_head *list;
 	rwlock_t *lock;
 
 	WARN_ON(!sk_unhashed(sk));
 
 	if (sk->sk_state == TCP_LISTEN) {
+		struct hlist_head *list;
+
 		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 		lock = &hashinfo->lhash_lock;
 		inet_listen_wlock(hashinfo);
+		__sk_add_node(sk, list);
 	} else {
 		unsigned int hash;
+		struct hlist_nulls_head *list;
+
 		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
 		list = &inet_ehash_bucket(hashinfo, hash)->chain;
 		lock = inet_ehash_lockp(hashinfo, hash);
 		write_lock(lock);
+		__sk_nulls_add_node_rcu(sk, list);
 	}
 
-	__sk_add_node(sk, list);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	write_unlock(lock);
 }
@@ -63,33 +67,53 @@ struct sock *__inet6_lookup_established(struct net *net,
 					   const int dif)
 {
 	struct sock *sk;
-	const struct hlist_node *node;
+	const struct hlist_nulls_node *node;
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
 	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
+	unsigned int slot = hash & (hashinfo->ehash_size - 1);
+	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
 
-	prefetch(head->chain.first);
-	read_lock(lock);
-	sk_for_each(sk, node, &head->chain) {
+
+	rcu_read_lock();
+begin:
+	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		/* For IPV6 do the cheaper port and family tests first. */
-		if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif))
-			goto hit; /* You sunk my battleship! */
+		if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+				goto begintw;
+			if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+				sock_put(sk);
+				goto begin;
+			}
+		goto out;
+		}
 	}
+	if (get_nulls_value(node) != slot)
+		goto begin;
+
+begintw:
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
-	sk_for_each(sk, node, &head->twchain) {
-		if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif))
-			goto hit;
+	sk_nulls_for_each_rcu(sk, node, &head->twchain) {
+		if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
+				sk = NULL;
+				goto out;
+			}
+			if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
+				sock_put(sk);
+				goto begintw;
+			}
+			goto out;
+		}
 	}
-	read_unlock(lock);
-	return NULL;
-
-hit:
-	sock_hold(sk);
-	read_unlock(lock);
+	if (get_nulls_value(node) != slot)
+		goto begintw;
+	sk = NULL;
+out:
+	rcu_read_unlock();
 	return sk;
 }
 EXPORT_SYMBOL(__inet6_lookup_established);
@@ -172,14 +196,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
-	const struct hlist_node *node;
+	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
 	prefetch(head->chain.first);
 	write_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &head->twchain) {
+	sk_nulls_for_each(sk2, node, &head->twchain) {
 		tw = inet_twsk(sk2);
 
 		if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
@@ -192,7 +216,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	tw = NULL;
 
 	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
+	sk_nulls_for_each(sk2, node, &head->chain) {
 		if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
 			goto not_unique;
 	}
@@ -203,7 +227,7 @@ unique:
 	inet->num = lport;
 	inet->sport = htons(lport);
 	WARN_ON(!sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
+	__sk_nulls_add_node_rcu(sk, &head->chain);
 	sk->sk_hash = hash;
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	write_unlock(lock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 984276463a8d..b35787056313 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2043,6 +2043,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
+	.slab_flags		= SLAB_DESTROY_BY_RCU,
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
-- 
cgit v1.2.3


From 5635c10d976716ef47ae441998aeae144c7e7387 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 16 Nov 2008 19:46:36 -0800
Subject: net: make sure struct dst_entry refcount is aligned on 64 bytes

As found in the past (commit f1dd9c379cac7d5a76259e7dffcd5f8edc697d17
[NET]: Fix tbench regression in 2.6.25-rc1), it is really
important that struct dst_entry refcount is aligned on a cache line.

We cannot use __atribute((aligned)), so manually pad the structure
for 32 and 64 bit arches.

for 32bit : offsetof(truct dst_entry, __refcnt) is 0x80
for 64bit : offsetof(truct dst_entry, __refcnt) is 0xc0

As it is not possible to guess at compile time cache line size,
we use a generic value of 64 bytes, that satisfies many current arches.
(Using 128 bytes alignment on 64bit arches would waste 64 bytes)

Add a BUILD_BUG_ON to catch future updates to "struct dst_entry" dont
break this alignment.

"tbench 8" is 4.4 % faster on a dual quad core (HP BL460c G1), Intel E5450 @3.00GHz
(2350 MB/s instead of 2250 MB/s)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 65a60fab2f82..6c778799bf10 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -61,6 +61,8 @@ struct dst_entry
 	struct hh_cache		*hh;
 #ifdef CONFIG_XFRM
 	struct xfrm_state	*xfrm;
+#else
+	void			*__pad1;
 #endif
 	int			(*input)(struct sk_buff*);
 	int			(*output)(struct sk_buff*);
@@ -71,8 +73,20 @@ struct dst_entry
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	__u32			tclassid;
+#else
+	__u32			__pad2;
 #endif
 
+
+	/*
+	 * Align __refcnt to a 64 bytes alignment
+	 * (L1_CACHE_SIZE would be too much)
+	 */
+#ifdef CONFIG_64BIT
+	long			__pad_to_align_refcnt[2];
+#else
+	long			__pad_to_align_refcnt[1];
+#endif
 	/*
 	 * __refcnt wants to be on a different cache line from
 	 * input/output/ops or performance tanks badly
@@ -157,6 +171,11 @@ dst_metric_locked(struct dst_entry *dst, int metric)
 
 static inline void dst_hold(struct dst_entry * dst)
 {
+	/*
+	 * If your kernel compilation stops here, please check
+	 * __pad_to_align_refcnt declaration in struct dst_entry
+	 */
+	BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
 	atomic_inc(&dst->__refcnt);
 }
 
-- 
cgit v1.2.3


From 3f2c31d90327f21d76d296af34aa4ca547932ff4 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Sun, 16 Nov 2008 22:41:34 -0800
Subject: virtio_net: VIRTIO_NET_F_MSG_RXBUF (imprive rcv buffer allocation)

If segmentation offload is enabled by the host, we currently allocate
maximum sized packet buffers and pass them to the host. This uses up
20 ring entries, allowing us to supply only 20 packet buffers to the
host with a 256 entry ring. This is a huge overhead when receiving
small packets, and is most keenly felt when receiving MTU sized
packets from off-host.

The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support
using receive buffers which are smaller than the maximum packet size.
In order to transfer large packets to the guest, the host merges
together multiple receive buffers to form a larger logical buffer.
The number of merged buffers is returned to the guest via a field in
the virtio_net_hdr.

Make use of this support by supplying single page receive buffers to
the host. On receive, we extract the virtio_net_hdr, copy 128 bytes of
the payload to the skb's linear data buffer and adjust the fragment
offset to point to the remaining data. This ensures proper alignment
and allows us to not use any paged data for small packets. If the
payload occupies multiple pages, we simply append those pages as
fragments and free the associated skbs.

This scheme allows us to be efficient in our use of ring entries
while still supporting large packets. Benchmarking using netperf from
an external machine to a guest over a 10Gb/s network shows a 100%
improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark
with GSO disabled on the host side, throughput was seen to increase
from 700Mb/s to 1.7Gb/s.

Based on a patch from Herbert Xu.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (use netdev_priv)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c   | 173 +++++++++++++++++++++++++++++++++++++++------
 include/linux/virtio_net.h |   9 +++
 2 files changed, 162 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 27559c987d47..e6b5d6ef9ea8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -34,6 +34,7 @@ module_param(gso, bool, 0444);
 
 /* FIXME: MTU in config. */
 #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
+#define GOOD_COPY_LEN	128
 
 struct virtnet_info
 {
@@ -58,6 +59,9 @@ struct virtnet_info
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
+	/* Host will merge rx buffers for big packets (shake it! shake it!) */
+	bool mergeable_rx_bufs;
+
 	/* Receive & send queues. */
 	struct sk_buff_head recv;
 	struct sk_buff_head send;
@@ -66,16 +70,11 @@ struct virtnet_info
 	struct page *pages;
 };
 
-static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
+static inline void *skb_vnet_hdr(struct sk_buff *skb)
 {
 	return (struct virtio_net_hdr *)skb->cb;
 }
 
-static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
-{
-	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
-}
-
 static void give_a_page(struct virtnet_info *vi, struct page *page)
 {
 	page->private = (unsigned long)vi->pages;
@@ -121,25 +120,97 @@ static void skb_xmit_done(struct virtqueue *svq)
 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
 			unsigned len)
 {
+	struct virtnet_info *vi = netdev_priv(dev);
 	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
 	int err;
+	int i;
 
 	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		goto drop;
 	}
-	len -= sizeof(struct virtio_net_hdr);
 
-	if (len <= MAX_PACKET_LEN)
-		trim_pages(netdev_priv(dev), skb);
+	if (vi->mergeable_rx_bufs) {
+		struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb);
+		unsigned int copy;
+		char *p = page_address(skb_shinfo(skb)->frags[0].page);
 
-	err = pskb_trim(skb, len);
-	if (err) {
-		pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
-		dev->stats.rx_dropped++;
-		goto drop;
+		if (len > PAGE_SIZE)
+			len = PAGE_SIZE;
+		len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
+
+		memcpy(hdr, p, sizeof(*mhdr));
+		p += sizeof(*mhdr);
+
+		copy = len;
+		if (copy > skb_tailroom(skb))
+			copy = skb_tailroom(skb);
+
+		memcpy(skb_put(skb, copy), p, copy);
+
+		len -= copy;
+
+		if (!len) {
+			give_a_page(vi, skb_shinfo(skb)->frags[0].page);
+			skb_shinfo(skb)->nr_frags--;
+		} else {
+			skb_shinfo(skb)->frags[0].page_offset +=
+				sizeof(*mhdr) + copy;
+			skb_shinfo(skb)->frags[0].size = len;
+			skb->data_len += len;
+			skb->len += len;
+		}
+
+		while (--mhdr->num_buffers) {
+			struct sk_buff *nskb;
+
+			i = skb_shinfo(skb)->nr_frags;
+			if (i >= MAX_SKB_FRAGS) {
+				pr_debug("%s: packet too long %d\n", dev->name,
+					 len);
+				dev->stats.rx_length_errors++;
+				goto drop;
+			}
+
+			nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
+			if (!nskb) {
+				pr_debug("%s: rx error: %d buffers missing\n",
+					 dev->name, mhdr->num_buffers);
+				dev->stats.rx_length_errors++;
+				goto drop;
+			}
+
+			__skb_unlink(nskb, &vi->recv);
+			vi->num--;
+
+			skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
+			skb_shinfo(nskb)->nr_frags = 0;
+			kfree_skb(nskb);
+
+			if (len > PAGE_SIZE)
+				len = PAGE_SIZE;
+
+			skb_shinfo(skb)->frags[i].size = len;
+			skb_shinfo(skb)->nr_frags++;
+			skb->data_len += len;
+			skb->len += len;
+		}
+	} else {
+		len -= sizeof(struct virtio_net_hdr);
+
+		if (len <= MAX_PACKET_LEN)
+			trim_pages(vi, skb);
+
+		err = pskb_trim(skb, len);
+		if (err) {
+			pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
+				 len, err);
+			dev->stats.rx_dropped++;
+			goto drop;
+		}
 	}
+
 	skb->truesize += skb->data_len;
 	dev->stats.rx_bytes += skb->len;
 	dev->stats.rx_packets++;
@@ -198,7 +269,7 @@ drop:
 	dev_kfree_skb(skb);
 }
 
-static void try_fill_recv(struct virtnet_info *vi)
+static void try_fill_recv_maxbufs(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
@@ -206,12 +277,16 @@ static void try_fill_recv(struct virtnet_info *vi)
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 	for (;;) {
+		struct virtio_net_hdr *hdr;
+
 		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
 		if (unlikely(!skb))
 			break;
 
 		skb_put(skb, MAX_PACKET_LEN);
-		vnet_hdr_to_sg(sg, skb);
+
+		hdr = skb_vnet_hdr(skb);
+		sg_init_one(sg, hdr, sizeof(*hdr));
 
 		if (vi->big_packets) {
 			for (i = 0; i < MAX_SKB_FRAGS; i++) {
@@ -247,6 +322,54 @@ static void try_fill_recv(struct virtnet_info *vi)
 	vi->rvq->vq_ops->kick(vi->rvq);
 }
 
+static void try_fill_recv(struct virtnet_info *vi)
+{
+	struct sk_buff *skb;
+	struct scatterlist sg[1];
+	int err;
+
+	if (!vi->mergeable_rx_bufs) {
+		try_fill_recv_maxbufs(vi);
+		return;
+	}
+
+	for (;;) {
+		skb_frag_t *f;
+
+		skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN);
+		if (unlikely(!skb))
+			break;
+
+		skb_reserve(skb, NET_IP_ALIGN);
+
+		f = &skb_shinfo(skb)->frags[0];
+		f->page = get_a_page(vi, GFP_ATOMIC);
+		if (!f->page) {
+			kfree_skb(skb);
+			break;
+		}
+
+		f->page_offset = 0;
+		f->size = PAGE_SIZE;
+
+		skb_shinfo(skb)->nr_frags++;
+
+		sg_init_one(sg, page_address(f->page), PAGE_SIZE);
+		skb_queue_head(&vi->recv, skb);
+
+		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
+		if (err) {
+			skb_unlink(skb, &vi->recv);
+			kfree_skb(skb);
+			break;
+		}
+		vi->num++;
+	}
+	if (unlikely(vi->num > vi->max))
+		vi->max = vi->num;
+	vi->rvq->vq_ops->kick(vi->rvq);
+}
+
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
@@ -325,15 +448,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 {
 	int num, err;
 	struct scatterlist sg[2+MAX_SKB_FRAGS];
-	struct virtio_net_hdr *hdr;
+	struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb);
+	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 
 	sg_init_table(sg, 2+MAX_SKB_FRAGS);
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 
-	/* Encode metadata header at front. */
-	hdr = skb_vnet_hdr(skb);
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 		hdr->csum_start = skb->csum_start - skb_headroom(skb);
@@ -361,7 +483,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 		hdr->gso_size = hdr->hdr_len = 0;
 	}
 
-	vnet_hdr_to_sg(sg, skb);
+	mhdr->num_buffers = 0;
+
+	/* Encode metadata header at front. */
+	if (vi->mergeable_rx_bufs)
+		sg_init_one(sg, mhdr, sizeof(*mhdr));
+	else
+		sg_init_one(sg, hdr, sizeof(*hdr));
+
 	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
 
 	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
@@ -551,6 +680,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
 		vi->big_packets = true;
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
+		vi->mergeable_rx_bufs = true;
+
 	/* We expect two virtqueues, receive then send. */
 	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
 	if (IS_ERR(vi->rvq)) {
@@ -643,6 +775,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
 	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
+	VIRTIO_NET_F_MRG_RXBUF,
 	VIRTIO_F_NOTIFY_ON_EMPTY,
 };
 
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 5e33761b9b8a..5cdd0aa8bde9 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -20,6 +20,7 @@
 #define VIRTIO_NET_F_HOST_TSO6	12	/* Host can handle TSOv6 in. */
 #define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
 #define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF	15	/* Host can merge receive buffers. */
 
 struct virtio_net_config
 {
@@ -44,4 +45,12 @@ struct virtio_net_hdr
 	__u16 csum_start;	/* Position to start checksumming from */
 	__u16 csum_offset;	/* Offset after that to place checksum */
 };
+
+/* This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+	struct virtio_net_hdr hdr;
+	__u16 num_buffers;	/* Number of merged rx buffers */
+};
+
 #endif /* _LINUX_VIRTIO_NET_H */
-- 
cgit v1.2.3


From 49aebc66d6b896f9c7c5739d85c4548c00015aa7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 16 Nov 2008 22:51:23 -0800
Subject: dccp: Deprecate old setsockopt framework

The previous setsockopt interface, which passed socket options via struct
dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to
ugly code since the old approach did not distinguish between NN and SP values.

This patch removes the old setsockopt interface and replaces it with two new
functions to register NN/SP values for feature negotiation.
These are essentially wrappers around the internal __feat_register functions,
with checking added to avoid

 * wrong usage (type);
 * changing values while the connection is in progress.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  7 -----
 net/dccp/feat.c      | 72 ++++++++++++++++++++--------------------------------
 net/dccp/feat.h      |  5 ++--
 net/dccp/proto.c     | 53 ++------------------------------------
 4 files changed, 32 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index d3ac1bde60b4..6eaaca9b037a 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -193,13 +193,6 @@ enum dccp_feature_numbers {
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
-/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */
-struct dccp_so_feat {
-	__u8 dccpsf_feat;
-	__u8 __user *dccpsf_val;
-	__u8 dccpsf_len;
-};
-
 /* DCCP socket options */
 #define DCCP_SOCKOPT_PACKET_SIZE	1 /* XXX deprecated, without effect */
 #define DCCP_SOCKOPT_SERVICE		2
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 4f86a48723f6..47ce9abaf72b 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -364,53 +364,35 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
 	return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval);
 }
 
-int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
-		     u8 *val, u8 len, gfp_t gfp)
-{
-	struct dccp_opt_pend *opt;
-
-	dccp_feat_debug(type, feature, *val);
-
-	if (len > 3) {
-		DCCP_WARN("invalid length %d\n", len);
+/**
+ * dccp_feat_register_sp  -  Register requests to change SP feature values
+ * @sk: client or listening socket
+ * @feat: one of %dccp_feature_numbers
+ * @is_local: whether the local (1) or remote (0) @feat is meant
+ * @list: array of preferred values, in descending order of preference
+ * @len: length of @list in bytes
+ */
+int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+			  u8 const *list, u8 len)
+{	 /* any changes must be registered before establishing the connection */
+	if (sk->sk_state != DCCP_CLOSED)
+		return -EISCONN;
+	if (dccp_feat_type(feat) != FEAT_SP)
 		return -EINVAL;
-	}
-	/* XXX add further sanity checks */
-
-	/* check if that feature is already being negotiated */
-	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
-		/* ok we found a negotiation for this option already */
-		if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
-			dccp_pr_debug("Replacing old\n");
-			/* replace */
-			BUG_ON(opt->dccpop_val == NULL);
-			kfree(opt->dccpop_val);
-			opt->dccpop_val	 = val;
-			opt->dccpop_len	 = len;
-			opt->dccpop_conf = 0;
-			return 0;
-		}
-	}
-
-	/* negotiation for a new feature */
-	opt = kmalloc(sizeof(*opt), gfp);
-	if (opt == NULL)
-		return -ENOMEM;
-
-	opt->dccpop_type = type;
-	opt->dccpop_feat = feature;
-	opt->dccpop_len	 = len;
-	opt->dccpop_val	 = val;
-	opt->dccpop_conf = 0;
-	opt->dccpop_sc	 = NULL;
-
-	BUG_ON(opt->dccpop_val == NULL);
-
-	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
-	return 0;
+	return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local,
+				  0, list, len);
 }
 
-EXPORT_SYMBOL_GPL(dccp_feat_change);
+/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
+int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
+{
+	/* any changes must be registered before establishing the connection */
+	if (sk->sk_state != DCCP_CLOSED)
+		return -EISCONN;
+	if (dccp_feat_type(feat) != FEAT_NN)
+		return -EINVAL;
+	return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
+}
 
 /*
  *	Tracking features whose value depend on the choice of CCID
@@ -1108,7 +1090,7 @@ int dccp_feat_init(struct sock *sk)
 
 	/* Ack ratio */
 	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
-				dmsk->dccpms_ack_ratio);
+				dp->dccps_l_ack_ratio);
 out:
 	return rc;
 }
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 232c653e69c5..4d172822df17 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -111,8 +111,9 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
 #define dccp_feat_debug(type, feat, val)
 #endif /* CONFIG_IP_DCCP_DEBUG */
 
-extern int  dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
-			     u8 *val, u8 len, gfp_t gfp);
+extern int  dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+				  u8 const *list, u8 len);
+extern int  dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
 extern int  dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
 				  u8 *val, u8 len);
 extern int  dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1117d4d8c8f1..3090fc40eebd 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -470,44 +470,6 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 	return 0;
 }
 
-/* byte 1 is feature.  the rest is the preference list */
-static int dccp_setsockopt_change(struct sock *sk, int type,
-				  struct dccp_so_feat __user *optval)
-{
-	struct dccp_so_feat opt;
-	u8 *val;
-	int rc;
-
-	if (copy_from_user(&opt, optval, sizeof(opt)))
-		return -EFAULT;
-	/*
-	 * rfc4340: 6.1. Change Options
-	 */
-	if (opt.dccpsf_len < 1)
-		return -EINVAL;
-
-	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
-	if (!val)
-		return -ENOMEM;
-
-	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
-		rc = -EFAULT;
-		goto out_free_val;
-	}
-
-	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
-			      val, opt.dccpsf_len, GFP_KERNEL);
-	if (rc)
-		goto out_free_val;
-
-out:
-	return rc;
-
-out_free_val:
-	kfree(val);
-	goto out;
-}
-
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int optlen)
 {
@@ -530,20 +492,9 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		err = 0;
 		break;
 	case DCCP_SOCKOPT_CHANGE_L:
-		if (optlen != sizeof(struct dccp_so_feat))
-			err = -EINVAL;
-		else
-			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
-						     (struct dccp_so_feat __user *)
-						     optval);
-		break;
 	case DCCP_SOCKOPT_CHANGE_R:
-		if (optlen != sizeof(struct dccp_so_feat))
-			err = -EINVAL;
-		else
-			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
-						     (struct dccp_so_feat __user *)
-						     optval);
+		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+		err = 0;
 		break;
 	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 		if (dp->dccps_role != DCCP_ROLE_SERVER)
-- 
cgit v1.2.3


From 29450559849da7066813601effb7666966869853 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 16 Nov 2008 22:53:48 -0800
Subject: dccp: Feature negotiation for minimum-checksum-coverage

This provides feature negotiation for server minimum checksum coverage
which so far has been missing.

Since sender/receiver coverage values range only from 0...15, their
type has also been reduced in size from u16 to u4.

Feature-negotiation options are now generated for both sender and receiver
coverage, i.e. when the peer has `forgotten' to enable partial coverage
then feature negotiation will automatically enable (negotiate) the partial
coverage value for this connection.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h |  4 ++--
 net/dccp/proto.c     | 53 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 42 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 6eaaca9b037a..5a5a89935dbc 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -527,8 +527,8 @@ struct dccp_sock {
 	__u32				dccps_timestamp_time;
 	__u16				dccps_l_ack_ratio;
 	__u16				dccps_r_ack_ratio;
-	__u16				dccps_pcslen;
-	__u16				dccps_pcrlen;
+	__u8				dccps_pcslen:4;
+	__u8				dccps_pcrlen:4;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 3090fc40eebd..c6b4362bb1d7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -470,6 +470,42 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 	return 0;
 }
 
+static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
+{
+	u8 *list, len;
+	int i, rc;
+
+	if (cscov < 0 || cscov > 15)
+		return -EINVAL;
+	/*
+	 * Populate a list of permissible values, in the range cscov...15. This
+	 * is necessary since feature negotiation of single values only works if
+	 * both sides incidentally choose the same value. Since the list starts
+	 * lowest-value first, negotiation will pick the smallest shared value.
+	 */
+	if (cscov == 0)
+		return 0;
+	len = 16 - cscov;
+
+	list = kmalloc(len, GFP_KERNEL);
+	if (list == NULL)
+		return -ENOBUFS;
+
+	for (i = 0; i < len; i++)
+		list[i] = cscov++;
+
+	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
+
+	if (rc == 0) {
+		if (rx)
+			dccp_sk(sk)->dccps_pcrlen = cscov;
+		else
+			dccp_sk(sk)->dccps_pcslen = cscov;
+	}
+	kfree(list);
+	return rc;
+}
+
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int optlen)
 {
@@ -502,20 +538,11 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		else
 			dp->dccps_server_timewait = (val != 0);
 		break;
-	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
-		if (val < 0 || val > 15)
-			err = -EINVAL;
-		else
-			dp->dccps_pcslen = val;
+	case DCCP_SOCKOPT_SEND_CSCOV:
+		err = dccp_setsockopt_cscov(sk, val, false);
 		break;
-	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
-		if (val < 0 || val > 15)
-			err = -EINVAL;
-		else {
-			dp->dccps_pcrlen = val;
-			/* FIXME: add feature negotiation,
-			 * ChangeL(MinimumChecksumCoverage, val) */
-		}
+	case DCCP_SOCKOPT_RECV_CSCOV:
+		err = dccp_setsockopt_cscov(sk, val, true);
 		break;
 	default:
 		err = -ENOPROTOOPT;
-- 
cgit v1.2.3


From dd9c0e363cef32b7d6f23d4c87e8dfe4f91fd1c5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 16 Nov 2008 22:55:08 -0800
Subject: dccp: Deprecate Ack Ratio sysctl

This patch deprecates the Ack Ratio sysctl, since
 * Ack Ratio is entirely ignored by CCID-3 and CCID-4,
 * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1);
 * even if it would work in CCID-2, there is no point for a user to change it:
   - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2),
   - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts
     (since waiting for Acks which will never arrive in this window),
   - cwnd is not a user-configurable value.

The only reasonable place for Ack Ratio is to print it for debugging. It is
planned to do this later on, as part of e.g. dccp_probe.

With this patch Ack Ratio is now under full control of feature negotiation:
 * Ack Ratio is resolved as a dependency of the selected CCID;
 * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to
   the default of 2, following RFC 4340, 11.3 - "New connections start with Ack
   Ratio 2 for both endpoints";
 * what happens then is part of another patch set, since it concerns the
   dynamic update of Ack Ratio while the connection is in full flight.

Thanks to Tomasz Grobelny for discussion leading up to this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt | 3 ---
 include/linux/dccp.h              | 2 --
 net/dccp/dccp.h                   | 1 -
 net/dccp/minisocks.c              | 1 -
 net/dccp/options.c                | 1 -
 net/dccp/sysctl.c                 | 7 -------
 6 files changed, 15 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index f0aeb20fa63b..43df4487379b 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -125,9 +125,6 @@ send_ndp = 1
 send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
-ack_ratio = 2
-	The default Ack Ratio (sec. 11.3) to use.
-
 tx_ccid = 2
 	Default CCID for the sender-receiver half-connection.
 
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 5a5a89935dbc..eda389ce04f4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * @dccpms_ccid - Congestion Control Id (CCID) (section 10)
   * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
   * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
-  * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3)
   * @dccpms_pending - List of features being negotiated
   * @dccpms_conf -
   */
@@ -378,7 +377,6 @@ struct dccp_minisock {
 	__u8			dccpms_tx_ccid;
 	__u8			dccpms_send_ack_vector;
 	__u8			dccpms_send_ndp_count;
-	__u8			dccpms_ack_ratio;
 	struct list_head	dccpms_pending;
 	struct list_head	dccpms_conf;
 };
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e656dafb5d96..031ce350d3c1 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -98,7 +98,6 @@ extern int  sysctl_dccp_retries2;
 extern int  sysctl_dccp_feat_sequence_window;
 extern int  sysctl_dccp_feat_rx_ccid;
 extern int  sysctl_dccp_feat_tx_ccid;
-extern int  sysctl_dccp_feat_ack_ratio;
 extern int  sysctl_dccp_feat_send_ack_vector;
 extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index afdacbb94d75..ed61bc58e41e 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -47,7 +47,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk)
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
 	dmsk->dccpms_rx_ccid	     = sysctl_dccp_feat_rx_ccid;
 	dmsk->dccpms_tx_ccid	     = sysctl_dccp_feat_tx_ccid;
-	dmsk->dccpms_ack_ratio	     = sysctl_dccp_feat_ack_ratio;
 	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
 	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
 }
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 67a171a1268c..515ad45013ad 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -26,7 +26,6 @@
 int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
 int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_ack_ratio	      = DCCPF_INITIAL_ACK_RATIO;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 21295993fdb8..f6e54f433e29 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -40,13 +40,6 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "ack_ratio",
-		.data		= &sysctl_dccp_feat_ack_ratio,
-		.maxlen		= sizeof(sysctl_dccp_feat_ack_ratio),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "send_ackvec",
 		.data		= &sysctl_dccp_feat_send_ack_vector,
-- 
cgit v1.2.3


From 4d24b52ac5085ef8a264d044f1b302b7c029887a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 16 Nov 2008 23:01:49 -0800
Subject: ematch: simpler tcf_em_unregister()

Simply delete ops from list and let list debugging do the job.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h |  2 +-
 net/sched/ematch.c    | 18 +++---------------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index aa9e282db485..d1ca31444644 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -246,7 +246,7 @@ struct tcf_ematch_ops
 };
 
 extern int tcf_em_register(struct tcf_ematch_ops *);
-extern int tcf_em_unregister(struct tcf_ematch_ops *);
+extern void tcf_em_unregister(struct tcf_ematch_ops *);
 extern int tcf_em_tree_validate(struct tcf_proto *, struct nlattr *,
 				struct tcf_ematch_tree *);
 extern void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e82519e548d7..aab59409728b 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -71,7 +71,7 @@
  *
  *      static void __exit exit_my_ematch(void)
  *      {
- *      	return tcf_em_unregister(&my_ops);
+ *      	tcf_em_unregister(&my_ops);
  *      }
  *
  *      module_init(init_my_ematch);
@@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register);
  *
  * Returns -ENOENT if no matching ematch was found.
  */
-int tcf_em_unregister(struct tcf_ematch_ops *ops)
+void tcf_em_unregister(struct tcf_ematch_ops *ops)
 {
-	int err = 0;
-	struct tcf_ematch_ops *e;
-
 	write_lock(&ematch_mod_lock);
-	list_for_each_entry(e, &ematch_ops, link) {
-		if (e == ops) {
-			list_del(&e->link);
-			goto out;
-		}
-	}
-
-	err = -ENOENT;
-out:
+	list_del(&ops->link);
 	write_unlock(&ematch_mod_lock);
-	return err;
 }
 EXPORT_SYMBOL(tcf_em_unregister);
 
-- 
cgit v1.2.3


From 1cad1de1b216b355a60d907c103b2daf1a285345 Mon Sep 17 00:00:00 2001
From: Christian Pellegrin <chripell@gmail.com>
Date: Sat, 15 Nov 2008 08:58:16 +0100
Subject: ASoC: UDA134x codec driver

Signed-off-by: Christian Pellegrin <chripell@fsfe.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/l3.h               |  18 ++
 include/sound/uda134x.h          |  26 ++
 sound/soc/codecs/Kconfig         |   8 +
 sound/soc/codecs/Makefile        |   4 +
 sound/soc/codecs/l3.c            |  91 ++++++
 sound/soc/codecs/uda134x.c       | 656 +++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/uda134x_codec.h |  36 +++
 7 files changed, 839 insertions(+)
 create mode 100644 include/sound/l3.h
 create mode 100644 include/sound/uda134x.h
 create mode 100644 sound/soc/codecs/l3.c
 create mode 100644 sound/soc/codecs/uda134x.c
 create mode 100644 sound/soc/codecs/uda134x_codec.h

(limited to 'include')

diff --git a/include/sound/l3.h b/include/sound/l3.h
new file mode 100644
index 000000000000..423a08f0f1b0
--- /dev/null
+++ b/include/sound/l3.h
@@ -0,0 +1,18 @@
+#ifndef _L3_H_
+#define _L3_H_ 1
+
+struct l3_pins {
+	void (*setdat)(int);
+	void (*setclk)(int);
+	void (*setmode)(int);
+	int data_hold;
+	int data_setup;
+	int clock_high;
+	int mode_hold;
+	int mode;
+	int mode_setup;
+};
+
+int l3_write(struct l3_pins *adap, u8 addr, u8 *data, int len);
+
+#endif
diff --git a/include/sound/uda134x.h b/include/sound/uda134x.h
new file mode 100644
index 000000000000..475ef8bb7dcd
--- /dev/null
+++ b/include/sound/uda134x.h
@@ -0,0 +1,26 @@
+/*
+ * uda134x.h  --  UDA134x ALSA SoC Codec driver
+ *
+ * Copyright 2007 Dension Audio Systems Ltd.
+ * Author: Zoltan Devai
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _UDA134X_H
+#define _UDA134X_H
+
+#include <sound/l3.h>
+
+struct uda134x_platform_data {
+	struct l3_pins l3;
+	void (*power) (int);
+	int model;
+#define UDA134X_UDA1340 1
+#define UDA134X_UDA1341 2
+#define UDA134X_UDA1344 3
+};
+
+#endif /* _UDA134X_H */
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 8a84460a6f74..04f49f5c3c3d 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -10,6 +10,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_TLV320AIC26 if SPI_MASTER
 	select SND_SOC_TLV320AIC3X if I2C
 	select SND_SOC_TWL4030 if TWL4030_CORE
+	select SND_SOC_UDA134X
 	select SND_SOC_UDA1380 if I2C
 	select SND_SOC_WM8510 if (I2C || SPI_MASTER)
 	select SND_SOC_WM8580 if I2C
@@ -66,6 +67,9 @@ config SND_SOC_CS4270_VD33_ERRATA
 	bool
 	depends on SND_SOC_CS4270
 
+config SND_SOC_L3
+       tristate
+
 config SND_SOC_SSM2602
 	tristate
 
@@ -85,6 +89,10 @@ config SND_SOC_TWL4030
 	tristate
 	depends on TWL4030_CORE
 
+config SND_SOC_UDA134X
+       tristate
+       select SND_SOC_L3
+
 config SND_SOC_UDA1380
         tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 7ae17a6ea271..de6572356d1b 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -3,11 +3,13 @@ snd-soc-ad1980-objs := ad1980.o
 snd-soc-ad73311-objs := ad73311.o
 snd-soc-ak4535-objs := ak4535.o
 snd-soc-cs4270-objs := cs4270.o
+snd-soc-l3-objs := l3.o
 snd-soc-ssm2602-objs := ssm2602.o
 snd-soc-tlv320aic23-objs := tlv320aic23.o
 snd-soc-tlv320aic26-objs := tlv320aic26.o
 snd-soc-tlv320aic3x-objs := tlv320aic3x.o
 snd-soc-twl4030-objs := twl4030.o
+snd-soc-uda134x-objs := uda134x.o
 snd-soc-uda1380-objs := uda1380.o
 snd-soc-wm8510-objs := wm8510.o
 snd-soc-wm8580-objs := wm8580.o
@@ -27,11 +29,13 @@ obj-$(CONFIG_SND_SOC_AD1980)	+= snd-soc-ad1980.o
 obj-$(CONFIG_SND_SOC_AD73311) += snd-soc-ad73311.o
 obj-$(CONFIG_SND_SOC_AK4535)	+= snd-soc-ak4535.o
 obj-$(CONFIG_SND_SOC_CS4270)	+= snd-soc-cs4270.o
+obj-$(CONFIG_SND_SOC_L3)	+= snd-soc-l3.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
 obj-$(CONFIG_SND_SOC_TLV320AIC23)	+= snd-soc-tlv320aic23.o
 obj-$(CONFIG_SND_SOC_TLV320AIC26)	+= snd-soc-tlv320aic26.o
 obj-$(CONFIG_SND_SOC_TLV320AIC3X)	+= snd-soc-tlv320aic3x.o
 obj-$(CONFIG_SND_SOC_TWL4030)	+= snd-soc-twl4030.o
+obj-$(CONFIG_SND_SOC_UDA134X)	+= snd-soc-uda134x.o
 obj-$(CONFIG_SND_SOC_UDA1380)	+= snd-soc-uda1380.o
 obj-$(CONFIG_SND_SOC_WM8510)	+= snd-soc-wm8510.o
 obj-$(CONFIG_SND_SOC_WM8580)	+= snd-soc-wm8580.o
diff --git a/sound/soc/codecs/l3.c b/sound/soc/codecs/l3.c
new file mode 100644
index 000000000000..5353af58862c
--- /dev/null
+++ b/sound/soc/codecs/l3.c
@@ -0,0 +1,91 @@
+/*
+ * L3 code
+ *
+ *  Copyright (C) 2008, Christian Pellegrin <chripell@evolware.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *
+ * based on:
+ *
+ * L3 bus algorithm module.
+ *
+ *  Copyright (C) 2001 Russell King, All Rights Reserved.
+ *
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+
+#include <sound/l3.h>
+
+/*
+ * Send one byte of data to the chip.  Data is latched into the chip on
+ * the rising edge of the clock.
+ */
+static void sendbyte(struct l3_pins *adap, unsigned int byte)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		adap->setclk(0);
+		udelay(adap->data_hold);
+		adap->setdat(byte & 1);
+		udelay(adap->data_setup);
+		adap->setclk(1);
+		udelay(adap->clock_high);
+		byte >>= 1;
+	}
+}
+
+/*
+ * Send a set of bytes to the chip.  We need to pulse the MODE line
+ * between each byte, but never at the start nor at the end of the
+ * transfer.
+ */
+static void sendbytes(struct l3_pins *adap, const u8 *buf,
+		      int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (i) {
+			udelay(adap->mode_hold);
+			adap->setmode(0);
+			udelay(adap->mode);
+		}
+		adap->setmode(1);
+		udelay(adap->mode_setup);
+		sendbyte(adap, buf[i]);
+	}
+}
+
+int l3_write(struct l3_pins *adap, u8 addr, u8 *data, int len)
+{
+	adap->setclk(1);
+	adap->setdat(1);
+	adap->setmode(1);
+	udelay(adap->mode);
+
+	adap->setmode(0);
+	udelay(adap->mode_setup);
+	sendbyte(adap, addr);
+	udelay(adap->mode_hold);
+
+	sendbytes(adap, data, len);
+
+	adap->setclk(1);
+	adap->setdat(1);
+	adap->setmode(0);
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(l3_write);
+
+MODULE_DESCRIPTION("L3 bit-banging driver");
+MODULE_AUTHOR("Christian Pellegrin <chripell@evolware.org>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
new file mode 100644
index 000000000000..04b30da10228
--- /dev/null
+++ b/sound/soc/codecs/uda134x.c
@@ -0,0 +1,656 @@
+/*
+ * uda134x.c  --  UDA134X ALSA SoC Codec driver
+ *
+ * Modifications by Christian Pellegrin <chripell@evolware.org>
+ *
+ * Copyright 2007 Dension Audio Systems Ltd.
+ * Author: Zoltan Devai
+ *
+ * Based on the WM87xx drivers by Liam Girdwood and Richard Purdie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+
+#include <sound/uda134x.h>
+#include <sound/l3.h>
+
+#include "uda134x_codec.h"
+
+
+#define POWER_OFF_ON_STANDBY 1
+/*
+  ALSA SOC usually puts the device in standby mode when it's not used
+  for sometime. If you define POWER_OFF_ON_STANDBY the driver will
+  turn off the ADC/DAC when this callback is invoked and turn it back
+  on when needed. Unfortunately this will result in a very light bump
+  (it can be audible only with good earphones). If this bothers you
+  just comment this line, you will have slightly higher power
+  consumption . Please note that sending the L3 command for ADC is
+  enough to make the bump, so it doesn't make difference if you
+  completely take off power from the codec.
+ */
+
+#define UDA134X_RATES SNDRV_PCM_RATE_8000_48000
+#define UDA134X_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE | \
+		SNDRV_PCM_FMTBIT_S18_3LE | SNDRV_PCM_FMTBIT_S20_3LE)
+
+struct uda134x_priv {
+	int sysclk;
+	int dai_fmt;
+
+	struct snd_pcm_substream *master_substream;
+	struct snd_pcm_substream *slave_substream;
+};
+
+/* In-data addresses are hard-coded into the reg-cache values */
+static const char uda134x_reg[UDA134X_REGS_NUM] = {
+	/* Extended address registers */
+	0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+	/* Status, data regs */
+	0x00, 0x83, 0x00, 0x40, 0x80, 0x00,
+};
+
+/*
+ * The codec has no support for reading its registers except for peak level...
+ */
+static inline unsigned int uda134x_read_reg_cache(struct snd_soc_codec *codec,
+	unsigned int reg)
+{
+	u8 *cache = codec->reg_cache;
+
+	if (reg >= UDA134X_REGS_NUM)
+		return -1;
+	return cache[reg];
+}
+
+/*
+ * Write the register cache
+ */
+static inline void uda134x_write_reg_cache(struct snd_soc_codec *codec,
+	u8 reg, unsigned int value)
+{
+	u8 *cache = codec->reg_cache;
+
+	if (reg >= UDA134X_REGS_NUM)
+		return;
+	cache[reg] = value;
+}
+
+/*
+ * Write to the uda134x registers
+ *
+ */
+static int uda134x_write(struct snd_soc_codec *codec, unsigned int reg,
+	unsigned int value)
+{
+	int ret;
+	u8 addr;
+	u8 data = value;
+	struct uda134x_platform_data *pd = codec->control_data;
+
+	pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value);
+
+	if (reg >= UDA134X_REGS_NUM) {
+		printk(KERN_ERR "%s unkown register: reg: %d",
+		       __func__, reg);
+		return -EINVAL;
+	}
+
+	uda134x_write_reg_cache(codec, reg, value);
+
+	switch (reg) {
+	case UDA134X_STATUS0:
+	case UDA134X_STATUS1:
+		addr = UDA134X_STATUS_ADDR;
+		break;
+	case UDA134X_DATA000:
+	case UDA134X_DATA001:
+	case UDA134X_DATA010:
+		addr = UDA134X_DATA0_ADDR;
+		break;
+	case UDA134X_DATA1:
+		addr = UDA134X_DATA1_ADDR;
+		break;
+	default:
+		/* It's an extended address register */
+		addr =  (reg | UDA134X_EXTADDR_PREFIX);
+
+		ret = l3_write(&pd->l3,
+			       UDA134X_DATA0_ADDR, &addr, 1);
+		if (ret != 1)
+			return -EIO;
+
+		addr = UDA134X_DATA0_ADDR;
+		data = (value | UDA134X_EXTDATA_PREFIX);
+		break;
+	}
+
+	ret = l3_write(&pd->l3,
+		       addr, &data, 1);
+	if (ret != 1)
+		return -EIO;
+
+	return 0;
+}
+
+static inline void uda134x_reset(struct snd_soc_codec *codec)
+{
+	u8 reset_reg = uda134x_read_reg_cache(codec, UDA134X_STATUS0);
+	uda134x_write(codec, UDA134X_STATUS0, reset_reg | (1<<6));
+	msleep(1);
+	uda134x_write(codec, UDA134X_STATUS0, reset_reg & ~(1<<6));
+}
+
+static int uda134x_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	u8 mute_reg = uda134x_read_reg_cache(codec, UDA134X_DATA010);
+
+	pr_debug("%s mute: %d\n", __func__, mute);
+
+	if (mute)
+		mute_reg |= (1<<2);
+	else
+		mute_reg &= ~(1<<2);
+
+	uda134x_write(codec, UDA134X_DATA010, mute_reg & ~(1<<2));
+
+	return 0;
+}
+
+static int uda134x_startup(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->codec;
+	struct uda134x_priv *uda134x = codec->private_data;
+	struct snd_pcm_runtime *master_runtime;
+
+	if (uda134x->master_substream) {
+		master_runtime = uda134x->master_substream->runtime;
+
+		pr_debug("%s constraining to %d bits at %d\n", __func__,
+			 master_runtime->sample_bits,
+			 master_runtime->rate);
+
+		snd_pcm_hw_constraint_minmax(substream->runtime,
+					     SNDRV_PCM_HW_PARAM_RATE,
+					     master_runtime->rate,
+					     master_runtime->rate);
+
+		snd_pcm_hw_constraint_minmax(substream->runtime,
+					     SNDRV_PCM_HW_PARAM_SAMPLE_BITS,
+					     master_runtime->sample_bits,
+					     master_runtime->sample_bits);
+
+		uda134x->slave_substream = substream;
+	} else
+		uda134x->master_substream = substream;
+
+	return 0;
+}
+
+static void uda134x_shutdown(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->codec;
+	struct uda134x_priv *uda134x = codec->private_data;
+
+	if (uda134x->master_substream == substream)
+		uda134x->master_substream = uda134x->slave_substream;
+
+	uda134x->slave_substream = NULL;
+}
+
+static int uda134x_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_codec *codec = socdev->codec;
+	struct uda134x_priv *uda134x = codec->private_data;
+	u8 hw_params;
+
+	if (substream == uda134x->slave_substream) {
+		pr_debug("%s ignoring hw_params for slave substream\n",
+			 __func__);
+		return 0;
+	}
+
+	hw_params = uda134x_read_reg_cache(codec, UDA134X_STATUS0);
+	hw_params &= STATUS0_SYSCLK_MASK;
+	hw_params &= STATUS0_DAIFMT_MASK;
+
+	pr_debug("%s sysclk: %d, rate:%d\n", __func__,
+		 uda134x->sysclk, params_rate(params));
+
+	/* set SYSCLK / fs ratio */
+	switch (uda134x->sysclk / params_rate(params)) {
+	case 512:
+		break;
+	case 384:
+		hw_params |= (1<<4);
+		break;
+	case 256:
+		hw_params |= (1<<5);
+		break;
+	default:
+		printk(KERN_ERR "%s unsupported fs\n", __func__);
+		return -EINVAL;
+	}
+
+	pr_debug("%s dai_fmt: %d, params_format:%d\n", __func__,
+		 uda134x->dai_fmt, params_format(params));
+
+	/* set DAI format and word length */
+	switch (uda134x->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		switch (params_format(params)) {
+		case SNDRV_PCM_FORMAT_S16_LE:
+			hw_params |= (1<<1);
+			break;
+		case SNDRV_PCM_FORMAT_S18_3LE:
+			hw_params |= (1<<2);
+			break;
+		case SNDRV_PCM_FORMAT_S20_3LE:
+			hw_params |= ((1<<2) | (1<<1));
+			break;
+		default:
+			printk(KERN_ERR "%s unsupported format (right)\n",
+			       __func__);
+			return -EINVAL;
+		}
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		hw_params |= (1<<3);
+		break;
+	default:
+		printk(KERN_ERR "%s unsupported format\n", __func__);
+		return -EINVAL;
+	}
+
+	uda134x_write(codec, UDA134X_STATUS0, hw_params);
+
+	return 0;
+}
+
+static int uda134x_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+				  int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct uda134x_priv *uda134x = codec->private_data;
+
+	pr_debug("%s clk_id: %d, freq: %d, dir: %d\n", __func__,
+		 clk_id, freq, dir);
+
+	/* Anything between 256fs*8Khz and 512fs*48Khz should be acceptable
+	   because the codec is slave. Of course limitations of the clock
+	   master (the IIS controller) apply.
+	   We'll error out on set_hw_params if it's not OK */
+	if ((freq >= (256 * 8000)) && (freq <= (512 * 48000))) {
+		uda134x->sysclk = freq;
+		return 0;
+	}
+
+	printk(KERN_ERR "%s unsupported sysclk\n", __func__);
+	return -EINVAL;
+}
+
+static int uda134x_set_dai_fmt(struct snd_soc_dai *codec_dai,
+			       unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct uda134x_priv *uda134x = codec->private_data;
+
+	pr_debug("%s fmt: %08X\n", __func__, fmt);
+
+	/* codec supports only full slave mode */
+	if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != SND_SOC_DAIFMT_CBS_CFS) {
+		printk(KERN_ERR "%s unsupported slave mode\n", __func__);
+		return -EINVAL;
+	}
+
+	/* no support for clock inversion */
+	if ((fmt & SND_SOC_DAIFMT_INV_MASK) != SND_SOC_DAIFMT_NB_NF) {
+		printk(KERN_ERR "%s unsupported clock inversion\n", __func__);
+		return -EINVAL;
+	}
+
+	/* We can't setup DAI format here as it depends on the word bit num */
+	/* so let's just store the value for later */
+	uda134x->dai_fmt = fmt;
+
+	return 0;
+}
+
+static int uda134x_set_bias_level(struct snd_soc_codec *codec,
+				  enum snd_soc_bias_level level)
+{
+	u8 reg;
+	struct uda134x_platform_data *pd = codec->control_data;
+	int i;
+	u8 *cache = codec->reg_cache;
+
+	pr_debug("%s bias level %d\n", __func__, level);
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		/* ADC, DAC on */
+		reg = uda134x_read_reg_cache(codec, UDA134X_STATUS1);
+		uda134x_write(codec, UDA134X_STATUS1, reg | 0x03);
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		/* power on */
+		if (pd->power) {
+			pd->power(1);
+			/* Sync reg_cache with the hardware */
+			for (i = 0; i < ARRAY_SIZE(uda134x_reg); i++)
+				codec->write(codec, i, *cache++);
+		}
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		/* ADC, DAC power off */
+		reg = uda134x_read_reg_cache(codec, UDA134X_STATUS1);
+		uda134x_write(codec, UDA134X_STATUS1, reg & ~(0x03));
+		break;
+	case SND_SOC_BIAS_OFF:
+		/* power off */
+		if (pd->power)
+			pd->power(0);
+		break;
+	}
+	codec->bias_level = level;
+	return 0;
+}
+
+static const char *uda134x_dsp_setting[] = {"Flat", "Minimum1",
+					    "Minimum2", "Maximum"};
+static const char *uda134x_deemph[] = {"None", "32Khz", "44.1Khz", "48Khz"};
+static const char *uda134x_mixmode[] = {"Differential", "Analog1",
+					"Analog2", "Both"};
+
+static const struct soc_enum uda134x_mixer_enum[] = {
+SOC_ENUM_SINGLE(UDA134X_DATA010, 0, 0x04, uda134x_dsp_setting),
+SOC_ENUM_SINGLE(UDA134X_DATA010, 3, 0x04, uda134x_deemph),
+SOC_ENUM_SINGLE(UDA134X_EA010, 0, 0x04, uda134x_mixmode),
+};
+
+static const struct snd_kcontrol_new uda1341_snd_controls[] = {
+SOC_SINGLE("Master Playback Volume", UDA134X_DATA000, 0, 0x3F, 1),
+SOC_SINGLE("Capture Volume", UDA134X_EA010, 2, 0x07, 0),
+SOC_SINGLE("Analog1 Volume", UDA134X_EA000, 0, 0x1F, 1),
+SOC_SINGLE("Analog2 Volume", UDA134X_EA001, 0, 0x1F, 1),
+
+SOC_SINGLE("Mic Sensitivity", UDA134X_EA010, 2, 7, 0),
+SOC_SINGLE("Mic Volume", UDA134X_EA101, 0, 0x1F, 0),
+
+SOC_SINGLE("Tone Control - Bass", UDA134X_DATA001, 2, 0xF, 0),
+SOC_SINGLE("Tone Control - Treble", UDA134X_DATA001, 0, 3, 0),
+
+SOC_ENUM("Sound Processing Filter", uda134x_mixer_enum[0]),
+SOC_ENUM("PCM Playback De-emphasis", uda134x_mixer_enum[1]),
+SOC_ENUM("Input Mux", uda134x_mixer_enum[2]),
+
+SOC_SINGLE("AGC Switch", UDA134X_EA100, 4, 1, 0),
+SOC_SINGLE("AGC Target Volume", UDA134X_EA110, 0, 0x03, 1),
+SOC_SINGLE("AGC Timing", UDA134X_EA110, 2, 0x07, 0),
+
+SOC_SINGLE("DAC +6dB Switch", UDA134X_STATUS1, 6, 1, 0),
+SOC_SINGLE("ADC +6dB Switch", UDA134X_STATUS1, 5, 1, 0),
+SOC_SINGLE("ADC Polarity Switch", UDA134X_STATUS1, 4, 1, 0),
+SOC_SINGLE("DAC Polarity Switch", UDA134X_STATUS1, 3, 1, 0),
+SOC_SINGLE("Double Speed Playback Switch", UDA134X_STATUS1, 2, 1, 0),
+SOC_SINGLE("DC Filter Enable Switch", UDA134X_STATUS0, 0, 1, 0),
+};
+
+static const struct snd_kcontrol_new uda1340_snd_controls[] = {
+SOC_SINGLE("Master Playback Volume", UDA134X_DATA000, 0, 0x3F, 1),
+
+SOC_SINGLE("Tone Control - Bass", UDA134X_DATA001, 2, 0xF, 0),
+SOC_SINGLE("Tone Control - Treble", UDA134X_DATA001, 0, 3, 0),
+
+SOC_ENUM("Sound Processing Filter", uda134x_mixer_enum[0]),
+SOC_ENUM("PCM Playback De-emphasis", uda134x_mixer_enum[1]),
+
+SOC_SINGLE("DC Filter Enable Switch", UDA134X_STATUS0, 0, 1, 0),
+};
+
+static int uda134x_add_controls(struct snd_soc_codec *codec)
+{
+	int err, i, n;
+	const struct snd_kcontrol_new *ctrls;
+	struct uda134x_platform_data *pd = codec->control_data;
+
+	switch (pd->model) {
+	case UDA134X_UDA1340:
+	case UDA134X_UDA1344:
+		n = ARRAY_SIZE(uda1340_snd_controls);
+		ctrls = uda1340_snd_controls;
+		break;
+	case UDA134X_UDA1341:
+		n = ARRAY_SIZE(uda1341_snd_controls);
+		ctrls = uda1341_snd_controls;
+		break;
+	default:
+		printk(KERN_ERR "%s unkown codec type: %d",
+		       __func__, pd->model);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < n; i++) {
+		err = snd_ctl_add(codec->card,
+				  snd_soc_cnew(&ctrls[i],
+					       codec, NULL));
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+struct snd_soc_dai uda134x_dai = {
+	.name = "UDA134X",
+	/* playback capabilities */
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = UDA134X_RATES,
+		.formats = UDA134X_FORMATS,
+	},
+	/* capture capabilities */
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = UDA134X_RATES,
+		.formats = UDA134X_FORMATS,
+	},
+	/* pcm operations */
+	.ops = {
+		.startup = uda134x_startup,
+		.shutdown = uda134x_shutdown,
+		.hw_params = uda134x_hw_params,
+	},
+	/* DAI operations */
+	.dai_ops = {
+		.digital_mute = uda134x_mute,
+		.set_sysclk = uda134x_set_dai_sysclk,
+		.set_fmt = uda134x_set_dai_fmt,
+	}
+};
+EXPORT_SYMBOL(uda134x_dai);
+
+
+static int uda134x_soc_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	struct uda134x_priv *uda134x;
+	void *codec_setup_data = socdev->codec_data;
+	int ret = -ENOMEM;
+	struct uda134x_platform_data *pd;
+
+	printk(KERN_INFO "UDA134X SoC Audio Codec\n");
+
+	if (!codec_setup_data) {
+		printk(KERN_ERR "UDA134X SoC codec: "
+		       "missing L3 bitbang function\n");
+		return -ENODEV;
+	}
+
+	pd = codec_setup_data;
+	switch (pd->model) {
+	case UDA134X_UDA1340:
+	case UDA134X_UDA1341:
+	case UDA134X_UDA1344:
+		break;
+	default:
+		printk(KERN_ERR "UDA134X SoC codec: "
+		       "unsupported model %d\n",
+			pd->model);
+		return -EINVAL;
+	}
+
+	socdev->codec = kzalloc(sizeof(struct snd_soc_codec), GFP_KERNEL);
+	if (socdev->codec == NULL)
+		return ret;
+
+	codec = socdev->codec;
+
+	uda134x = kzalloc(sizeof(struct uda134x_priv), GFP_KERNEL);
+	if (uda134x == NULL)
+		goto priv_err;
+	codec->private_data = uda134x;
+
+	codec->reg_cache = kmemdup(uda134x_reg, sizeof(uda134x_reg),
+				   GFP_KERNEL);
+	if (codec->reg_cache == NULL)
+		goto reg_err;
+
+	mutex_init(&codec->mutex);
+
+	codec->reg_cache_size = sizeof(uda134x_reg);
+	codec->reg_cache_step = 1;
+
+	codec->name = "UDA134X";
+	codec->owner = THIS_MODULE;
+	codec->dai = &uda134x_dai;
+	codec->num_dai = 1;
+	codec->read = uda134x_read_reg_cache;
+	codec->write = uda134x_write;
+#ifdef POWER_OFF_ON_STANDBY
+	codec->set_bias_level = uda134x_set_bias_level;
+#endif
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->control_data = codec_setup_data;
+
+	if (pd->power)
+		pd->power(1);
+
+	uda134x_reset(codec);
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		printk(KERN_ERR "UDA134X: failed to register pcms\n");
+		goto pcm_err;
+	}
+
+	ret = uda134x_add_controls(codec);
+	if (ret < 0) {
+		printk(KERN_ERR "UDA134X: failed to register controls\n");
+		goto pcm_err;
+	}
+
+	ret = snd_soc_register_card(socdev);
+	if (ret < 0) {
+		printk(KERN_ERR "UDA134X: failed to register card\n");
+		goto card_err;
+	}
+
+	return 0;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	kfree(codec->reg_cache);
+reg_err:
+	kfree(codec->private_data);
+priv_err:
+	kfree(codec);
+	return ret;
+}
+
+/* power down chip */
+static int uda134x_soc_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->codec;
+
+	uda134x_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	uda134x_set_bias_level(codec, SND_SOC_BIAS_OFF);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	kfree(codec->private_data);
+	kfree(codec->reg_cache);
+	kfree(codec);
+
+	return 0;
+}
+
+#if defined(CONFIG_PM)
+static int uda134x_soc_suspend(struct platform_device *pdev,
+						pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->codec;
+
+	uda134x_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	uda134x_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int uda134x_soc_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->codec;
+
+	uda134x_set_bias_level(codec, SND_SOC_BIAS_PREPARE);
+	uda134x_set_bias_level(codec, SND_SOC_BIAS_ON);
+	return 0;
+}
+#else
+#define uda134x_soc_suspend NULL
+#define uda134x_soc_resume NULL
+#endif /* CONFIG_PM */
+
+struct snd_soc_codec_device soc_codec_dev_uda134x = {
+	.probe =        uda134x_soc_probe,
+	.remove =       uda134x_soc_remove,
+	.suspend =      uda134x_soc_suspend,
+	.resume =       uda134x_soc_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_uda134x);
+
+MODULE_DESCRIPTION("UDA134X ALSA soc codec driver");
+MODULE_AUTHOR("Zoltan Devai, Christian Pellegrin <chripell@evolware.org>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/uda134x_codec.h b/sound/soc/codecs/uda134x_codec.h
new file mode 100644
index 000000000000..94f440490b31
--- /dev/null
+++ b/sound/soc/codecs/uda134x_codec.h
@@ -0,0 +1,36 @@
+#ifndef _UDA134X_CODEC_H
+#define _UDA134X_CODEC_H
+
+#define UDA134X_L3ADDR	5
+#define UDA134X_DATA0_ADDR	((UDA134X_L3ADDR << 2) | 0)
+#define UDA134X_DATA1_ADDR	((UDA134X_L3ADDR << 2) | 1)
+#define UDA134X_STATUS_ADDR	((UDA134X_L3ADDR << 2) | 2)
+
+#define UDA134X_EXTADDR_PREFIX	0xC0
+#define UDA134X_EXTDATA_PREFIX	0xE0
+
+/* UDA134X registers */
+#define UDA134X_EA000	0
+#define UDA134X_EA001	1
+#define UDA134X_EA010	2
+#define UDA134X_EA011	3
+#define UDA134X_EA100	4
+#define UDA134X_EA101	5
+#define UDA134X_EA110	6
+#define UDA134X_EA111	7
+#define UDA134X_STATUS0 8
+#define UDA134X_STATUS1 9
+#define UDA134X_DATA000 10
+#define UDA134X_DATA001 11
+#define UDA134X_DATA010 12
+#define UDA134X_DATA1	13
+
+#define UDA134X_REGS_NUM 14
+
+#define STATUS0_DAIFMT_MASK (~(7<<1))
+#define STATUS0_SYSCLK_MASK (~(3<<4))
+
+extern struct snd_soc_dai uda134x_dai;
+extern struct snd_soc_codec_device soc_codec_dev_uda134x;
+
+#endif
-- 
cgit v1.2.3


From 7ad933d7a6677c20ce1bdb17425e732cf1ebee8a Mon Sep 17 00:00:00 2001
From: Christian Pellegrin <chripell@gmail.com>
Date: Sat, 15 Nov 2008 08:58:32 +0100
Subject: ASoC: Machine driver for for s3c24xx with uda134x

Signed-off-by: Christian Pellegrin <chripell@fsfe.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/s3c24xx_uda134x.h     |  14 ++
 sound/soc/s3c24xx/Kconfig           |   5 +
 sound/soc/s3c24xx/Makefile          |   2 +
 sound/soc/s3c24xx/s3c24xx_uda134x.c | 374 ++++++++++++++++++++++++++++++++++++
 4 files changed, 395 insertions(+)
 create mode 100644 include/sound/s3c24xx_uda134x.h
 create mode 100644 sound/soc/s3c24xx/s3c24xx_uda134x.c

(limited to 'include')

diff --git a/include/sound/s3c24xx_uda134x.h b/include/sound/s3c24xx_uda134x.h
new file mode 100644
index 000000000000..33df4cb909d3
--- /dev/null
+++ b/include/sound/s3c24xx_uda134x.h
@@ -0,0 +1,14 @@
+#ifndef _S3C24XX_UDA134X_H_
+#define _S3C24XX_UDA134X_H_ 1
+
+#include <sound/uda134x.h>
+
+struct s3c24xx_uda134x_platform_data {
+	int l3_clk;
+	int l3_mode;
+	int l3_data;
+	void (*power) (int);
+	int model;
+};
+
+#endif
diff --git a/sound/soc/s3c24xx/Kconfig b/sound/soc/s3c24xx/Kconfig
index b9f2353effeb..fcd03acf10f6 100644
--- a/sound/soc/s3c24xx/Kconfig
+++ b/sound/soc/s3c24xx/Kconfig
@@ -44,3 +44,8 @@ config SND_S3C24XX_SOC_LN2440SBC_ALC650
 	  Say Y if you want to add support for SoC audio on ln2440sbc
 	  with the ALC650.
 
+config SND_S3C24XX_SOC_S3C24XX_UDA134X
+	tristate "SoC I2S Audio support UDA134X wired to a S3C24XX"
+       	depends on SND_S3C24XX_SOC
+       	select SND_S3C24XX_SOC_I2S
+       	select SND_SOC_UDA134X
diff --git a/sound/soc/s3c24xx/Makefile b/sound/soc/s3c24xx/Makefile
index 0aa5fb0b9700..96b3f3f617d4 100644
--- a/sound/soc/s3c24xx/Makefile
+++ b/sound/soc/s3c24xx/Makefile
@@ -13,7 +13,9 @@ obj-$(CONFIG_SND_S3C2412_SOC_I2S) += snd-soc-s3c2412-i2s.o
 snd-soc-neo1973-wm8753-objs := neo1973_wm8753.o
 snd-soc-smdk2443-wm9710-objs := smdk2443_wm9710.o
 snd-soc-ln2440sbc-alc650-objs := ln2440sbc_alc650.o
+snd-soc-s3c24xx-uda134x-objs := s3c24xx_uda134x.o
 
 obj-$(CONFIG_SND_S3C24XX_SOC_NEO1973_WM8753) += snd-soc-neo1973-wm8753.o
 obj-$(CONFIG_SND_S3C24XX_SOC_SMDK2443_WM9710) += snd-soc-smdk2443-wm9710.o
 obj-$(CONFIG_SND_S3C24XX_SOC_LN2440SBC_ALC650) += snd-soc-ln2440sbc-alc650.o
+obj-$(CONFIG_SND_S3C24XX_SOC_S3C24XX_UDA134X) += snd-soc-s3c24xx-uda134x.o
diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c
new file mode 100644
index 000000000000..29a68132f169
--- /dev/null
+++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c
@@ -0,0 +1,374 @@
+/*
+ * Modifications by Christian Pellegrin <chripell@evolware.org>
+ *
+ * s3c24xx_uda134x.c  --  S3C24XX_UDA134X ALSA SoC Audio board driver
+ *
+ * Copyright 2007 Dension Audio Systems Ltd.
+ * Author: Zoltan Devai
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/mutex.h>
+#include <linux/gpio.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/s3c24xx_uda134x.h>
+#include <sound/uda134x.h>
+
+#include <asm/plat-s3c24xx/regs-iis.h>
+
+#include "s3c24xx-pcm.h"
+#include "s3c24xx-i2s.h"
+#include "../codecs/uda134x_codec.h"
+
+
+/* #define ENFORCE_RATES 1 */
+/*
+  Unfortunately the S3C24XX in master mode has a limited capacity of
+  generating the clock for the codec. If you define this only rates
+  that are really available will be enforced. But be careful, most
+  user level application just want the usual sampling frequencies (8,
+  11.025, 22.050, 44.1 kHz) and anyway resampling is a costly
+  operation for embedded systems. So if you aren't very lucky or your
+  hardware engineer wasn't very forward-looking it's better to leave
+  this undefined. If you do so an approximate value for the requested
+  sampling rate in the range -/+ 5% will be chosen. If this in not
+  possible an error will be returned.
+*/
+
+static struct clk *xtal;
+static struct clk *pclk;
+/* this is need because we don't have a place where to keep the
+ * pointers to the clocks in each substream. We get the clocks only
+ * when we are actually using them so we don't block stuff like
+ * frequency change or oscillator power-off */
+static int clk_users;
+static DEFINE_MUTEX(clk_lock);
+
+static unsigned int rates[33 * 2];
+#ifdef ENFORCE_RATES
+static struct snd_pcm_hw_constraint_list hw_constraints_rates = {
+	.count	= ARRAY_SIZE(rates),
+	.list	= rates,
+	.mask	= 0,
+};
+#endif
+
+static struct platform_device *s3c24xx_uda134x_snd_device;
+
+int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
+{
+	int ret = 0;
+#ifdef ENFORCE_RATES
+	struct snd_pcm_runtime *runtime = substream->runtime;;
+#endif
+
+	mutex_lock(&clk_lock);
+	pr_debug("%s %d\n", __func__, clk_users);
+	if (clk_users == 0) {
+		xtal = clk_get(&s3c24xx_uda134x_snd_device->dev, "xtal");
+		if (!xtal) {
+			printk(KERN_ERR "%s cannot get xtal\n", __func__);
+			ret = -EBUSY;
+		} else {
+			pclk = clk_get(&s3c24xx_uda134x_snd_device->dev,
+				       "pclk");
+			if (!pclk) {
+				printk(KERN_ERR "%s cannot get pclk\n",
+				       __func__);
+				clk_put(xtal);
+				ret = -EBUSY;
+			}
+		}
+		if (!ret) {
+			int i, j;
+
+			for (i = 0; i < 2; i++) {
+				int fs = i ? 256 : 384;
+
+				rates[i*33] = clk_get_rate(xtal) / fs;
+				for (j = 1; j < 33; j++)
+					rates[i*33 + j] = clk_get_rate(pclk) /
+						(j * fs);
+			}
+		}
+	}
+	clk_users += 1;
+	mutex_unlock(&clk_lock);
+	if (!ret) {
+#ifdef ENFORCE_RATES
+		ret = snd_pcm_hw_constraint_list(runtime, 0,
+						 SNDRV_PCM_HW_PARAM_RATE,
+						 &hw_constraints_rates);
+		if (ret < 0)
+			printk(KERN_ERR "%s cannot set constraints\n",
+			       __func__);
+#endif
+	}
+	return ret;
+}
+
+void s3c24xx_uda134x_shutdown(struct snd_pcm_substream *substream)
+{
+	mutex_lock(&clk_lock);
+	pr_debug("%s %d\n", __func__, clk_users);
+	clk_users -= 1;
+	if (clk_users == 0) {
+		clk_put(xtal);
+		xtal = NULL;
+		clk_put(pclk);
+		pclk = NULL;
+	}
+	mutex_unlock(&clk_lock);
+}
+
+static int s3c24xx_uda134x_hw_params(struct snd_pcm_substream *substream,
+					struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
+	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
+	unsigned int clk = 0;
+	int ret = 0;
+	int clk_source, fs_mode;
+	unsigned long rate = params_rate(params);
+	long err, cerr;
+	unsigned int div;
+	int i, bi;
+
+	err = 999999;
+	bi = 0;
+	for (i = 0; i < 2*33; i++) {
+		cerr = rates[i] - rate;
+		if (cerr < 0)
+			cerr = -cerr;
+		if (cerr < err) {
+			err = cerr;
+			bi = i;
+		}
+	}
+	if (bi / 33 == 1)
+		fs_mode = S3C2410_IISMOD_256FS;
+	else
+		fs_mode = S3C2410_IISMOD_384FS;
+	if (bi % 33 == 0) {
+		clk_source = S3C24XX_CLKSRC_MPLL;
+		div = 1;
+	} else {
+		clk_source = S3C24XX_CLKSRC_PCLK;
+		div = bi % 33;
+	}
+	pr_debug("%s desired rate %lu, %d\n", __func__, rate, bi);
+
+	clk = (fs_mode == S3C2410_IISMOD_384FS ? 384 : 256) * rate;
+	pr_debug("%s will use: %s %s %d sysclk %d err %ld\n", __func__,
+		 fs_mode == S3C2410_IISMOD_384FS ? "384FS" : "256FS",
+		 clk_source == S3C24XX_CLKSRC_MPLL ? "MPLLin" : "PCLK",
+		 div, clk, err);
+
+	if ((err * 100 / rate) > 5) {
+		printk(KERN_ERR "S3C24XX_UDA134X: effective frequency "
+		       "too different from desired (%ld%%)\n",
+		       err * 100 / rate);
+		return -EINVAL;
+	}
+
+	ret = codec_dai->dai_ops.set_fmt(codec_dai, SND_SOC_DAIFMT_I2S |
+			SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0)
+		return ret;
+
+	ret = cpu_dai->dai_ops.set_fmt(cpu_dai, SND_SOC_DAIFMT_I2S |
+			SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0)
+		return ret;
+
+	ret = cpu_dai->dai_ops.set_sysclk(cpu_dai, clk_source , clk,
+						SND_SOC_CLOCK_IN);
+	if (ret < 0)
+		return ret;
+
+	ret = cpu_dai->dai_ops.set_clkdiv(cpu_dai, S3C24XX_DIV_MCLK,
+						fs_mode);
+	if (ret < 0)
+		return ret;
+
+	ret = cpu_dai->dai_ops.set_clkdiv(cpu_dai, S3C24XX_DIV_BCLK,
+						S3C2410_IISMOD_32FS);
+	if (ret < 0)
+		return ret;
+
+	ret = cpu_dai->dai_ops.set_clkdiv(cpu_dai, S3C24XX_DIV_PRESCALER,
+					  S3C24XX_PRESCALE(div, div));
+	if (ret < 0)
+		return ret;
+
+	/* set the codec system clock for DAC and ADC */
+	ret = codec_dai->dai_ops.set_sysclk(codec_dai, 0, clk,
+						SND_SOC_CLOCK_OUT);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static struct snd_soc_ops s3c24xx_uda134x_ops = {
+	.startup = s3c24xx_uda134x_startup,
+	.shutdown = s3c24xx_uda134x_shutdown,
+	.hw_params = s3c24xx_uda134x_hw_params,
+};
+
+static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = {
+	.name = "UDA134X",
+	.stream_name = "UDA134X",
+	.codec_dai = &uda134x_dai,
+	.cpu_dai = &s3c24xx_i2s_dai,
+	.ops = &s3c24xx_uda134x_ops,
+};
+
+static struct snd_soc_machine snd_soc_machine_s3c24xx_uda134x = {
+	.name = "S3C24XX_UDA134X",
+	.dai_link = &s3c24xx_uda134x_dai_link,
+	.num_links = 1,
+};
+
+static struct s3c24xx_uda134x_platform_data *s3c24xx_uda134x_l3_pins;
+
+static void setdat(int v)
+{
+	gpio_set_value(s3c24xx_uda134x_l3_pins->l3_data, v > 0);
+}
+
+static void setclk(int v)
+{
+	gpio_set_value(s3c24xx_uda134x_l3_pins->l3_clk, v > 0);
+}
+
+static void setmode(int v)
+{
+	gpio_set_value(s3c24xx_uda134x_l3_pins->l3_mode, v > 0);
+}
+
+static struct uda134x_platform_data s3c24xx_uda134x = {
+	.l3 = {
+		.setdat = setdat,
+		.setclk = setclk,
+		.setmode = setmode,
+		.data_hold = 1,
+		.data_setup = 1,
+		.clock_high = 1,
+		.mode_hold = 1,
+		.mode = 1,
+		.mode_setup = 1,
+	},
+};
+
+static struct snd_soc_device s3c24xx_uda134x_snd_devdata = {
+	.machine = &snd_soc_machine_s3c24xx_uda134x,
+	.platform = &s3c24xx_soc_platform,
+	.codec_dev = &soc_codec_dev_uda134x,
+	.codec_data = &s3c24xx_uda134x,
+};
+
+static int s3c24xx_uda134x_setup_pin(int pin, char *fun)
+{
+	if (gpio_request(pin, "s3c24xx_uda134x") < 0) {
+		printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: "
+		       "l3 %s pin already in use", fun);
+		return -EBUSY;
+	}
+	gpio_direction_output(pin, 0);
+	return 0;
+}
+
+static int s3c24xx_uda134x_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	printk(KERN_INFO "S3C24XX_UDA134X SoC Audio driver\n");
+
+	s3c24xx_uda134x_l3_pins = pdev->dev.platform_data;
+	if (s3c24xx_uda134x_l3_pins == NULL) {
+		printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: "
+		       "unable to find platform data\n");
+		return -ENODEV;
+	}
+	s3c24xx_uda134x.power = s3c24xx_uda134x_l3_pins->power;
+	s3c24xx_uda134x.model = s3c24xx_uda134x_l3_pins->model;
+
+	if (s3c24xx_uda134x_setup_pin(s3c24xx_uda134x_l3_pins->l3_data,
+				      "data") < 0)
+		return -EBUSY;
+	if (s3c24xx_uda134x_setup_pin(s3c24xx_uda134x_l3_pins->l3_clk,
+				      "clk") < 0) {
+		gpio_free(s3c24xx_uda134x_l3_pins->l3_data);
+		return -EBUSY;
+	}
+	if (s3c24xx_uda134x_setup_pin(s3c24xx_uda134x_l3_pins->l3_mode,
+				      "mode") < 0) {
+		gpio_free(s3c24xx_uda134x_l3_pins->l3_data);
+		gpio_free(s3c24xx_uda134x_l3_pins->l3_clk);
+		return -EBUSY;
+	}
+
+	s3c24xx_uda134x_snd_device = platform_device_alloc("soc-audio", -1);
+	if (!s3c24xx_uda134x_snd_device) {
+		printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: "
+		       "Unable to register\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(s3c24xx_uda134x_snd_device,
+			     &s3c24xx_uda134x_snd_devdata);
+	s3c24xx_uda134x_snd_devdata.dev = &s3c24xx_uda134x_snd_device->dev;
+	ret = platform_device_add(s3c24xx_uda134x_snd_device);
+	if (ret) {
+		printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: Unable to add\n");
+		platform_device_put(s3c24xx_uda134x_snd_device);
+	}
+
+	return ret;
+}
+
+static int s3c24xx_uda134x_remove(struct platform_device *pdev)
+{
+	platform_device_unregister(s3c24xx_uda134x_snd_device);
+	gpio_free(s3c24xx_uda134x_l3_pins->l3_data);
+	gpio_free(s3c24xx_uda134x_l3_pins->l3_clk);
+	gpio_free(s3c24xx_uda134x_l3_pins->l3_mode);
+	return 0;
+}
+
+static struct platform_driver s3c24xx_uda134x_driver = {
+	.probe  = s3c24xx_uda134x_probe,
+	.remove = s3c24xx_uda134x_remove,
+	.driver = {
+		.name = "s3c24xx_uda134x",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init s3c24xx_uda134x_init(void)
+{
+	return platform_driver_register(&s3c24xx_uda134x_driver);
+}
+
+static void __exit s3c24xx_uda134x_exit(void)
+{
+	platform_driver_unregister(&s3c24xx_uda134x_driver);
+}
+
+
+module_init(s3c24xx_uda134x_init);
+module_exit(s3c24xx_uda134x_exit);
+
+MODULE_AUTHOR("Zoltan Devai, Christian Pellegrin <chripell@evolware.org>");
+MODULE_DESCRIPTION("S3C24XX_UDA134X ALSA SoC audio driver");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 4dc06f9633444f426ef9960c53426f2d2ded64ac Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 Nov 2008 16:01:42 +0100
Subject: netfilter: nf_conntrack: connection tracking helper name persistent
 aliases

This patch adds the macro MODULE_ALIAS_NFCT_HELPER that defines a
way to provide generic and persistent aliases for the connection
tracking helpers.

This next patch requires this patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h    | 3 +++
 net/netfilter/nf_conntrack_amanda.c     | 1 +
 net/netfilter/nf_conntrack_ftp.c        | 1 +
 net/netfilter/nf_conntrack_h323_main.c  | 1 +
 net/netfilter/nf_conntrack_irc.c        | 1 +
 net/netfilter/nf_conntrack_netbios_ns.c | 1 +
 net/netfilter/nf_conntrack_pptp.c       | 1 +
 net/netfilter/nf_conntrack_sane.c       | 1 +
 net/netfilter/nf_conntrack_sip.c        | 1 +
 net/netfilter/nf_conntrack_tftp.c       | 1 +
 10 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b76a8685b5b5..f11255e1ea35 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -298,5 +298,8 @@ do {							\
 	local_bh_enable();				\
 } while (0)
 
+#define MODULE_ALIAS_NFCT_HELPER(helper) \
+        MODULE_ALIAS("nfct-helper-" helper)
+
 #endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_H */
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 38aedeeaf4e1..4f8fcf498545 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -30,6 +30,7 @@ MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
 MODULE_DESCRIPTION("Amanda connection tracking module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_amanda");
+MODULE_ALIAS_NFCT_HELPER("amanda");
 
 module_param(master_timeout, uint, 0600);
 MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 629500901bd4..703a4378074a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -29,6 +29,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 MODULE_DESCRIPTION("ftp connection tracking helper");
 MODULE_ALIAS("ip_conntrack_ftp");
+MODULE_ALIAS_NFCT_HELPER("ftp");
 
 /* This is slow, but it's simple. --RR */
 static char *ftp_buffer;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 99bc803d1dd1..687bd633c3d7 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1827,3 +1827,4 @@ MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
 MODULE_DESCRIPTION("H.323 connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_h323");
+MODULE_ALIAS_NFCT_HELPER("h323");
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 4d681a04447e..409c8be58e7c 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -41,6 +41,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_irc");
+MODULE_ALIAS_NFCT_HELPER("irc");
 
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "port numbers of IRC servers");
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 08404e6755fb..5af4273b4668 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -37,6 +37,7 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_netbios_ns");
+MODULE_ALIAS_NFCT_HELPER("netbios_ns");
 
 static unsigned int timeout __read_mostly = 3;
 module_param(timeout, uint, 0400);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 1bc3001d1827..9e169ef2e854 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -37,6 +37,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
 MODULE_ALIAS("ip_conntrack_pptp");
+MODULE_ALIAS_NFCT_HELPER("pptp");
 
 static DEFINE_SPINLOCK(nf_pptp_lock);
 
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index a94294b2b23c..dcfecbb81c46 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -30,6 +30,7 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
 MODULE_DESCRIPTION("SANE connection tracking helper");
+MODULE_ALIAS_NFCT_HELPER("sane");
 
 static char *sane_buffer;
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 6813f1c8863f..4b572163784b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -28,6 +28,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
 MODULE_DESCRIPTION("SIP connection tracking helper");
 MODULE_ALIAS("ip_conntrack_sip");
+MODULE_ALIAS_NFCT_HELPER("sip");
 
 #define MAX_PORTS	8
 static unsigned short ports[MAX_PORTS];
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index f57f6e7a71ee..46e646b2e9b9 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -22,6 +22,7 @@ MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
 MODULE_DESCRIPTION("TFTP connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_tftp");
+MODULE_ALIAS_NFCT_HELPER("tftp");
 
 #define MAX_PORTS 8
 static unsigned short ports[MAX_PORTS];
-- 
cgit v1.2.3


From 0231022cc32d5f2e7f3c06b75691dda0ad6aec33 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 17 Nov 2008 03:22:41 +0100
Subject: tracing/function-return-tracer: add the overrun field

Impact: help to find the better depth of trace

We decided to arbitrary define the depth of function return trace as
"20". Perhaps this is not enough. To help finding an optimal depth, we
measure now the overrun: the number of functions that have been missed
for the current thread. By default this is not displayed, we have to
do set a particular flag on the return tracer: echo overrun >
/debug/tracing/trace_options And the overrun will be printed on the
right.

As the trace shows below, the current 20 depth is not enough.

update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838)
tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838)
vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274)
vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274)
set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274)
con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274)
release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274)
release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274)
con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274)
con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274)
n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274)
smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274)
irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274)
smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274)
ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274)
ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274)
ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274)
hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274)

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/thread_info.h    |  7 +++++++
 arch/x86/kernel/ftrace.c              | 10 ++++++---
 include/linux/ftrace.h                |  2 ++
 include/linux/sched.h                 |  1 +
 kernel/trace/trace.c                  |  1 +
 kernel/trace/trace.h                  |  1 +
 kernel/trace/trace_functions_return.c | 38 +++++++++++++++++++++++++++++------
 7 files changed, 51 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a71158369fd4..e90e81ef6ab9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -21,6 +21,7 @@ struct task_struct;
 struct exec_domain;
 #include <asm/processor.h>
 #include <asm/ftrace.h>
+#include <asm/atomic.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
@@ -45,6 +46,11 @@ struct thread_info {
 	int		curr_ret_stack;
 	/* Stack of return addresses for return function tracing */
 	struct ftrace_ret_stack	ret_stack[FTRACE_RET_STACK_SIZE];
+	/*
+	 * Number of functions that haven't been traced
+	 * because of depth overrun.
+	 */
+	atomic_t	trace_overrun;
 #endif
 };
 
@@ -61,6 +67,7 @@ struct thread_info {
 		.fn = do_no_restart_syscall,	\
 	},					\
 	.curr_ret_stack = -1,\
+	.trace_overrun	= ATOMIC_INIT(0)	\
 }
 #else
 #define INIT_THREAD_INFO(tsk)			\
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 924153edd973..356bb1eb6e9a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -353,8 +353,10 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
 	struct thread_info *ti = current_thread_info();
 
 	/* The return trace stack is full */
-	if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
+	if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) {
+		atomic_inc(&ti->trace_overrun);
 		return -EBUSY;
+	}
 
 	index = ++ti->curr_ret_stack;
 	barrier();
@@ -367,7 +369,7 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
 
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void pop_return_trace(unsigned long *ret, unsigned long long *time,
-				unsigned long *func)
+				unsigned long *func, unsigned long *overrun)
 {
 	int index;
 
@@ -376,6 +378,7 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
 	*ret = ti->ret_stack[index].ret;
 	*func = ti->ret_stack[index].func;
 	*time = ti->ret_stack[index].calltime;
+	*overrun = atomic_read(&ti->trace_overrun);
 	ti->curr_ret_stack--;
 }
 
@@ -386,7 +389,8 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
 unsigned long ftrace_return_to_handler(void)
 {
 	struct ftrace_retfunc trace;
-	pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+	pop_return_trace(&trace.ret, &trace.calltime, &trace.func,
+			&trace.overrun);
 	trace.rettime = cpu_clock(raw_smp_processor_id());
 	ftrace_function_return(&trace);
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f1af1aab00e6..f7ba4ea5e128 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -318,6 +318,8 @@ struct ftrace_retfunc {
 	unsigned long func; /* Current function */
 	unsigned long long calltime;
 	unsigned long long rettime;
+	/* Number of functions that overran the depth limit for current task */
+	unsigned long overrun;
 };
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61c8cc36028a..c8e0db464206 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2016,6 +2016,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
 	 * used.
 	 */
 	task_thread_info(p)->curr_ret_stack = -1;
+	atomic_set(&task_thread_info(p)->trace_overrun, 0);
 #endif
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9531fddcfb8d..e97c29a6e7b0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -853,6 +853,7 @@ static void __trace_function_return(struct trace_array *tr,
 	entry->parent_ip	= trace->ret;
 	entry->rettime		= trace->rettime;
 	entry->calltime		= trace->calltime;
+	entry->overrun		= trace->overrun;
 	ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
 }
 #endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9d22618bf99f..2cb12fd98f6b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -60,6 +60,7 @@ struct ftrace_ret_entry {
 	unsigned long		parent_ip;
 	unsigned long long	calltime;
 	unsigned long long	rettime;
+	unsigned long		overrun;
 };
 extern struct tracer boot_tracer;
 
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
index a68564af022b..e00d64509c9c 100644
--- a/kernel/trace/trace_functions_return.c
+++ b/kernel/trace/trace_functions_return.c
@@ -14,6 +14,19 @@
 #include "trace.h"
 
 
+#define TRACE_RETURN_PRINT_OVERRUN	0x1
+static struct tracer_opt trace_opts[] = {
+	/* Display overruns or not */
+	{ TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) },
+	{ } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+	.val = 0, /* Don't display overruns by default */
+	.opts = trace_opts
+};
+
+
 static int return_trace_init(struct trace_array *tr)
 {
 	int cpu;
@@ -42,26 +55,39 @@ print_return_function(struct trace_iterator *iter)
 		ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
+
 		ret = seq_print_ip_sym(s, field->ip,
 					trace_flags & TRACE_ITER_SYM_MASK);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
-		ret = trace_seq_printf(s, " (%llu ns)\n",
+
+		ret = trace_seq_printf(s, " (%llu ns)",
 					field->rettime - field->calltime);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
-		else
-			return TRACE_TYPE_HANDLED;
+
+		if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) {
+			ret = trace_seq_printf(s, " (Overruns: %lu)",
+						field->overrun);
+			if (!ret)
+				return TRACE_TYPE_PARTIAL_LINE;
+		}
+
+		ret = trace_seq_printf(s, "\n");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+
+		return TRACE_TYPE_HANDLED;
 	}
 	return TRACE_TYPE_UNHANDLED;
 }
 
-static struct tracer return_trace __read_mostly =
-{
+static struct tracer return_trace __read_mostly = {
 	.name	     = "return",
 	.init	     = return_trace_init,
 	.reset	     = return_trace_reset,
-	.print_line = print_return_function
+	.print_line = print_return_function,
+	.flags		= &tracer_flags,
 };
 
 static __init int init_return_trace(void)
-- 
cgit v1.2.3


From 226c0c0ef2abdf91b8d9cce1aaf7d4635a5e5926 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 18 Nov 2008 11:54:05 +0100
Subject: netfilter: ctnetlink: helper modules load-on-demand support

This patch adds module loading for helpers via ctnetlink.

* Creation path: We support explicit and implicit helper assignation. For
  the explicit case, we try to load the module. If the module is correctly
  loaded and the helper is present, we return EAGAIN to re-start the
  creation. Otherwise, we return EOPNOTSUPP.
* Update path: release the spin lock, load the module and check. If it is
  present, then return EAGAIN to re-start the update.

This patch provides a refactorized function to lookup-and-set the
connection tracking helper. The function removes the exported symbol
__nf_ct_helper_find as it has not clients anymore.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_helper.h |  5 +--
 net/netfilter/nf_conntrack_core.c           | 28 +-----------
 net/netfilter/nf_conntrack_helper.c         | 32 ++++++++++++-
 net/netfilter/nf_conntrack_netlink.c        | 70 +++++++++++++++++++++++++----
 4 files changed, 95 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index f8060ab5a083..66d65a7caa39 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -38,9 +38,6 @@ struct nf_conntrack_helper
 	unsigned int expect_class_max;
 };
 
-extern struct nf_conntrack_helper *
-__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple);
-
 extern struct nf_conntrack_helper *
 __nf_conntrack_helper_find_byname(const char *name);
 
@@ -49,6 +46,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
 
 extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
+extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags);
+
 static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 {
 	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 622d7c671cb7..1e649fb9e0df 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -588,14 +588,7 @@ init_conntrack(struct net *net,
 		nf_conntrack_get(&ct->master->ct_general);
 		NF_CT_STAT_INC(net, expect_new);
 	} else {
-		struct nf_conntrack_helper *helper;
-
-		helper = __nf_ct_helper_find(&repl_tuple);
-		if (helper) {
-			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
-			if (help)
-				rcu_assign_pointer(help->helper, helper);
-		}
+		__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
 		NF_CT_STAT_INC(net, new);
 	}
 
@@ -772,7 +765,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 			      const struct nf_conntrack_tuple *newreply)
 {
 	struct nf_conn_help *help = nfct_help(ct);
-	struct nf_conntrack_helper *helper;
 
 	/* Should be unconfirmed, so not in hash table yet */
 	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
@@ -785,23 +777,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 		return;
 
 	rcu_read_lock();
-	helper = __nf_ct_helper_find(newreply);
-	if (helper == NULL) {
-		if (help)
-			rcu_assign_pointer(help->helper, NULL);
-		goto out;
-	}
-
-	if (help == NULL) {
-		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
-		if (help == NULL)
-			goto out;
-	} else {
-		memset(&help->help, 0, sizeof(help->help));
-	}
-
-	rcu_assign_pointer(help->helper, helper);
-out:
+	__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9c06b9f86ad4..9e4b74b95ce8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -44,7 +44,7 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
 		(__force __u16)tuple->src.u.all) % nf_ct_helper_hsize;
 }
 
-struct nf_conntrack_helper *
+static struct nf_conntrack_helper *
 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_helper *helper;
@@ -62,7 +62,6 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(__nf_ct_helper_find);
 
 struct nf_conntrack_helper *
 __nf_conntrack_helper_find_byname(const char *name)
@@ -94,6 +93,35 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
 
+int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags)
+{
+	int ret = 0;
+	struct nf_conntrack_helper *helper;
+	struct nf_conn_help *help = nfct_help(ct);
+
+	helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	if (helper == NULL) {
+		if (help)
+			rcu_assign_pointer(help->helper, NULL);
+		goto out;
+	}
+
+	if (help == NULL) {
+		help = nf_ct_helper_ext_add(ct, flags);
+		if (help == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else {
+		memset(&help->help, 0, sizeof(help->help));
+	}
+
+	rcu_assign_pointer(help->helper, helper);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
+
 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 			 const struct nf_conntrack_helper *me)
 {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 49a04fa0becc..4f6486cfd337 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -917,8 +917,22 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
 	}
 
 	helper = __nf_conntrack_helper_find_byname(helpname);
-	if (helper == NULL)
+	if (helper == NULL) {
+#ifdef CONFIG_MODULES
+		spin_unlock_bh(&nf_conntrack_lock);
+
+		if (request_module("nfct-helper-%s", helpname) < 0) {
+			spin_lock_bh(&nf_conntrack_lock);
+			return -EOPNOTSUPP;
+		}
+
+		spin_lock_bh(&nf_conntrack_lock);
+		helper = __nf_conntrack_helper_find_byname(helpname);
+		if (helper)
+			return -EAGAIN;
+#endif
 		return -EOPNOTSUPP;
+	}
 
 	if (help) {
 		if (help->helper == helper)
@@ -1082,7 +1096,6 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 {
 	struct nf_conn *ct;
 	int err = -EINVAL;
-	struct nf_conn_help *help;
 	struct nf_conntrack_helper *helper;
 
 	ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL);
@@ -1097,16 +1110,55 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	ct->status |= IPS_CONFIRMED;
 
 	rcu_read_lock();
-	helper = __nf_ct_helper_find(rtuple);
-	if (helper) {
-		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
-		if (help == NULL) {
+ 	if (cda[CTA_HELP]) {
+ 		char *helpname;
+ 
+ 		err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
+ 		if (err < 0) {
+			rcu_read_unlock();
+			goto err;
+		}
+
+		helper = __nf_conntrack_helper_find_byname(helpname);
+		if (helper == NULL) {
+			rcu_read_unlock();
+#ifdef CONFIG_MODULES
+			if (request_module("nfct-helper-%s", helpname) < 0) {
+				err = -EOPNOTSUPP;
+				goto err;
+			}
+
+			rcu_read_lock();
+			helper = __nf_conntrack_helper_find_byname(helpname);
+			if (helper) {
+				rcu_read_unlock();
+				err = -EAGAIN;
+				goto err;
+			}
+			rcu_read_unlock();
+#endif
+			err = -EOPNOTSUPP;
+			goto err;
+		} else {
+			struct nf_conn_help *help;
+
+			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+			if (help == NULL) {
+				rcu_read_unlock();
+				err = -ENOMEM;
+				goto err;
+			}
+
+			/* not in hash table yet so not strictly necessary */
+			rcu_assign_pointer(help->helper, helper);
+		}
+	} else {
+		/* try an implicit helper assignation */
+		err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+		if (err < 0) {
 			rcu_read_unlock();
-			err = -ENOMEM;
 			goto err;
 		}
-		/* not in hash table yet so not strictly necessary */
-		rcu_assign_pointer(help->helper, helper);
 	}
 
 	if (cda[CTA_STATUS]) {
-- 
cgit v1.2.3


From 19abb7b090a6bce88d4e9b2914a0367f4f684432 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 18 Nov 2008 11:56:20 +0100
Subject: netfilter: ctnetlink: deliver events for conntracks changed from
 userspace

As for now, the creation and update of conntracks via ctnetlink do not
propagate an event to userspace. This can result in inconsistent situations
if several userspace processes modify the connection tracking table by means
of ctnetlink at the same time. Specifically, using the conntrack command
line tool and conntrackd at the same time can trigger unconsistencies.

This patch also modifies the event cache infrastructure to pass the
process PID and the ECHO flag to nfnetlink_send() to report back
to userspace if the process that triggered the change needs so.
Based on a suggestion from Patrick McHardy.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h        |  2 +-
 include/net/netfilter/nf_conntrack_ecache.h | 57 +++++++++++++++++--
 include/net/netfilter/nf_conntrack_expect.h |  2 +
 net/netfilter/nf_conntrack_core.c           | 25 ++++++--
 net/netfilter/nf_conntrack_ecache.c         | 14 ++++-
 net/netfilter/nf_conntrack_expect.c         | 43 +++++++++++---
 net/netfilter/nf_conntrack_netlink.c        | 88 ++++++++++++++++++++++++-----
 7 files changed, 197 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f11255e1ea35..2e0c53641cbe 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -199,7 +199,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
 
-extern void nf_conntrack_flush(struct net *net);
+extern void nf_conntrack_flush(struct net *net, u32 pid, int report);
 
 extern bool nf_ct_get_tuplepr(const struct sk_buff *skb,
 			      unsigned int nhoff, u_int16_t l3num,
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 1285ff26a014..0ff0dc69ca4a 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -17,6 +17,13 @@ struct nf_conntrack_ecache {
 	unsigned int events;
 };
 
+/* This structure is passed to event handler */
+struct nf_ct_event {
+	struct nf_conn *ct;
+	u32 pid;
+	int report;
+};
+
 extern struct atomic_notifier_head nf_conntrack_chain;
 extern int nf_conntrack_register_notifier(struct notifier_block *nb);
 extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
@@ -39,22 +46,56 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	local_bh_enable();
 }
 
-static inline void nf_conntrack_event(enum ip_conntrack_events event,
-				      struct nf_conn *ct)
+static inline void
+nf_conntrack_event_report(enum ip_conntrack_events event,
+			  struct nf_conn *ct,
+			  u32 pid,
+			  int report)
 {
+	struct nf_ct_event item = {
+		.ct 	= ct,
+		.pid	= pid,
+		.report = report
+	};
 	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
-		atomic_notifier_call_chain(&nf_conntrack_chain, event, ct);
+		atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
 }
 
+static inline void
+nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
+{
+	nf_conntrack_event_report(event, ct, 0, 0);
+}
+
+struct nf_exp_event {
+	struct nf_conntrack_expect *exp;
+	u32 pid;
+	int report;
+};
+
 extern struct atomic_notifier_head nf_ct_expect_chain;
 extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
 extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
 
+static inline void
+nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
+			  struct nf_conntrack_expect *exp,
+			  u32 pid,
+			  int report)
+{
+	struct nf_exp_event item = {
+		.exp	= exp,
+		.pid	= pid,
+		.report = report
+	};
+	atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+}
+
 static inline void
 nf_ct_expect_event(enum ip_conntrack_expect_events event,
 		   struct nf_conntrack_expect *exp)
 {
-	atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp);
+	nf_ct_expect_event_report(event, exp, 0, 0);
 }
 
 extern int nf_conntrack_ecache_init(struct net *net);
@@ -66,9 +107,17 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
 					    struct nf_conn *ct) {}
 static inline void nf_conntrack_event(enum ip_conntrack_events event,
 				      struct nf_conn *ct) {}
+static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
+					     struct nf_conn *ct,
+					     u32 pid,
+					     int report) {}
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
 				      struct nf_conntrack_expect *exp) {}
+static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
+					     struct nf_conntrack_expect *exp,
+ 					     u32 pid,
+ 					     int report) {}
 static inline void nf_ct_event_cache_flush(struct net *net) {}
 
 static inline int nf_conntrack_ecache_init(struct net *net)
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 37a7fc1164b0..ab17a159ac66 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -100,6 +100,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t,
 		       u_int8_t, const __be16 *, const __be16 *);
 void nf_ct_expect_put(struct nf_conntrack_expect *exp);
 int nf_ct_expect_related(struct nf_conntrack_expect *expect);
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
+				u32 pid, int report);
 
 #endif /*_NF_CONNTRACK_EXPECT_H*/
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1e649fb9e0df..dc3fea09f3fc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -181,7 +181,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
 	NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
-	nf_conntrack_event(IPCT_DESTROY, ct);
+	if (!test_bit(IPS_DYING_BIT, &ct->status))
+		nf_conntrack_event(IPCT_DESTROY, ct);
 	set_bit(IPS_DYING_BIT, &ct->status);
 
 	/* To make sure we don't get any weird locking issues here:
@@ -972,8 +973,20 @@ void nf_ct_iterate_cleanup(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
 
+struct __nf_ct_flush_report {
+	u32 pid;
+	int report;
+};
+
 static int kill_all(struct nf_conn *i, void *data)
 {
+	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+
+	/* get_next_corpse sets the dying bit for us */
+	nf_conntrack_event_report(IPCT_DESTROY,
+				  i,
+				  fr->pid,
+				  fr->report);
 	return 1;
 }
 
@@ -987,9 +1000,13 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush(struct net *net)
+void nf_conntrack_flush(struct net *net, u32 pid, int report)
 {
-	nf_ct_iterate_cleanup(net, kill_all, NULL);
+	struct __nf_ct_flush_report fr = {
+		.pid 	= pid,
+		.report = report,
+	};
+	nf_ct_iterate_cleanup(net, kill_all, &fr);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush);
 
@@ -1005,7 +1022,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	nf_ct_event_cache_flush(net);
 	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
-	nf_conntrack_flush(net);
+	nf_conntrack_flush(net, 0, 0);
 	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index a5f5e2e65d13..dee4190209cc 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -35,9 +35,17 @@ static inline void
 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
 {
 	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
-	    && ecache->events)
-		atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
-				    ecache->ct);
+	    && ecache->events) {
+		struct nf_ct_event item = {
+			.ct 	= ecache->ct,
+			.pid	= 0,
+			.report	= 0
+		};
+
+		atomic_notifier_call_chain(&nf_conntrack_chain,
+					   ecache->events,
+					   &item);
+	}
 
 	ecache->events = 0;
 	nf_ct_put(ecache->ct);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 37a703bc3b8e..3a8a34a6d37c 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -362,7 +362,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
 	return 1;
 }
 
-int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 {
 	const struct nf_conntrack_expect_policy *p;
 	struct nf_conntrack_expect *i;
@@ -371,11 +371,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *n;
 	unsigned int h;
-	int ret;
-
-	NF_CT_ASSERT(master_help);
+	int ret = 0;
 
-	spin_lock_bh(&nf_conntrack_lock);
 	if (!master_help->helper) {
 		ret = -ESHUTDOWN;
 		goto out;
@@ -409,18 +406,50 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 			printk(KERN_WARNING
 			       "nf_conntrack: expectation table full\n");
 		ret = -EMFILE;
-		goto out;
 	}
+out:
+	return ret;
+}
+
+int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+{
+	int ret;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	ret = __nf_ct_expect_check(expect);
+	if (ret < 0)
+		goto out;
 
 	nf_ct_expect_insert(expect);
+	atomic_inc(&expect->use);
+	spin_unlock_bh(&nf_conntrack_lock);
 	nf_ct_expect_event(IPEXP_NEW, expect);
-	ret = 0;
+	nf_ct_expect_put(expect);
+	return ret;
 out:
 	spin_unlock_bh(&nf_conntrack_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_related);
 
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
+				u32 pid, int report)
+{
+	int ret;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	ret = __nf_ct_expect_check(expect);
+	if (ret < 0)
+		goto out;
+	nf_ct_expect_insert(expect);
+out:
+	spin_unlock_bh(&nf_conntrack_lock);
+	if (ret == 0)
+		nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+
 #ifdef CONFIG_PROC_FS
 struct ct_expect_iter_state {
 	struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4f6486cfd337..ccc5ef1d7573 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -410,7 +410,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
-	struct nf_conn *ct = (struct nf_conn *)ptr;
+	struct nf_ct_event *item = (struct nf_ct_event *)ptr;
+	struct nf_conn *ct = item->ct;
 	struct sk_buff *skb;
 	unsigned int type;
 	sk_buff_data_t b;
@@ -443,7 +444,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	b = skb->tail;
 
 	type |= NFNL_SUBSYS_CTNETLINK << 8;
-	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+	nlh   = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
 	nfmsg = NLMSG_DATA(nlh);
 
 	nlh->nlmsg_flags    = flags;
@@ -511,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	rcu_read_unlock();
 
 	nlh->nlmsg_len = skb->tail - b;
-	nfnetlink_send(skb, 0, group, 0);
+	nfnetlink_send(skb, item->pid, group, item->report);
 	return NOTIFY_DONE;
 
 nla_put_failure:
@@ -722,7 +723,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
 	else {
 		/* Flush the whole table */
-		nf_conntrack_flush(&init_net);
+		nf_conntrack_flush(&init_net, 
+				   NETLINK_CB(skb).pid, 
+				   nlmsg_report(nlh));
 		return 0;
 	}
 
@@ -743,6 +746,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
+	nf_conntrack_event_report(IPCT_DESTROY,
+				  ct,
+				  NETLINK_CB(skb).pid,
+				  nlmsg_report(nlh));
+
+	/* death_by_timeout would report the event again */
+	set_bit(IPS_DYING_BIT, &ct->status);
+
 	nf_ct_kill(ct);
 	nf_ct_put(ct);
 
@@ -1088,11 +1099,35 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
 	return 0;
 }
 
+static inline void
+ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report)
+{
+	unsigned int events = 0;
+
+	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+		events |= IPCT_RELATED;
+	else
+		events |= IPCT_NEW;
+
+	nf_conntrack_event_report(IPCT_STATUS |
+				  IPCT_HELPER |
+				  IPCT_REFRESH |
+				  IPCT_PROTOINFO |
+				  IPCT_NATSEQADJ |
+				  IPCT_MARK |
+				  events,
+				  ct,
+				  pid,
+				  report);
+}
+
 static int
 ctnetlink_create_conntrack(struct nlattr *cda[],
 			   struct nf_conntrack_tuple *otuple,
 			   struct nf_conntrack_tuple *rtuple,
-			   struct nf_conn *master_ct)
+			   struct nf_conn *master_ct,
+			   u32 pid,
+			   int report)
 {
 	struct nf_conn *ct;
 	int err = -EINVAL;
@@ -1198,9 +1233,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 		ct->master = master_ct;
 	}
 
+	nf_conntrack_get(&ct->ct_general);
 	add_timer(&ct->timeout);
 	nf_conntrack_hash_insert(ct);
 	rcu_read_unlock();
+	ctnetlink_event_report(ct, pid, report);
+	nf_ct_put(ct);
 
 	return 0;
 
@@ -1265,7 +1303,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			err = ctnetlink_create_conntrack(cda,
 							 &otuple,
 							 &rtuple,
-							 master_ct);
+							 master_ct,
+							 NETLINK_CB(skb).pid,
+							 nlmsg_report(nlh));
 		if (err < 0 && master_ct)
 			nf_ct_put(master_ct);
 
@@ -1277,6 +1317,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	 * so there's no need to increase the refcount */
 	err = -EEXIST;
 	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
 		/* we only allow nat config for new conntracks */
 		if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
 			err = -EOPNOTSUPP;
@@ -1287,8 +1329,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			err = -EOPNOTSUPP;
 			goto out_unlock;
 		}
-		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
-						 cda);
+
+		err = ctnetlink_change_conntrack(ct, cda);
+		if (err == 0) {
+			nf_conntrack_get(&ct->ct_general);
+			spin_unlock_bh(&nf_conntrack_lock);
+			ctnetlink_event_report(ct,
+					       NETLINK_CB(skb).pid,
+					       nlmsg_report(nlh));
+			nf_ct_put(ct);
+		} else
+			spin_unlock_bh(&nf_conntrack_lock);
+
+		return err;
 	}
 
 out_unlock:
@@ -1423,7 +1476,8 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
+	struct nf_exp_event *item = (struct nf_exp_event *)ptr;
+	struct nf_conntrack_expect *exp = item->exp;
 	struct sk_buff *skb;
 	unsigned int type;
 	sk_buff_data_t b;
@@ -1445,7 +1499,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	b = skb->tail;
 
 	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+	nlh   = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
 	nfmsg = NLMSG_DATA(nlh);
 
 	nlh->nlmsg_flags    = flags;
@@ -1459,7 +1513,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	rcu_read_unlock();
 
 	nlh->nlmsg_len = skb->tail - b;
-	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
+	nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
 	return NOTIFY_DONE;
 
 nla_put_failure:
@@ -1673,7 +1727,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[])
 }
 
 static int
-ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
+ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report)
 {
 	struct nf_conntrack_tuple tuple, mask, master_tuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
@@ -1720,7 +1774,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
 	memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
 	exp->mask.src.u.all = mask.src.u.all;
 
-	err = nf_ct_expect_related(exp);
+	err = nf_ct_expect_related_report(exp, pid, report);
 	nf_ct_expect_put(exp);
 
 out:
@@ -1753,8 +1807,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
 		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_expect(cda, u3);
+		if (nlh->nlmsg_flags & NLM_F_CREATE) {
+			err = ctnetlink_create_expect(cda,
+						      u3,
+						      NETLINK_CB(skb).pid,
+						      nlmsg_report(nlh));
+		}
 		return err;
 	}
 
-- 
cgit v1.2.3


From d9e150071d18b5c87ba7a097af4063a5ad0c6a0c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 18 Nov 2008 12:16:52 +0100
Subject: netfilter: nfnetlink_log: fix warning and prototype mismatch

net/netfilter/nfnetlink_log.c:537:1: warning: symbol 'nfulnl_log_packet' was not declared. Should it be static?

Including the proper header also revealed an incorrect prototype.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nfnetlink_log.h | 2 +-
 net/netfilter/nfnetlink_log.c         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
index 9b67f948a8d7..b0569ff0775e 100644
--- a/include/net/netfilter/nfnetlink_log.h
+++ b/include/net/netfilter/nfnetlink_log.h
@@ -2,7 +2,7 @@
 #define _KER_NFNETLINK_LOG_H
 
 void
-nfulnl_log_packet(unsigned int pf,
+nfulnl_log_packet(u_int8_t pf,
 		  unsigned int hooknum,
 		  const struct sk_buff *skb,
 		  const struct net_device *in,
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index a51892b3f01a..2770b4e57ea0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -30,6 +30,7 @@
 #include <linux/random.h>
 #include <net/sock.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netfilter/nfnetlink_log.h>
 
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3


From ca3ea02e90d63a6a91c1c2a445d6d71f9031a44a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 18 Nov 2008 20:40:36 +0000
Subject: ASoC: Remove unused snd_soc_machine_config declaration

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index fa1b99b45893..077dfe4e51f0 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -228,7 +228,6 @@ struct snd_soc_dai_mode;
 struct snd_soc_pcm_runtime;
 struct snd_soc_dai;
 struct snd_soc_codec;
-struct snd_soc_machine_config;
 struct soc_enum;
 struct snd_soc_ac97_ops;
 struct snd_soc_clock_info;
-- 
cgit v1.2.3


From 1e291b14c8f1101b9093434489bd4dc0e03f3d0f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 12 Nov 2008 18:54:42 +0000
Subject: of: Add helpers for finding device nodes which have a given property

This commit adds a routine for finding a device node which has a
certain property.  The contents of the property are not taken into
account, merely the presence or absence of the property.

Based on that routine, we add a for_each_ macro for iterating over all
nodes that have a certain property.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/of/base.c  | 35 +++++++++++++++++++++++++++++++++++
 include/linux/of.h |  6 ++++++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 7c79e94a35ea..4f884a358a7b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -328,6 +328,41 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 }
 EXPORT_SYMBOL(of_find_compatible_node);
 
+/**
+ *	of_find_node_with_property - Find a node which has a property with
+ *                                   the given name.
+ *	@from:		The node to start searching from or NULL, the node
+ *			you pass will not be searched, only the next one
+ *			will; typically, you pass what the previous call
+ *			returned. of_node_put() will be called on it
+ *	@prop_name:	The name of the property to look for.
+ *
+ *	Returns a node pointer with refcount incremented, use
+ *	of_node_put() on it when done.
+ */
+struct device_node *of_find_node_with_property(struct device_node *from,
+	const char *prop_name)
+{
+	struct device_node *np;
+	struct property *pp;
+
+	read_lock(&devtree_lock);
+	np = from ? from->allnext : allnodes;
+	for (; np; np = np->allnext) {
+		for (pp = np->properties; pp != 0; pp = pp->next) {
+			if (of_prop_cmp(pp->name, prop_name) == 0) {
+				of_node_get(np);
+				goto out;
+			}
+		}
+	}
+out:
+	of_node_put(from);
+	read_unlock(&devtree_lock);
+	return np;
+}
+EXPORT_SYMBOL(of_find_node_with_property);
+
 /**
  * of_match_node - Tell if an device_node has a matching of_match structure
  *	@matches:	array of of device match structures to search in
diff --git a/include/linux/of.h b/include/linux/of.h
index e2488f5e7cb2..6a7efa242f5e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -57,6 +57,12 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
 	for (child = of_get_next_child(parent, NULL); child != NULL; \
 	     child = of_get_next_child(parent, child))
 
+extern struct device_node *of_find_node_with_property(
+	struct device_node *from, const char *prop_name);
+#define for_each_node_with_property(dn, prop_name) \
+	for (dn = of_find_node_with_property(NULL, prop_name); dn; \
+	     dn = of_find_node_with_property(dn, prop_name))
+
 extern struct property *of_find_property(const struct device_node *np,
 					 const char *name,
 					 int *lenp);
-- 
cgit v1.2.3


From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 19 Nov 2008 15:44:53 -0800
Subject: include/net net/ - csum_partial - remove unnecessary casts

The first argument to csum_partial is const void *
casts to char/u8 * are not necessary

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 2 +-
 include/net/ip_vs.h    | 6 +++---
 net/core/netpoll.c     | 2 +-
 net/ipv4/inet_lro.c    | 4 ++--
 net/ipv4/tcp_ipv4.c    | 4 ++--
 net/ipv6/icmp.c        | 4 ++--
 net/ipv6/mcast.c       | 2 +-
 net/ipv6/ndisc.c       | 4 ++--
 net/ipv6/tcp_ipv6.c    | 6 +++---
 net/unix/af_unix.c     | 4 ++--
 10 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 07602b7fa218..ba55d8b8c87c 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -98,7 +98,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
 {
 	__be32 diff[] = { ~from, to };
 
-	*sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum)));
+	*sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum)));
 }
 
 static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 8f6abf4883e3..ab9b003ab671 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -913,7 +913,7 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
 {
 	__be32 diff[2] = { ~old, new };
 
-	return csum_partial((char *) diff, sizeof(diff), oldsum);
+	return csum_partial(diff, sizeof(diff), oldsum);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -923,7 +923,7 @@ static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
 	__be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
 			    new[3],  new[2],  new[1],  new[0] };
 
-	return csum_partial((char *) diff, sizeof(diff), oldsum);
+	return csum_partial(diff, sizeof(diff), oldsum);
 }
 #endif
 
@@ -931,7 +931,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 {
 	__be16 diff[2] = { ~old, new };
 
-	return csum_partial((char *) diff, sizeof(diff), oldsum);
+	return csum_partial(diff, sizeof(diff), oldsum);
 }
 
 #endif /* __KERNEL__ */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 34f5d072f168..fc4e28e23b89 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -343,7 +343,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	udph->check = csum_tcpudp_magic(htonl(np->local_ip),
 					htonl(np->remote_ip),
 					udp_len, IPPROTO_UDP,
-					csum_partial((unsigned char *)udph, udp_len, 0));
+					csum_partial(udph, udp_len, 0));
 	if (udph->check == 0)
 		udph->check = CSUM_MANGLED_0;
 
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index cfd034a2b96e..6a667dae315e 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
 	iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
 
 	tcph->check = 0;
-	tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0);
+	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
 	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
 	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 					lro_desc->ip_tot_len -
@@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 	__wsum tcp_ps_hdr_csum;
 
 	tcp_csum = ~csum_unfold(tcph->check);
-	tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum);
+	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
 
 	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 					     len + TCP_HDR_LEN(tcph),
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b2e3ab2287ba..5559fea61e87 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -492,7 +492,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
-					 csum_partial((char *)th,
+					 csum_partial(th,
 						      th->doff << 2,
 						      skb->csum));
 	}
@@ -726,7 +726,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 		th->check = tcp_v4_check(skb->len,
 					 ireq->loc_addr,
 					 ireq->rmt_addr,
-					 csum_partial((char *)th, skb->len,
+					 csum_partial(th, skb->len,
 						      skb->csum));
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be351009fd03..a77b8d103804 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	icmp6h->icmp6_cksum = 0;
 
 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
-		skb->csum = csum_partial((char *)icmp6h,
+		skb->csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), skb->csum);
 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
 						      &fl->fl6_dst,
@@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 			tmp_csum = csum_add(tmp_csum, skb->csum);
 		}
 
-		tmp_csum = csum_partial((char *)icmp6h,
+		tmp_csum = csum_partial(icmp6h,
 					sizeof(struct icmp6hdr), tmp_csum);
 		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
 						      &fl->fl6_dst,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a76199ecad23..870a1d64605a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len,
 					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr, len, 0));
+					   csum_partial(hdr, len, 0));
 
 	idev = in6_dev_get(skb->dev);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fbf451c0d77a..af6705f03b5c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -491,7 +491,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
 
 	hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
 					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr,
+					   csum_partial(hdr,
 							len, 0));
 
 	return skb;
@@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
 					     len, IPPROTO_ICMPV6,
-					     csum_partial((u8 *) icmph, len, 0));
+					     csum_partial(icmph, len, 0));
 
 	buff->dst = dst;
 	idev = in6_dev_get(dst->dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b35787056313..a5d750acd793 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 
 		th->check = tcp_v6_check(th, skb->len,
 					 &treq->loc_addr, &treq->rmt_addr,
-					 csum_partial((char *)th, skb->len, skb->csum));
+					 csum_partial(th, skb->len, skb->csum));
 
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt, 0);
@@ -915,7 +915,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
-					    csum_partial((char *)th, th->doff<<2,
+					    csum_partial(th, th->doff<<2,
 							 skb->csum));
 	}
 }
@@ -997,7 +997,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	}
 #endif
 
-	buff->csum = csum_partial((char *)t1, tot_len, 0);
+	buff->csum = csum_partial(t1, tot_len, 0);
 
 	memset(&fl, 0, sizeof(fl));
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f2cf3f583f62..ebc4a9a4b1f7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 		return len;
 	}
 
-	*hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0));
+	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 	return len;
 }
 
@@ -686,7 +686,7 @@ static int unix_autobind(struct socket *sock)
 
 retry:
 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
-	addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0));
+	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 
 	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
-- 
cgit v1.2.3


From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 19 Nov 2008 21:32:24 -0800
Subject: netdev: network device operations infrastructure

This patch changes the network device internal API to move adminstrative
operations out of the network device structure and into a separate structure.

This patch involves some hackery to maintain compatablity between the
new and old model, so all 300+ drivers don't have to be changed at once.
For drivers that aren't converted yet, the netdevice_ops virt function list
still resides in the net_device structure. For old protocols, the new
net_device_ops are copied out to the old net_device pointers.

After the transistion is completed the nag message can be changed to
an WARN_ON, and the compatiablity code can be made configurable.

Some function pointers aren't moved:
* destructor can't be in net_device_ops because
  it may need to be referenced after the module is unloaded.
* neighbor setup is manipulated in a couple of places that need special
  consideration
* hard_start_xmit is in the fast path for transmit.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 232 +++++++++++++++++++++++++++++++++-------------
 net/Kconfig               |   3 +
 net/core/dev.c            | 109 +++++++++++++++-------
 net/core/netpoll.c        |   7 +-
 net/core/rtnetlink.c      |   9 +-
 net/sched/sch_generic.c   |   4 +-
 6 files changed, 259 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 12d7f4469dc9..9060f5f3517a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -451,6 +451,131 @@ struct netdev_queue {
 	struct Qdisc		*qdisc_sleeping;
 } ____cacheline_aligned_in_smp;
 
+
+/*
+ * This structure defines the management hooks for network devices.
+ * The following hooks can bed defined and are optonal (can be null)
+ * unless otherwise noted.
+ *
+ * int (*ndo_init)(struct net_device *dev);
+ *     This function is called once when network device is registered.
+ *     The network device can use this to any late stage initializaton
+ *     or semantic validattion. It can fail with an error code which will
+ *     be propogated back to register_netdev
+ *
+ * void (*ndo_uninit)(struct net_device *dev);
+ *     This function is called when device is unregistered or when registration
+ *     fails. It is not called if init fails.
+ *
+ * int (*ndo_open)(struct net_device *dev);
+ *     This function is called when network device transistions to the up
+ *     state.
+ *
+ * int (*ndo_stop)(struct net_device *dev);
+ *     This function is called when network device transistions to the down
+ *     state.
+ *
+ * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
+ *	This function is called to allow device receiver to make
+ *	changes to configuration when multicast or promiscious is enabled.
+ *
+ * void (*ndo_set_rx_mode)(struct net_device *dev);
+ *	This function is called device changes address list filtering.
+ *
+ * void (*ndo_set_multicast_list)(struct net_device *dev);
+ *	This function is called when the multicast address list changes.
+ *
+ * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
+ *	This function  is called when the Media Access Control address
+ *	needs to be changed. If not this interface is not defined, the
+ *	mac address can not be changed.
+ *
+ * int (*ndo_validate_addr)(struct net_device *dev);
+ *	Test if Media Access Control address is valid for the device.
+ *
+ * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
+ *	Called when a user request an ioctl which can't be handled by
+ *	the generic interface code. If not defined ioctl's return
+ *	not supported error code.
+ *
+ * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
+ *	Used to set network devices bus interface parameters. This interface
+ *	is retained for legacy reason, new devices should use the bus
+ *	interface (PCI) for low level management.
+ *
+ * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
+ *	Called when a user wants to change the Maximum Transfer Unit
+ *	of a device. If not defined, any request to change MTU will
+ *	will return an error.
+ *
+ * void (*ndo_tx_timeout) (struct net_device *dev);
+ *	Callback uses when the transmitter has not made any progress
+ *	for dev->watchdog ticks.
+ *
+ * struct net_device_stats* (*get_stats)(struct net_device *dev);
+ *	Called when a user wants to get the network device usage
+ *	statistics. If not defined, the counters in dev->stats will
+ *	be used.
+ *
+ * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp);
+ *	If device support VLAN receive accleration
+ *	(ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called
+ *	when vlan groups for the device changes.  Note: grp is NULL
+ *	if no vlan's groups are being used.
+ *
+ * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
+ *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
+ *	this function is called when a VLAN id is registered.
+ *
+ * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
+ *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
+ *	this function is called when a VLAN id is unregistered.
+ *
+ * void (*ndo_poll_controller)(struct net_device *dev);
+ */
+struct net_device_ops {
+	int			(*ndo_init)(struct net_device *dev);
+	void			(*ndo_uninit)(struct net_device *dev);
+	int			(*ndo_open)(struct net_device *dev);
+	int			(*ndo_stop)(struct net_device *dev);
+#define HAVE_CHANGE_RX_FLAGS
+	void			(*ndo_change_rx_flags)(struct net_device *dev,
+						       int flags);
+#define HAVE_SET_RX_MODE
+	void			(*ndo_set_rx_mode)(struct net_device *dev);
+#define HAVE_MULTICAST
+	void			(*ndo_set_multicast_list)(struct net_device *dev);
+#define HAVE_SET_MAC_ADDR
+	int			(*ndo_set_mac_address)(struct net_device *dev,
+						       void *addr);
+#define HAVE_VALIDATE_ADDR
+	int			(*ndo_validate_addr)(struct net_device *dev);
+#define HAVE_PRIVATE_IOCTL
+	int			(*ndo_do_ioctl)(struct net_device *dev,
+					        struct ifreq *ifr, int cmd);
+#define HAVE_SET_CONFIG
+	int			(*ndo_set_config)(struct net_device *dev,
+					          struct ifmap *map);
+#define HAVE_CHANGE_MTU
+	int			(*ndo_change_mtu)(struct net_device *dev, int new_mtu);
+
+#define HAVE_TX_TIMEOUT
+	void			(*ndo_tx_timeout) (struct net_device *dev);
+
+	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
+
+	void			(*ndo_vlan_rx_register)(struct net_device *dev,
+						        struct vlan_group *grp);
+	void			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
+						       unsigned short vid);
+	void			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
+						        unsigned short vid);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+#define HAVE_NETDEV_POLL
+	void                    (*ndo_poll_controller)(struct net_device *dev);
+#endif
+};
+
 /*
  *	The DEVICE structure.
  *	Actually, this whole structure is a big mistake.  It mixes I/O
@@ -498,11 +623,6 @@ struct net_device
 #ifdef CONFIG_NETPOLL
 	struct list_head	napi_list;
 #endif
-	
-	/* The device initialization function. Called only once. */
-	int			(*init)(struct net_device *dev);
-
-	/* ------- Fields preinitialized in Space.c finish here ------- */
 
 	/* Net device features */
 	unsigned long		features;
@@ -546,15 +666,13 @@ struct net_device
 	 * for all in netdev_increment_features.
 	 */
 #define NETIF_F_ONE_FOR_ALL	(NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
-				 NETIF_F_SG | NETIF_F_HIGHDMA | \
+				 NETIF_F_SG | NETIF_F_HIGHDMA |		\
 				 NETIF_F_FRAGLIST)
 
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
 	int			iflink;
 
-
-	struct net_device_stats* (*get_stats)(struct net_device *dev);
 	struct net_device_stats	stats;
 
 #ifdef CONFIG_WIRELESS_EXT
@@ -564,18 +682,13 @@ struct net_device
 	/* Instance data managed by the core of Wireless Extensions. */
 	struct iw_public_data *	wireless_data;
 #endif
+	/* Management operations */
+	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
 
 	/* Hardware header description */
 	const struct header_ops *header_ops;
 
-	/*
-	 * This marks the end of the "visible" part of the structure. All
-	 * fields hereafter are internal to the system, and may change at
-	 * will (read: may be cleaned up at will).
-	 */
-
-
 	unsigned int		flags;	/* interface flags (a la BSD)	*/
 	unsigned short		gflags;
         unsigned short          priv_flags; /* Like 'flags' but invisible to userspace. */
@@ -634,7 +747,7 @@ struct net_device
 	unsigned long		last_rx;	/* Time of last Rx	*/
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast 
-							because most packets are unicast) */
+							   because most packets are unicast) */
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
@@ -648,6 +761,10 @@ struct net_device
 	/* Number of TX queues currently active in device  */
 	unsigned int		real_num_tx_queues;
 
+	/* Map buffer to appropriate transmit queue */
+	u16			(*select_queue)(struct net_device *dev,
+						struct sk_buff *skb);
+
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 /*
@@ -662,9 +779,6 @@ struct net_device
 	int			watchdog_timeo; /* used by dev_watchdog() */
 	struct timer_list	watchdog_timer;
 
-/*
- * refcnt is a very hot point, so align it on SMP
- */
 	/* Number of references to this device */
 	atomic_t		refcnt ____cacheline_aligned_in_smp;
 
@@ -683,56 +797,14 @@ struct net_device
 	       NETREG_RELEASED,		/* called free_netdev */
 	} reg_state;
 
-	/* Called after device is detached from network. */
-	void			(*uninit)(struct net_device *dev);
-	/* Called after last user reference disappears. */
-	void			(*destructor)(struct net_device *dev);
+	/* Called from unregister, can be used to call free_netdev */
+	void (*destructor)(struct net_device *dev);
 
-	/* Pointers to interface service routines.	*/
-	int			(*open)(struct net_device *dev);
-	int			(*stop)(struct net_device *dev);
-#define HAVE_NETDEV_POLL
-#define HAVE_CHANGE_RX_FLAGS
-	void			(*change_rx_flags)(struct net_device *dev,
-						   int flags);
-#define HAVE_SET_RX_MODE
-	void			(*set_rx_mode)(struct net_device *dev);
-#define HAVE_MULTICAST			 
-	void			(*set_multicast_list)(struct net_device *dev);
-#define HAVE_SET_MAC_ADDR  		 
-	int			(*set_mac_address)(struct net_device *dev,
-						   void *addr);
-#define HAVE_VALIDATE_ADDR
-	int			(*validate_addr)(struct net_device *dev);
-#define HAVE_PRIVATE_IOCTL
-	int			(*do_ioctl)(struct net_device *dev,
-					    struct ifreq *ifr, int cmd);
-#define HAVE_SET_CONFIG
-	int			(*set_config)(struct net_device *dev,
-					      struct ifmap *map);
-#define HAVE_CHANGE_MTU
-	int			(*change_mtu)(struct net_device *dev, int new_mtu);
+	int (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
 
-#define HAVE_TX_TIMEOUT
-	void			(*tx_timeout) (struct net_device *dev);
-
-	void			(*vlan_rx_register)(struct net_device *dev,
-						    struct vlan_group *grp);
-	void			(*vlan_rx_add_vid)(struct net_device *dev,
-						   unsigned short vid);
-	void			(*vlan_rx_kill_vid)(struct net_device *dev,
-						    unsigned short vid);
-
-	int			(*neigh_setup)(struct net_device *dev, struct neigh_parms *);
 #ifdef CONFIG_NETPOLL
 	struct netpoll_info	*npinfo;
 #endif
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	void                    (*poll_controller)(struct net_device *dev);
-#endif
-
-	u16			(*select_queue)(struct net_device *dev,
-						struct sk_buff *skb);
 
 #ifdef CONFIG_NET_NS
 	/* Network namespace this network device is inside */
@@ -763,6 +835,38 @@ struct net_device
 	/* for setting kernel sock attribute on TCP connection setup */
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
+
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+	struct {
+		int			(*init)(struct net_device *dev);
+		void			(*uninit)(struct net_device *dev);
+		int			(*open)(struct net_device *dev);
+		int			(*stop)(struct net_device *dev);
+		void			(*change_rx_flags)(struct net_device *dev,
+							   int flags);
+		void			(*set_rx_mode)(struct net_device *dev);
+		void			(*set_multicast_list)(struct net_device *dev);
+		int			(*set_mac_address)(struct net_device *dev,
+							   void *addr);
+		int			(*validate_addr)(struct net_device *dev);
+		int			(*do_ioctl)(struct net_device *dev,
+						    struct ifreq *ifr, int cmd);
+		int			(*set_config)(struct net_device *dev,
+						      struct ifmap *map);
+		int			(*change_mtu)(struct net_device *dev, int new_mtu);
+		void			(*tx_timeout) (struct net_device *dev);
+		struct net_device_stats* (*get_stats)(struct net_device *dev);
+		void			(*vlan_rx_register)(struct net_device *dev,
+							    struct vlan_group *grp);
+		void			(*vlan_rx_add_vid)(struct net_device *dev,
+							   unsigned short vid);
+		void			(*vlan_rx_kill_vid)(struct net_device *dev,
+							    unsigned short vid);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+		void                    (*poll_controller)(struct net_device *dev);
+#endif
+#endif
+	};
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/net/Kconfig b/net/Kconfig
index 8c3d97ca0d96..4e2e40ba8ba6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -32,6 +32,9 @@ config NET_NS
 	  Allow user space to create what appear to be multiple instances
 	  of the network stack.
 
+config COMPAT_NET_DEV_OPS
+       def_bool y
+
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
diff --git a/net/core/dev.c b/net/core/dev.c
index e08c0fcd603b..ca14ab407b33 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1059,6 +1059,7 @@ void dev_load(struct net *net, const char *name)
  */
 int dev_open(struct net_device *dev)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	int ret = 0;
 
 	ASSERT_RTNL();
@@ -1081,11 +1082,11 @@ int dev_open(struct net_device *dev)
 	 */
 	set_bit(__LINK_STATE_START, &dev->state);
 
-	if (dev->validate_addr)
-		ret = dev->validate_addr(dev);
+	if (ops->ndo_validate_addr)
+		ret = ops->ndo_validate_addr(dev);
 
-	if (!ret && dev->open)
-		ret = dev->open(dev);
+	if (!ret && ops->ndo_open)
+		ret = ops->ndo_open(dev);
 
 	/*
 	 *	If it went open OK then:
@@ -1129,6 +1130,7 @@ int dev_open(struct net_device *dev)
  */
 int dev_close(struct net_device *dev)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	ASSERT_RTNL();
 
 	might_sleep();
@@ -1161,8 +1163,8 @@ int dev_close(struct net_device *dev)
 	 *	We allow it to be called even after a DETACH hot-plug
 	 *	event.
 	 */
-	if (dev->stop)
-		dev->stop(dev);
+	if (ops->ndo_stop)
+		ops->ndo_stop(dev);
 
 	/*
 	 *	Device is now down.
@@ -2930,8 +2932,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
-	if (dev->flags & IFF_UP && dev->change_rx_flags)
-		dev->change_rx_flags(dev, flags);
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+		ops->ndo_change_rx_flags(dev, flags);
 }
 
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
@@ -3051,6 +3055,8 @@ int dev_set_allmulti(struct net_device *dev, int inc)
  */
 void __dev_set_rx_mode(struct net_device *dev)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
+
 	/* dev_open will call this function so the list will stay sane. */
 	if (!(dev->flags&IFF_UP))
 		return;
@@ -3058,8 +3064,8 @@ void __dev_set_rx_mode(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return;
 
-	if (dev->set_rx_mode)
-		dev->set_rx_mode(dev);
+	if (ops->ndo_set_rx_mode)
+		ops->ndo_set_rx_mode(dev);
 	else {
 		/* Unicast addresses changes may only happen under the rtnl,
 		 * therefore calling __dev_set_promiscuity here is safe.
@@ -3072,8 +3078,8 @@ void __dev_set_rx_mode(struct net_device *dev)
 			dev->uc_promisc = 0;
 		}
 
-		if (dev->set_multicast_list)
-			dev->set_multicast_list(dev);
+		if (ops->ndo_set_multicast_list)
+			ops->ndo_set_multicast_list(dev);
 	}
 }
 
@@ -3432,6 +3438,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	int err;
 
 	if (new_mtu == dev->mtu)
@@ -3445,10 +3452,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 		return -ENODEV;
 
 	err = 0;
-	if (dev->change_mtu)
-		err = dev->change_mtu(dev, new_mtu);
+	if (ops->ndo_change_mtu)
+		err = ops->ndo_change_mtu(dev, new_mtu);
 	else
 		dev->mtu = new_mtu;
+
 	if (!err && dev->flags & IFF_UP)
 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
 	return err;
@@ -3463,15 +3471,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
  */
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	int err;
 
-	if (!dev->set_mac_address)
+	if (!ops->ndo_set_mac_address)
 		return -EOPNOTSUPP;
 	if (sa->sa_family != dev->type)
 		return -EINVAL;
 	if (!netif_device_present(dev))
 		return -ENODEV;
-	err = dev->set_mac_address(dev, sa);
+	err = ops->ndo_set_mac_address(dev, sa);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	return err;
@@ -3551,6 +3560,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 {
 	int err;
 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	const struct net_device_ops *ops = dev->netdev_ops;
 
 	if (!dev)
 		return -ENODEV;
@@ -3578,15 +3588,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 			return 0;
 
 		case SIOCSIFMAP:
-			if (dev->set_config) {
+			if (ops->ndo_set_config) {
 				if (!netif_device_present(dev))
 					return -ENODEV;
-				return dev->set_config(dev, &ifr->ifr_map);
+				return ops->ndo_set_config(dev, &ifr->ifr_map);
 			}
 			return -EOPNOTSUPP;
 
 		case SIOCADDMULTI:
-			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
 				return -EINVAL;
 			if (!netif_device_present(dev))
@@ -3595,7 +3605,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 					  dev->addr_len, 1);
 
 		case SIOCDELMULTI:
-			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
 				return -EINVAL;
 			if (!netif_device_present(dev))
@@ -3633,10 +3643,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 			    cmd == SIOCBRDELIF ||
 			    cmd == SIOCWANDEV) {
 				err = -EOPNOTSUPP;
-				if (dev->do_ioctl) {
+				if (ops->ndo_do_ioctl) {
 					if (netif_device_present(dev))
-						err = dev->do_ioctl(dev, ifr,
-								    cmd);
+						err = ops->ndo_do_ioctl(dev, ifr, cmd);
 					else
 						err = -ENODEV;
 				}
@@ -3897,8 +3906,8 @@ static void rollback_registered(struct net_device *dev)
 	 */
 	dev_addr_discard(dev);
 
-	if (dev->uninit)
-		dev->uninit(dev);
+	if (dev->netdev_ops->ndo_uninit)
+		dev->netdev_ops->ndo_uninit(dev);
 
 	/* Notifier chain MUST detach us from master device. */
 	WARN_ON(dev->master);
@@ -3988,7 +3997,7 @@ int register_netdevice(struct net_device *dev)
 	struct hlist_head *head;
 	struct hlist_node *p;
 	int ret;
-	struct net *net;
+	struct net *net = dev_net(dev);
 
 	BUG_ON(dev_boot_phase);
 	ASSERT_RTNL();
@@ -3997,8 +4006,7 @@ int register_netdevice(struct net_device *dev)
 
 	/* When net_device's are persistent, this will be fatal. */
 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
-	BUG_ON(!dev_net(dev));
-	net = dev_net(dev);
+	BUG_ON(!net);
 
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
@@ -4006,9 +4014,46 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+	/* Netdevice_ops API compatiability support.
+	 * This is temporary until all network devices are converted.
+	 */
+	if (dev->netdev_ops) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+
+		dev->init = ops->ndo_init;
+		dev->uninit = ops->ndo_uninit;
+		dev->open = ops->ndo_open;
+		dev->change_rx_flags = ops->ndo_change_rx_flags;
+		dev->set_rx_mode = ops->ndo_set_rx_mode;
+		dev->set_multicast_list = ops->ndo_set_multicast_list;
+		dev->set_mac_address = ops->ndo_set_mac_address;
+		dev->validate_addr = ops->ndo_validate_addr;
+		dev->do_ioctl = ops->ndo_do_ioctl;
+		dev->set_config = ops->ndo_set_config;
+		dev->change_mtu = ops->ndo_change_mtu;
+		dev->tx_timeout = ops->ndo_tx_timeout;
+		dev->get_stats = ops->ndo_get_stats;
+		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+		dev->poll_controller = ops->ndo_poll_controller;
+#endif
+	} else {
+		char drivername[64];
+		pr_info("%s (%s): not using net_device_ops yet\n",
+			dev->name, netdev_drivername(dev, drivername, 64));
+
+		/* This works only because net_device_ops and the
+		   compatiablity structure are the same. */
+		dev->netdev_ops = (void *) &(dev->init);
+	}
+#endif
+
 	/* Init, if this function is available */
-	if (dev->init) {
-		ret = dev->init(dev);
+	if (dev->netdev_ops->ndo_init) {
+		ret = dev->netdev_ops->ndo_init(dev);
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
@@ -4086,8 +4131,8 @@ out:
 	return ret;
 
 err_uninit:
-	if (dev->uninit)
-		dev->uninit(dev);
+	if (dev->netdev_ops->ndo_uninit)
+		dev->netdev_ops->ndo_uninit(dev);
 	goto out;
 }
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc4e28e23b89..630df6034444 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -172,12 +172,13 @@ static void service_arp_queue(struct netpoll_info *npi)
 void netpoll_poll(struct netpoll *np)
 {
 	struct net_device *dev = np->dev;
+	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (!dev || !netif_running(dev) || !dev->poll_controller)
+	if (!dev || !netif_running(dev) || !ops->ndo_poll_controller)
 		return;
 
 	/* Process pending work on NIC */
-	dev->poll_controller(dev);
+	ops->ndo_poll_controller(dev);
 
 	poll_napi(dev);
 
@@ -694,7 +695,7 @@ int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->poll_controller) {
+	if (!ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4dfb6b4d4559..6f8e0778e565 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	int send_addr_notify = 0;
 	int err;
 
@@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		struct rtnl_link_ifmap *u_map;
 		struct ifmap k_map;
 
-		if (!dev->set_config) {
+		if (!ops->ndo_set_config) {
 			err = -EOPNOTSUPP;
 			goto errout;
 		}
@@ -801,7 +802,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		k_map.dma = (unsigned char) u_map->dma;
 		k_map.port = (unsigned char) u_map->port;
 
-		err = dev->set_config(dev, &k_map);
+		err = ops->ndo_set_config(dev, &k_map);
 		if (err < 0)
 			goto errout;
 
@@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		struct sockaddr *sa;
 		int len;
 
-		if (!dev->set_mac_address) {
+		if (!ops->ndo_set_mac_address) {
 			err = -EOPNOTSUPP;
 			goto errout;
 		}
@@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		sa->sa_family = dev->type;
 		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
 		       dev->addr_len);
-		err = dev->set_mac_address(dev, sa);
+		err = ops->ndo_set_mac_address(dev, sa);
 		kfree(sa);
 		if (err)
 			goto errout;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 80c8f3dbbea1..95ab55c064f1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg)
 				char drivername[64];
 				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 				       dev->name, netdev_drivername(dev, drivername, 64));
-				dev->tx_timeout(dev);
+				dev->netdev_ops->ndo_tx_timeout(dev);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
@@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg)
 
 void __netdev_watchdog_up(struct net_device *dev)
 {
-	if (dev->tx_timeout) {
+	if (dev->netdev_ops->ndo_tx_timeout) {
 		if (dev->watchdog_timeo <= 0)
 			dev->watchdog_timeo = 5*HZ;
 		if (!mod_timer(&dev->watchdog_timer,
-- 
cgit v1.2.3


From eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 19 Nov 2008 21:40:23 -0800
Subject: netdev: introduce dev_get_stats()

In order for the network device ops get_stats call to be immutable, the handling
of the default internal network device stats block has to be changed. Add a new
helper function which replaces the old use of internal_get_stats.

Note: change return code to make it clear that the caller should not
go changing the returned statistics.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/appldata/appldata_net_sum.c |  4 ++--
 drivers/net/bonding/bond_main.c       |  5 +++--
 drivers/net/sfc/ethtool.c             |  2 +-
 drivers/parisc/led.c                  |  4 ++--
 include/linux/netdevice.h             |  4 +++-
 net/core/dev.c                        | 23 ++++++++++++++++++-----
 net/core/net-sysfs.c                  |  3 +--
 net/core/rtnetlink.c                  |  6 +++---
 8 files changed, 33 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 3b746556e1a3..fa741f84c5b9 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -67,7 +67,6 @@ static void appldata_get_net_sum_data(void *data)
 	int i;
 	struct appldata_net_sum_data *net_data;
 	struct net_device *dev;
-	struct net_device_stats *stats;
 	unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors,
 			tx_errors, rx_dropped, tx_dropped, collisions;
 
@@ -86,7 +85,8 @@ static void appldata_get_net_sum_data(void *data)
 	collisions = 0;
 	read_lock(&dev_base_lock);
 	for_each_netdev(&init_net, dev) {
-		stats = dev->get_stats(dev);
+		const struct net_device_stats *stats = dev_get_stats(dev);
+
 		rx_packets += stats->rx_packets;
 		tx_packets += stats->tx_packets;
 		rx_bytes   += stats->rx_bytes;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a08ea4808056..db5f5c24a250 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3899,7 +3899,7 @@ static int bond_close(struct net_device *bond_dev)
 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct net_device_stats *stats = &(bond->stats), *sstats;
+	struct net_device_stats *stats = &bond->stats;
 	struct net_device_stats local_stats;
 	struct slave *slave;
 	int i;
@@ -3909,7 +3909,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
-		sstats = slave->dev->get_stats(slave->dev);
+		const struct net_device_stats *sstats = dev_get_stats(slave->dev);
+
 		local_stats.rx_packets += sstats->rx_packets;
 		local_stats.rx_bytes += sstats->rx_bytes;
 		local_stats.rx_errors += sstats->rx_errors;
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index abd8fcd6e62d..cd92c4d8dbc5 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -426,7 +426,7 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
 	EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS);
 
 	/* Update MAC and NIC statistics */
-	net_dev->get_stats(net_dev);
+	dev_get_stats(net_dev);
 
 	/* Fill detailed statistics buffer */
 	for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) {
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index f9b12664f9fb..454b6532e409 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -360,13 +360,13 @@ static __inline__ int led_get_net_activity(void)
 	read_lock(&dev_base_lock);
 	rcu_read_lock();
 	for_each_netdev(&init_net, dev) {
-	    struct net_device_stats *stats;
+	    const struct net_device_stats *stats;
 	    struct in_device *in_dev = __in_dev_get_rcu(dev);
 	    if (!in_dev || !in_dev->ifa_list)
 		continue;
 	    if (ipv4_is_loopback(in_dev->ifa_list->ifa_local))
 		continue;
-	    stats = dev->get_stats(dev);
+	    stats = dev_get_stats(dev);
 	    rx_total += stats->rx_packets;
 	    tx_total += stats->tx_packets;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9060f5f3517a..981a089d5149 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -864,9 +864,9 @@ struct net_device
 							    unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 		void                    (*poll_controller)(struct net_device *dev);
-#endif
 #endif
 	};
+#endif
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -1780,6 +1780,8 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
+extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
+
 extern int		netdev_max_backlog;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
diff --git a/net/core/dev.c b/net/core/dev.c
index ca14ab407b33..8843f4e3f5e1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2620,7 +2620,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	struct net_device_stats *stats = dev->get_stats(dev);
+	const struct net_device_stats *stats = dev_get_stats(dev);
 
 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -4288,10 +4288,24 @@ void netdev_run_todo(void)
 	}
 }
 
-static struct net_device_stats *internal_stats(struct net_device *dev)
-{
-	return &dev->stats;
+/**
+ *	dev_get_stats	- get network device statistics
+ *	@dev: device to get statistics from
+ *
+ *	Get network statistics from device. The device driver may provide
+ *	its own method by setting dev->netdev_ops->get_stats; otherwise
+ *	the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
+ {
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (ops->ndo_get_stats)
+		return ops->ndo_get_stats(dev);
+	else
+		return &dev->stats;
 }
+EXPORT_SYMBOL(dev_get_stats);
 
 static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue,
@@ -4370,7 +4384,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	netdev_init_queues(dev);
 
-	dev->get_stats = internal_stats;
 	netpoll_netdev_init(dev);
 	setup(dev);
 	strcpy(dev->name, name);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 146dcfeb060e..afd42d717320 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d,
 			    unsigned long offset)
 {
 	struct net_device *dev = to_net_dev(d);
-	struct net_device_stats *stats;
 	ssize_t ret = -EINVAL;
 
 	WARN_ON(offset > sizeof(struct net_device_stats) ||
@@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		stats = dev->get_stats(dev);
+		const struct net_device_stats *stats = dev_get_stats(dev);
 		ret = sprintf(buf, fmt_ulong,
 			      *(unsigned long *)(((u8 *) stats) + offset));
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6f8e0778e565..790dd205bb5d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 }
 
 static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
-				 struct net_device_stats *b)
+				 const struct net_device_stats *b)
 {
 	a->rx_packets = b->rx_packets;
 	a->tx_packets = b->tx_packets;
@@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	struct netdev_queue *txq;
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct net_device_stats *stats;
+	const struct net_device_stats *stats;
 	struct nlattr *attr;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (attr == NULL)
 		goto nla_put_failure;
 
-	stats = dev->get_stats(dev);
+	stats = dev_get_stats(dev);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
 	if (dev->rtnl_link_ops) {
-- 
cgit v1.2.3


From ccad637b0c57de1825ffd34c311bf71487545ac2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 19 Nov 2008 22:42:31 -0800
Subject: netdev: expose ethernet address primitives

When ethernet devices are converted, the function pointer setup
by eth_setup() need to be done during intialization.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/usb/gadget/u_ether.c |  4 ++--
 include/linux/etherdevice.h  |  4 ++++
 net/ethernet/eth.c           | 13 ++++++++-----
 3 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 00fa5239879d..d9739d52f8f5 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -146,7 +146,7 @@ static inline int qlen(struct usb_gadget *gadget)
 
 /* NETWORK DRIVER HOOKUP (to the layer above this driver) */
 
-static int eth_change_mtu(struct net_device *net, int new_mtu)
+static int ueth_change_mtu(struct net_device *net, int new_mtu)
 {
 	struct eth_dev	*dev = netdev_priv(net);
 	unsigned long	flags;
@@ -764,7 +764,7 @@ int __init gether_setup(struct usb_gadget *g, u8 ethaddr[ETH_ALEN])
 	if (ethaddr)
 		memcpy(ethaddr, dev->host_mac, ETH_ALEN);
 
-	net->change_mtu = eth_change_mtu;
+	net->change_mtu = ueth_change_mtu;
 	net->hard_start_xmit = eth_start_xmit;
 	net->open = eth_open;
 	net->stop = eth_stop;
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 25d62e6e3290..0e5e97060034 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -41,6 +41,10 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh);
 extern void eth_header_cache_update(struct hh_cache *hh,
 				    const struct net_device *dev,
 				    const unsigned char *haddr);
+extern int eth_mac_addr(struct net_device *dev, void *p);
+extern int eth_change_mtu(struct net_device *dev, int new_mtu);
+extern int eth_validate_addr(struct net_device *dev);
+
 
 
 extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index b9d85af2dd31..a87a171d9914 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -282,7 +282,7 @@ EXPORT_SYMBOL(eth_header_cache_update);
  * This doesn't change hardware matching, so needs to be overridden
  * for most real devices.
  */
-static int eth_mac_addr(struct net_device *dev, void *p)
+int eth_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr = p;
 
@@ -293,6 +293,7 @@ static int eth_mac_addr(struct net_device *dev, void *p)
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
 	return 0;
 }
+EXPORT_SYMBOL(eth_mac_addr);
 
 /**
  * eth_change_mtu - set new MTU size
@@ -302,21 +303,23 @@ static int eth_mac_addr(struct net_device *dev, void *p)
  * Allow changing MTU size. Needs to be overridden for devices
  * supporting jumbo frames.
  */
-static int eth_change_mtu(struct net_device *dev, int new_mtu)
+int eth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
 		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
 }
+EXPORT_SYMBOL(eth_change_mtu);
 
-static int eth_validate_addr(struct net_device *dev)
+int eth_validate_addr(struct net_device *dev)
 {
 	if (!is_valid_ether_addr(dev->dev_addr))
 		return -EADDRNOTAVAIL;
 
 	return 0;
 }
+EXPORT_SYMBOL(eth_validate_addr);
 
 const struct header_ops eth_header_ops ____cacheline_aligned = {
 	.create		= eth_header,
@@ -334,11 +337,11 @@ const struct header_ops eth_header_ops ____cacheline_aligned = {
 void ether_setup(struct net_device *dev)
 {
 	dev->header_ops		= &eth_header_ops;
-
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 	dev->change_mtu		= eth_change_mtu;
 	dev->set_mac_address 	= eth_mac_addr;
 	dev->validate_addr	= eth_validate_addr;
-
+#endif
 	dev->type		= ARPHRD_ETHER;
 	dev->hard_header_len 	= ETH_HLEN;
 	dev->mtu		= ETH_DATA_LEN;
-- 
cgit v1.2.3


From 5caea4ea7088e80ac5410d04660346094608b909 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 20 Nov 2008 00:40:07 -0800
Subject: net: listening_hash get a spinlock per bucket

This patch prepares RCU migration of listening_hash table for
TCP/DCCP protocols.

listening_hash table being small (32 slots per protocol), we add
a spinlock for each slot, instead of a single rwlock for whole table.

This should reduce hold time of readers, and writers concurrency.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 45 ++++++++--------------
 net/dccp/proto.c              |  8 +---
 net/ipv4/inet_diag.c          | 12 +++---
 net/ipv4/inet_hashtables.c    | 86 ++++++++++++++++---------------------------
 net/ipv4/tcp_ipv4.c           | 24 ++++++------
 net/ipv6/inet6_hashtables.c   | 23 ++++++------
 6 files changed, 79 insertions(+), 119 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 481896045111..62d2dd0d7860 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -99,6 +99,11 @@ struct inet_bind_hashbucket {
 	struct hlist_head	chain;
 };
 
+struct inet_listen_hashbucket {
+	spinlock_t		lock;
+	struct hlist_head	head;
+};
+
 /* This is for listening sockets, thus all sockets which possess wildcards. */
 #define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */
 
@@ -123,22 +128,21 @@ struct inet_hashinfo {
 	unsigned int			bhash_size;
 	/* Note : 4 bytes padding on 64 bit arches */
 
-	/* All sockets in TCP_LISTEN state will be in here.  This is the only
-	 * table where wildcard'd TCP sockets can exist.  Hash function here
-	 * is just local port number.
-	 */
-	struct hlist_head		listening_hash[INET_LHTABLE_SIZE];
+	struct kmem_cache		*bind_bucket_cachep;
 
 	/* All the above members are written once at bootup and
 	 * never written again _or_ are predominantly read-access.
 	 *
 	 * Now align to a new cache line as all the following members
-	 * are often dirty.
+	 * might be often dirty.
+	 */
+	/* All sockets in TCP_LISTEN state will be in here.  This is the only
+	 * table where wildcard'd TCP sockets can exist.  Hash function here
+	 * is just local port number.
 	 */
-	rwlock_t			lhash_lock ____cacheline_aligned;
-	atomic_t			lhash_users;
-	wait_queue_head_t		lhash_wait;
-	struct kmem_cache			*bind_bucket_cachep;
+	struct inet_listen_hashbucket	listening_hash[INET_LHTABLE_SIZE]
+					____cacheline_aligned_in_smp;
+
 };
 
 static inline struct inet_ehash_bucket *inet_ehash_bucket(
@@ -236,26 +240,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child);
 
 extern void inet_put_port(struct sock *sk);
 
-extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
-
-/*
- * - We may sleep inside this lock.
- * - If sleeping is not required (or called from BH),
- *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
- */
-static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
-{
-	/* read_lock synchronizes to candidates to writers */
-	read_lock(&hashinfo->lhash_lock);
-	atomic_inc(&hashinfo->lhash_users);
-	read_unlock(&hashinfo->lhash_lock);
-}
-
-static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
-{
-	if (atomic_dec_and_test(&hashinfo->lhash_users))
-		wake_up(&hashinfo->lhash_wait);
-}
+void inet_hashinfo_init(struct inet_hashinfo *h);
 
 extern void __inet_hash_nolisten(struct sock *sk);
 extern void inet_hash(struct sock *sk);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index bdf784c422b5..8b63394ec24c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -44,12 +44,7 @@ atomic_t dccp_orphan_count = ATOMIC_INIT(0);
 
 EXPORT_SYMBOL_GPL(dccp_orphan_count);
 
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
-	.lhash_lock	= RW_LOCK_UNLOCKED,
-	.lhash_users	= ATOMIC_INIT(0),
-	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
+struct inet_hashinfo dccp_hashinfo;
 EXPORT_SYMBOL_GPL(dccp_hashinfo);
 
 /* the maximum queue length for tx in packets. 0 is no limit */
@@ -1030,6 +1025,7 @@ static int __init dccp_init(void)
 	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
 		     FIELD_SIZEOF(struct sk_buff, cb));
 
+	inet_hashinfo_init(&dccp_hashinfo);
 	dccp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("dccp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 41b36720e977..1cb154ed75ad 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -718,13 +718,15 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
 			goto skip_listen_ht;
 
-		inet_listen_lock(hashinfo);
 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
 			struct sock *sk;
 			struct hlist_node *node;
+			struct inet_listen_hashbucket *ilb;
 
 			num = 0;
-			sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
+			ilb = &hashinfo->listening_hash[i];
+			spin_lock_bh(&ilb->lock);
+			sk_for_each(sk, node, &ilb->head) {
 				struct inet_sock *inet = inet_sk(sk);
 
 				if (num < s_num) {
@@ -742,7 +744,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 					goto syn_recv;
 
 				if (inet_csk_diag_dump(sk, skb, cb) < 0) {
-					inet_listen_unlock(hashinfo);
+					spin_unlock_bh(&ilb->lock);
 					goto done;
 				}
 
@@ -751,7 +753,7 @@ syn_recv:
 					goto next_listen;
 
 				if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
-					inet_listen_unlock(hashinfo);
+					spin_unlock_bh(&ilb->lock);
 					goto done;
 				}
 
@@ -760,12 +762,12 @@ next_listen:
 				cb->args[4] = 0;
 				++num;
 			}
+			spin_unlock_bh(&ilb->lock);
 
 			s_num = 0;
 			cb->args[3] = 0;
 			cb->args[4] = 0;
 		}
-		inet_listen_unlock(hashinfo);
 skip_listen_ht:
 		cb->args[0] = 1;
 		s_i = num = s_num = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fd269cfef0ec..377d004e5723 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -110,35 +110,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
 
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
-/*
- * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-void inet_listen_wlock(struct inet_hashinfo *hashinfo)
-	__acquires(hashinfo->lhash_lock)
-{
-	write_lock(&hashinfo->lhash_lock);
-
-	if (atomic_read(&hashinfo->lhash_users)) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait_exclusive(&hashinfo->lhash_wait,
-						  &wait, TASK_UNINTERRUPTIBLE);
-			if (!atomic_read(&hashinfo->lhash_users))
-				break;
-			write_unlock_bh(&hashinfo->lhash_lock);
-			schedule();
-			write_lock_bh(&hashinfo->lhash_lock);
-		}
-
-		finish_wait(&hashinfo->lhash_wait, &wait);
-	}
-}
-
 /*
  * Don't inline this cruft. Here are some nice properties to exploit here. The
  * BSD API does not allow a listening sock to specify the remote port nor the
@@ -191,25 +162,25 @@ struct sock *__inet_lookup_listener(struct net *net,
 				    const int dif)
 {
 	struct sock *sk = NULL;
-	const struct hlist_head *head;
+	struct inet_listen_hashbucket *ilb;
 
-	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
-	if (!hlist_empty(head)) {
-		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+	ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
+	spin_lock(&ilb->lock);
+	if (!hlist_empty(&ilb->head)) {
+		const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head)));
 
 		if (inet->num == hnum && !sk->sk_node.next &&
 		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if && net_eq(sock_net(sk), net))
 			goto sherry_cache;
-		sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
+		sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif);
 	}
 	if (sk) {
 sherry_cache:
 		sock_hold(sk);
 	}
-	read_unlock(&hashinfo->lhash_lock);
+	spin_unlock(&ilb->lock);
 	return sk;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
@@ -389,8 +360,7 @@ EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 static void __inet_hash(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	struct hlist_head *list;
-	rwlock_t *lock;
+	struct inet_listen_hashbucket *ilb;
 
 	if (sk->sk_state != TCP_LISTEN) {
 		__inet_hash_nolisten(sk);
@@ -398,14 +368,12 @@ static void __inet_hash(struct sock *sk)
 	}
 
 	WARN_ON(!sk_unhashed(sk));
-	list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-	lock = &hashinfo->lhash_lock;
+	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 
-	inet_listen_wlock(hashinfo);
-	__sk_add_node(sk, list);
+	spin_lock(&ilb->lock);
+	__sk_add_node(sk, &ilb->head);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
-	wake_up(&hashinfo->lhash_wait);
+	spin_unlock(&ilb->lock);
 }
 
 void inet_hash(struct sock *sk)
@@ -420,29 +388,27 @@ EXPORT_SYMBOL_GPL(inet_hash);
 
 void inet_unhash(struct sock *sk)
 {
-	rwlock_t *lock;
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 
 	if (sk_unhashed(sk))
-		goto out;
+		return;
 
 	if (sk->sk_state == TCP_LISTEN) {
-		local_bh_disable();
-		inet_listen_wlock(hashinfo);
-		lock = &hashinfo->lhash_lock;
+		struct inet_listen_hashbucket *ilb;
+
+		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		spin_lock_bh(&ilb->lock);
 		if (__sk_del_node_init(sk))
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+		spin_unlock_bh(&ilb->lock);
 	} else {
-		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+		rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
 		write_lock_bh(lock);
 		if (__sk_nulls_del_node_init_rcu(sk))
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+		write_unlock_bh(lock);
 	}
-
-	write_unlock_bh(lock);
-out:
-	if (sk->sk_state == TCP_LISTEN)
-		wake_up(&hashinfo->lhash_wait);
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
@@ -556,3 +522,13 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 }
 
 EXPORT_SYMBOL_GPL(inet_hash_connect);
+
+void inet_hashinfo_init(struct inet_hashinfo *h)
+{
+	int i;
+
+	for (i = 0; i < INET_LHTABLE_SIZE; i++)
+		spin_lock_init(&h->listening_hash[i].lock);
+}
+
+EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5559fea61e87..330b08a12274 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 }
 #endif
 
-struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
-	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
-	.lhash_users = ATOMIC_INIT(0),
-	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
-};
+struct inet_hashinfo tcp_hashinfo;
 
 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
@@ -1874,15 +1870,18 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	struct inet_connection_sock *icsk;
 	struct hlist_node *node;
 	struct sock *sk = cur;
+	struct inet_listen_hashbucket *ilb;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 
 	if (!sk) {
 		st->bucket = 0;
-		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
+		ilb = &tcp_hashinfo.listening_hash[0];
+		spin_lock_bh(&ilb->lock);
+		sk = sk_head(&ilb->head);
 		goto get_sk;
 	}
-
+	ilb = &tcp_hashinfo.listening_hash[st->bucket];
 	++st->num;
 
 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
@@ -1932,8 +1931,11 @@ start_req:
 		}
 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	}
+	spin_unlock_bh(&ilb->lock);
 	if (++st->bucket < INET_LHTABLE_SIZE) {
-		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
+		ilb = &tcp_hashinfo.listening_hash[st->bucket];
+		spin_lock_bh(&ilb->lock);
+		sk = sk_head(&ilb->head);
 		goto get_sk;
 	}
 	cur = NULL;
@@ -2066,12 +2068,10 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	void *rc;
 	struct tcp_iter_state *st = seq->private;
 
-	inet_listen_lock(&tcp_hashinfo);
 	st->state = TCP_SEQ_STATE_LISTENING;
 	rc	  = listening_get_idx(seq, &pos);
 
 	if (!rc) {
-		inet_listen_unlock(&tcp_hashinfo);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 		rc	  = established_get_idx(seq, pos);
 	}
@@ -2103,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	case TCP_SEQ_STATE_LISTENING:
 		rc = listening_get_next(seq, v);
 		if (!rc) {
-			inet_listen_unlock(&tcp_hashinfo);
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
 			rc	  = established_get_first(seq);
 		}
@@ -2130,7 +2129,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 		}
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
-			inet_listen_unlock(&tcp_hashinfo);
+			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
 		break;
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
@@ -2405,6 +2404,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 
 void __init tcp_v4_init(void)
 {
+	inet_hashinfo_init(&tcp_hashinfo);
 	if (register_pernet_device(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
 }
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index c1b4d401fd95..21544b9be259 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -25,30 +25,30 @@
 void __inet6_hash(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	rwlock_t *lock;
 
 	WARN_ON(!sk_unhashed(sk));
 
 	if (sk->sk_state == TCP_LISTEN) {
-		struct hlist_head *list;
+		struct inet_listen_hashbucket *ilb;
 
-		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &hashinfo->lhash_lock;
-		inet_listen_wlock(hashinfo);
-		__sk_add_node(sk, list);
+		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		spin_lock(&ilb->lock);
+		__sk_add_node(sk, &ilb->head);
+		spin_unlock(&ilb->lock);
 	} else {
 		unsigned int hash;
 		struct hlist_nulls_head *list;
+		rwlock_t *lock;
 
 		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
 		list = &inet_ehash_bucket(hashinfo, hash)->chain;
 		lock = inet_ehash_lockp(hashinfo, hash);
 		write_lock(lock);
 		__sk_nulls_add_node_rcu(sk, list);
+		write_unlock(lock);
 	}
 
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 }
 EXPORT_SYMBOL(__inet6_hash);
 
@@ -126,10 +126,11 @@ struct sock *inet6_lookup_listener(struct net *net,
 	const struct hlist_node *node;
 	struct sock *result = NULL;
 	int score, hiscore = 0;
+	struct inet_listen_hashbucket *ilb;
 
-	read_lock(&hashinfo->lhash_lock);
-	sk_for_each(sk, node,
-			&hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
+	ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
+	spin_lock(&ilb->lock);
+	sk_for_each(sk, node, &ilb->head) {
 		if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
 				sk->sk_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -157,7 +158,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 	}
 	if (result)
 		sock_hold(result);
-	read_unlock(&hashinfo->lhash_lock);
+	spin_unlock(&ilb->lock);
 	return result;
 }
 
-- 
cgit v1.2.3


From d214c7537bbf2f247991fb65b3420b0b3d712c67 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 20 Nov 2008 00:49:27 -0800
Subject: filter: add SKF_AD_NLATTR_NEST to look for nested attributes

SKF_AD_NLATTR allows us to find the first matching attribute in a
stream of netlink attributes from one offset to the end of the
netlink message. This is not suitable to look for a specific
matching inside a set of nested attributes.

For example, in ctnetlink messages, if we look for the CTA_V6_SRC
attribute in a message that talks about an IPv4 connection,
SKF_AD_NLATTR returns the offset of CTA_STATUS which has the same
value of CTA_V6_SRC but outside the nest. To differenciate
CTA_STATUS and CTA_V6_SRC, we would have to make assumptions on the
size of the attribute and the usual offset, resulting in horrible
BSF code.

This patch adds SKF_AD_NLATTR_NEST, which is a variant of
SKF_AD_NLATTR, that looks for an attribute inside the limits of
a nested attributes, but not further.

This patch validates that we have enough room to look for the
nested attributes - based on a suggestion from Patrick McHardy.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  3 ++-
 net/core/filter.c      | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b6ea9aa9e853..1354aaf6abbe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -122,7 +122,8 @@ struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_PKTTYPE 	4
 #define SKF_AD_IFINDEX 	8
 #define SKF_AD_NLATTR	12
-#define SKF_AD_MAX 	16
+#define SKF_AD_NLATTR_NEST	16
+#define SKF_AD_MAX	20
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index df3744355839..d1d779ca096d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -319,6 +319,25 @@ load_b:
 				A = 0;
 			continue;
 		}
+		case SKF_AD_NLATTR_NEST: {
+			struct nlattr *nla;
+
+			if (skb_is_nonlinear(skb))
+				return 0;
+			if (A > skb->len - sizeof(struct nlattr))
+				return 0;
+
+			nla = (struct nlattr *)&skb->data[A];
+			if (nla->nla_len > A - skb->len)
+				return 0;
+
+			nla = nla_find_nested(nla, X);
+			if (nla)
+				A = (void *)nla - (void *)skb->data;
+			else
+				A = 0;
+			continue;
+		}
 		default:
 			return 0;
 		}
-- 
cgit v1.2.3


From 0c19b0adb8dd33dbd10ff48e41971231c486855c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 20 Nov 2008 04:08:29 -0800
Subject: netlink: avoid memset of 0 bytes sparse warning

A netlink attribute padding of zero triggers this sparse warning:

include/linux/netlink.h:245:8: warning: memset with byte count of 0

Avoid the memset when the size parameter is constant and requires no padding.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 9ff1b54908f3..51b09a1f46c3 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -242,7 +242,8 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 	nlh->nlmsg_flags = flags;
 	nlh->nlmsg_pid = pid;
 	nlh->nlmsg_seq = seq;
-	memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
+	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
+		memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
 	return nlh;
 }
 
-- 
cgit v1.2.3


From 13d2a1d2b032de08d7dcab6a1edcd47802681f96 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 20 Nov 2008 04:10:00 -0800
Subject: pkt_sched: add DRR scheduler

Add classful DRR scheduler as a more flexible replacement for SFQ.

The main difference to the algorithm described in "Efficient Fair Queueing
using Deficit Round Robin" is that this implementation doesn't drop packets
from the longest queue on overrun because its classful and limits are
handled by each individual child qdisc.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  16 ++
 net/sched/Kconfig         |  11 +
 net/sched/Makefile        |   1 +
 net/sched/sch_drr.c       | 505 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 533 insertions(+)
 create mode 100644 net/sched/sch_drr.c

(limited to 'include')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 5d921fa91a5b..e3f133adba78 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -500,4 +500,20 @@ struct tc_netem_corrupt
 
 #define NETEM_DIST_SCALE	8192
 
+/* DRR */
+
+enum
+{
+	TCA_DRR_UNSPEC,
+	TCA_DRR_QUANTUM,
+	__TCA_DRR_MAX
+};
+
+#define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)
+
+struct tc_drr_stats
+{
+	u32	deficit;
+};
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 36543b6fcef3..4f7ef0db302b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_NETEM
 
 	  If unsure, say N.
 
+config NET_SCH_DRR
+	tristate "Deficit Round Robin scheduler (DRR)"
+	help
+	  Say Y here if you want to use the Deficit Round Robin (DRR) packet
+	  scheduling algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_drr.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 70b35f8708c3..54d950cd4b8d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
 obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
+obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
new file mode 100644
index 000000000000..e71a5de23e23
--- /dev/null
+++ b/net/sched/sch_drr.c
@@ -0,0 +1,505 @@
+/*
+ * net/sched/sch_drr.c         Deficit Round Robin scheduler
+ *
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+struct drr_class {
+	struct Qdisc_class_common	common;
+	unsigned int			refcnt;
+	unsigned int			filter_cnt;
+
+	struct gnet_stats_basic		bstats;
+	struct gnet_stats_queue		qstats;
+	struct gnet_stats_rate_est	rate_est;
+	struct list_head		alist;
+	struct Qdisc			*qdisc;
+
+	u32				quantum;
+	u32				deficit;
+};
+
+struct drr_sched {
+	struct list_head		active;
+	struct tcf_proto		*filter_list;
+	struct Qdisc_class_hash		clhash;
+};
+
+static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct Qdisc_class_common *clc;
+
+	clc = qdisc_class_find(&q->clhash, classid);
+	if (clc == NULL)
+		return NULL;
+	return container_of(clc, struct drr_class, common);
+}
+
+static void drr_purge_queue(struct drr_class *cl)
+{
+	unsigned int len = cl->qdisc->q.qlen;
+
+	qdisc_reset(cl->qdisc);
+	qdisc_tree_decrease_qlen(cl->qdisc, len);
+}
+
+static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
+	[TCA_DRR_QUANTUM]	= { .type = NLA_U32 },
+};
+
+static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+			    struct nlattr **tca, unsigned long *arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl = (struct drr_class *)*arg;
+	struct nlattr *tb[TCA_DRR_MAX + 1];
+	u32 quantum;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_DRR_QUANTUM]) {
+		quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
+		if (quantum == 0)
+			return -EINVAL;
+	} else
+		quantum = psched_mtu(qdisc_dev(sch));
+
+	if (cl != NULL) {
+		sch_tree_lock(sch);
+		if (tb[TCA_DRR_QUANTUM])
+			cl->quantum = quantum;
+		sch_tree_unlock(sch);
+
+		if (tca[TCA_RATE])
+			gen_replace_estimator(&cl->bstats, &cl->rate_est,
+					      qdisc_root_sleeping_lock(sch),
+					      tca[TCA_RATE]);
+		return 0;
+	}
+
+	cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL);
+	if (cl == NULL)
+		return -ENOBUFS;
+
+	cl->refcnt	   = 1;
+	cl->common.classid = classid;
+	cl->quantum	   = quantum;
+	cl->qdisc	   = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					       &pfifo_qdisc_ops, classid);
+	if (cl->qdisc == NULL)
+		cl->qdisc = &noop_qdisc;
+
+	if (tca[TCA_RATE])
+		gen_replace_estimator(&cl->bstats, &cl->rate_est,
+				      qdisc_root_sleeping_lock(sch),
+				      tca[TCA_RATE]);
+
+	sch_tree_lock(sch);
+	qdisc_class_hash_insert(&q->clhash, &cl->common);
+	sch_tree_unlock(sch);
+
+	qdisc_class_hash_grow(sch, &q->clhash);
+
+	*arg = (unsigned long)cl;
+	return 0;
+}
+
+static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
+{
+	gen_kill_estimator(&cl->bstats, &cl->rate_est);
+	qdisc_destroy(cl->qdisc);
+	kfree(cl);
+}
+
+static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (cl->filter_cnt > 0)
+		return -EBUSY;
+
+	sch_tree_lock(sch);
+
+	drr_purge_queue(cl);
+	qdisc_class_hash_remove(&q->clhash, &cl->common);
+
+	if (--cl->refcnt == 0)
+		drr_destroy_class(sch, cl);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
+{
+	struct drr_class *cl = drr_find_class(sch, classid);
+
+	if (cl != NULL)
+		cl->refcnt++;
+
+	return (unsigned long)cl;
+}
+
+static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (--cl->refcnt == 0)
+		drr_destroy_class(sch, cl);
+}
+
+static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+
+	return &q->filter_list;
+}
+
+static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
+				  u32 classid)
+{
+	struct drr_class *cl = drr_find_class(sch, classid);
+
+	if (cl != NULL)
+		cl->filter_cnt++;
+
+	return (unsigned long)cl;
+}
+
+static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	cl->filter_cnt--;
+}
+
+static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
+			   struct Qdisc *new, struct Qdisc **old)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (new == NULL) {
+		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					&pfifo_qdisc_ops, cl->common.classid);
+		if (new == NULL)
+			new = &noop_qdisc;
+	}
+
+	sch_tree_lock(sch);
+	drr_purge_queue(cl);
+	*old = xchg(&cl->qdisc, new);
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	return cl->qdisc;
+}
+
+static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (cl->qdisc->q.qlen == 0)
+		list_del(&cl->alist);
+}
+
+static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct nlattr *nest;
+
+	tcm->tcm_parent	= TC_H_ROOT;
+	tcm->tcm_handle	= cl->common.classid;
+	tcm->tcm_info	= cl->qdisc->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum);
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+				struct gnet_dump *d)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct tc_drr_stats xstats;
+
+	memset(&xstats, 0, sizeof(xstats));
+	if (cl->qdisc->q.qlen)
+		xstats.deficit = cl->deficit;
+
+	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
+		return -1;
+
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct tcf_result res;
+	int result;
+
+	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
+		cl = drr_find_class(sch, skb->priority);
+		if (cl != NULL)
+			return cl;
+	}
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_QUEUED:
+		case TC_ACT_STOLEN:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		case TC_ACT_SHOT:
+			return NULL;
+		}
+#endif
+		cl = (struct drr_class *)res.class;
+		if (cl == NULL)
+			cl = drr_find_class(sch, res.classid);
+		return cl;
+	}
+	return NULL;
+}
+
+static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	unsigned int len;
+	int err;
+
+	cl = drr_classify(skb, sch, &err);
+	if (cl == NULL) {
+		if (err & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return err;
+	}
+
+	len = qdisc_pkt_len(skb);
+	err = qdisc_enqueue(skb, cl->qdisc);
+	if (unlikely(err != NET_XMIT_SUCCESS)) {
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
+		return err;
+	}
+
+	if (cl->qdisc->q.qlen == 1) {
+		list_add_tail(&cl->alist, &q->active);
+		cl->deficit = cl->quantum;
+	}
+
+	cl->bstats.packets++;
+	cl->bstats.bytes += len;
+	sch->bstats.packets++;
+	sch->bstats.bytes += len;
+
+	sch->q.qlen++;
+	return err;
+}
+
+static struct sk_buff *drr_dequeue(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	while (!list_empty(&q->active)) {
+		cl = list_first_entry(&q->active, struct drr_class, alist);
+		skb = cl->qdisc->ops->peek(cl->qdisc);
+		if (skb == NULL)
+			goto skip;
+
+		len = qdisc_pkt_len(skb);
+		if (len <= cl->deficit) {
+			cl->deficit -= len;
+			skb = qdisc_dequeue_peeked(cl->qdisc);
+			if (cl->qdisc->q.qlen == 0)
+				list_del(&cl->alist);
+			sch->q.qlen--;
+			return skb;
+		}
+
+		cl->deficit += cl->quantum;
+skip:
+		list_move_tail(&cl->alist, &q->active);
+	}
+	return NULL;
+}
+
+static unsigned int drr_drop(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	unsigned int len;
+
+	list_for_each_entry(cl, &q->active, alist) {
+		if (cl->qdisc->ops->drop) {
+			len = cl->qdisc->ops->drop(cl->qdisc);
+			if (len > 0) {
+				if (cl->qdisc->q.qlen == 0)
+					list_del(&cl->alist);
+				return len;
+			}
+		}
+	}
+	return 0;
+}
+
+static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	int err;
+
+	err = qdisc_class_hash_init(&q->clhash);
+	if (err < 0)
+		return err;
+	INIT_LIST_HEAD(&q->active);
+	return 0;
+}
+
+static void drr_reset_qdisc(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+			if (cl->qdisc->q.qlen)
+				list_del(&cl->alist);
+			qdisc_reset(cl->qdisc);
+		}
+	}
+	sch->q.qlen = 0;
+}
+
+static void drr_destroy_qdisc(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct hlist_node *n, *next;
+	unsigned int i;
+
+	tcf_destroy_chain(&q->filter_list);
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+					  common.hnode)
+			drr_destroy_class(sch, cl);
+	}
+	qdisc_class_hash_destroy(&q->clhash);
+}
+
+static const struct Qdisc_class_ops drr_class_ops = {
+	.change		= drr_change_class,
+	.delete		= drr_delete_class,
+	.get		= drr_get_class,
+	.put		= drr_put_class,
+	.tcf_chain	= drr_tcf_chain,
+	.bind_tcf	= drr_bind_tcf,
+	.unbind_tcf	= drr_unbind_tcf,
+	.graft		= drr_graft_class,
+	.leaf		= drr_class_leaf,
+	.qlen_notify	= drr_qlen_notify,
+	.dump		= drr_dump_class,
+	.dump_stats	= drr_dump_class_stats,
+	.walk		= drr_walk,
+};
+
+static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
+	.cl_ops		= &drr_class_ops,
+	.id		= "drr",
+	.priv_size	= sizeof(struct drr_sched),
+	.enqueue	= drr_enqueue,
+	.dequeue	= drr_dequeue,
+	.peek		= qdisc_peek_dequeued,
+	.drop		= drr_drop,
+	.init		= drr_init_qdisc,
+	.reset		= drr_reset_qdisc,
+	.destroy	= drr_destroy_qdisc,
+	.owner		= THIS_MODULE,
+};
+
+static int __init drr_init(void)
+{
+	return register_qdisc(&drr_qdisc_ops);
+}
+
+static void __exit drr_exit(void)
+{
+	unregister_qdisc(&drr_qdisc_ops);
+}
+
+module_init(drr_init);
+module_exit(drr_exit);
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 018a7bf1e55000dd792194238c9043918d24d3dd Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Thu, 20 Nov 2008 15:59:56 +0100
Subject: netfilter: ip{,6}t_policy.h should include xp_policy.h

It seems that all of the include/netfilter_{ipv4,ipv6}/{ipt,ip6t}_*.h which
share constants include the corresponding include/netfilter/xp_*.h files.
Neither ipt_policy.h not ip6t_policy.h do.  Make these consistant with
the norm.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_ipv4/ipt_policy.h  | 2 ++
 include/linux/netfilter_ipv6/ip6t_policy.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h
index b9478a255301..1037fb2cd206 100644
--- a/include/linux/netfilter_ipv4/ipt_policy.h
+++ b/include/linux/netfilter_ipv4/ipt_policy.h
@@ -1,6 +1,8 @@
 #ifndef _IPT_POLICY_H
 #define _IPT_POLICY_H
 
+#include <linux/netfilter/xt_policy.h>
+
 #define IPT_POLICY_MAX_ELEM		XT_POLICY_MAX_ELEM
 
 /* ipt_policy_flags */
diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h
index 6bab3163d2fb..b1c449d7ec89 100644
--- a/include/linux/netfilter_ipv6/ip6t_policy.h
+++ b/include/linux/netfilter_ipv6/ip6t_policy.h
@@ -1,6 +1,8 @@
 #ifndef _IP6T_POLICY_H
 #define _IP6T_POLICY_H
 
+#include <linux/netfilter/xt_policy.h>
+
 #define IP6T_POLICY_MAX_ELEM		XT_POLICY_MAX_ELEM
 
 /* ip6t_policy_flags */
-- 
cgit v1.2.3


From 008298231abbeb91bc7be9e8b078607b816d1a4a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 20 Nov 2008 20:14:53 -0800
Subject: netdev: add more functions to netdevice ops

This patch moves neigh_setup and hard_start_xmit into the network device ops
structure. For bisection, fix all the previously converted drivers as well.
Bonding driver took the biggest hit on this.

Added a prefetch of the hard_start_xmit in the fast path to try and reduce
any impact this would have.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/8139cp.c            |  2 +-
 drivers/net/8139too.c           |  2 +-
 drivers/net/acenic.c            |  4 ++-
 drivers/net/atl1e/atl1e_main.c  |  3 ++-
 drivers/net/atlx/atl1.c         |  4 +--
 drivers/net/atlx/atl2.c         |  2 +-
 drivers/net/bonding/bond_main.c | 56 +++++++++++++++++++++++++++++++++--------
 drivers/net/chelsio/cxgb2.c     |  6 ++---
 drivers/net/cxgb3/cxgb3_main.c  |  1 -
 drivers/net/e100.c              |  2 +-
 drivers/net/e1000/e1000_main.c  |  2 +-
 drivers/net/e1000e/netdev.c     |  2 +-
 drivers/net/enic/enic_main.c    |  2 +-
 drivers/net/forcedeth.c         | 22 +++++++++++++---
 drivers/net/ifb.c               |  4 +--
 drivers/net/igb/igb_main.c      |  2 +-
 drivers/net/ixgb/ixgb_main.c    |  2 +-
 drivers/net/ixgbe/ixgbe_main.c  |  2 +-
 drivers/net/loopback.c          |  2 +-
 drivers/net/macvlan.c           |  4 +--
 drivers/net/niu.c               |  2 +-
 drivers/net/ppp_generic.c       |  5 ++--
 drivers/net/r8169.c             |  2 +-
 drivers/net/skge.c              |  2 +-
 drivers/net/sky2.c              |  3 ++-
 drivers/net/tg3.c               | 45 ++++++++++++++++++++++-----------
 drivers/net/tun.c               |  4 +--
 drivers/net/veth.c              |  2 +-
 drivers/net/via-velocity.c      |  2 +-
 include/linux/netdevice.h       | 39 ++++++++++++++++++----------
 net/bridge/br_device.c          | 10 ++++----
 net/bridge/br_if.c              |  2 +-
 net/core/dev.c                  | 12 ++++++---
 net/core/neighbour.c            |  6 ++---
 net/core/netpoll.c              |  6 +++--
 net/core/pktgen.c               |  8 +++---
 36 files changed, 183 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 13f75b67872d..f6d9d1353dd5 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -1824,6 +1824,7 @@ static const struct net_device_ops cp_netdev_ops = {
 	.ndo_set_multicast_list	= cp_set_rx_mode,
 	.ndo_get_stats		= cp_get_stats,
 	.ndo_do_ioctl		= cp_ioctl,
+	.ndo_start_xmit		= cp_start_xmit,
 	.ndo_tx_timeout		= cp_tx_timeout,
 #if CP_VLAN_TAG_USED
 	.ndo_vlan_rx_register	= cp_vlan_rx_register,
@@ -1949,7 +1950,6 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
 	dev->netdev_ops = &cp_netdev_ops;
-	dev->hard_start_xmit = cp_start_xmit;
 	netif_napi_add(dev, &cp->napi, cp_rx_poll, 16);
 	dev->ethtool_ops = &cp_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index f8866552386a..445a479db79d 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -921,6 +921,7 @@ static const struct net_device_ops rtl8139_netdev_ops = {
 	.ndo_stop		= rtl8139_close,
 	.ndo_get_stats		= rtl8139_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_start_xmit		= rtl8139_start_xmit,
 	.ndo_set_multicast_list	= rtl8139_set_rx_mode,
 	.ndo_do_ioctl		= netdev_ioctl,
 	.ndo_tx_timeout		= rtl8139_tx_timeout,
@@ -992,7 +993,6 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
 	dev->netdev_ops = &rtl8139_netdev_ops;
 	dev->ethtool_ops = &rtl8139_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	dev->hard_start_xmit = rtl8139_start_xmit;
 	netif_napi_add(dev, &tp->napi, rtl8139_poll, 64);
 
 	/* note: the hardware is not capable of sg/csum/highdma, however
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 309a90ea9211..21d24320210a 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -455,10 +455,13 @@ static const struct net_device_ops ace_netdev_ops = {
 	.ndo_stop		= ace_close,
 	.ndo_tx_timeout		= ace_watchdog,
 	.ndo_get_stats		= ace_get_stats,
+	.ndo_start_xmit		= ace_start_xmit,
 	.ndo_set_multicast_list	= ace_set_multicast_list,
 	.ndo_set_mac_address	= ace_set_mac_addr,
 	.ndo_change_mtu		= ace_change_mtu,
+#if ACENIC_DO_VLAN
 	.ndo_vlan_rx_register	= ace_vlan_rx_register,
+#endif
 };
 
 static int __devinit acenic_probe_one(struct pci_dev *pdev,
@@ -489,7 +492,6 @@ static int __devinit acenic_probe_one(struct pci_dev *pdev,
 	dev->watchdog_timeo = 5*HZ;
 
 	dev->netdev_ops = &ace_netdev_ops;
-	dev->hard_start_xmit = &ace_start_xmit;
 	SET_ETHTOOL_OPS(dev, &ace_ethtool_ops);
 
 	/* we only display this string ONCE */
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index a815fffc2a5b..98b2a7a466b8 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -2256,6 +2256,7 @@ static void atl1e_shutdown(struct pci_dev *pdev)
 static const struct net_device_ops atl1e_netdev_ops = {
 	.ndo_open		= atl1e_open,
 	.ndo_stop		= atl1e_close,
+	.ndo_start_xmit		= atl1e_xmit_frame,
 	.ndo_get_stats		= atl1e_get_stats,
 	.ndo_set_multicast_list	= atl1e_set_multi,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -2277,7 +2278,7 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev)
 
 	netdev->irq  = pdev->irq;
 	netdev->netdev_ops = &atl1e_netdev_ops;
-	netdev->hard_start_xmit = atl1e_xmit_frame,
+
 	netdev->watchdog_timeo = AT_TX_WATCHDOG;
 	atl1e_set_ethtool_ops(netdev);
 
diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 7a0fb04e3480..aef7e47fdd24 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -2883,12 +2883,13 @@ static void atl1_poll_controller(struct net_device *netdev)
 static const struct net_device_ops atl1_netdev_ops = {
 	.ndo_open		= atl1_open,
 	.ndo_stop		= atl1_close,
+	.ndo_start_xmit		= atl1_xmit_frame,
 	.ndo_set_multicast_list	= atlx_set_multi,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= atl1_set_mac,
 	.ndo_change_mtu		= atl1_change_mtu,
 	.ndo_do_ioctl		= atlx_ioctl,
-	.ndo_tx_timeout	= atlx_tx_timeout,
+	.ndo_tx_timeout		= atlx_tx_timeout,
 	.ndo_vlan_rx_register	= atlx_vlan_rx_register,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= atl1_poll_controller,
@@ -2983,7 +2984,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev,
 	adapter->mii.reg_num_mask = 0x1f;
 
 	netdev->netdev_ops = &atl1_netdev_ops;
-	netdev->hard_start_xmit = &atl1_xmit_frame;
 	netdev->watchdog_timeo = 5 * HZ;
 
 	netdev->ethtool_ops = &atl1_ethtool_ops;
diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c
index b8d585722e1a..0326a84503e3 100644
--- a/drivers/net/atlx/atl2.c
+++ b/drivers/net/atlx/atl2.c
@@ -1315,6 +1315,7 @@ static void atl2_poll_controller(struct net_device *netdev)
 static const struct net_device_ops atl2_netdev_ops = {
 	.ndo_open		= atl2_open,
 	.ndo_stop		= atl2_close,
+	.ndo_start_xmit		= atl2_xmit_frame,
 	.ndo_set_multicast_list	= atl2_set_multi,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= atl2_set_mac,
@@ -1400,7 +1401,6 @@ static int __devinit atl2_probe(struct pci_dev *pdev,
 
 	atl2_setup_pcicmd(pdev);
 
-	netdev->hard_start_xmit = &atl2_xmit_frame;
 	netdev->netdev_ops = &atl2_netdev_ops;
 	atl2_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 614656c8187b..a339a8052737 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1377,14 +1377,12 @@ done:
 	return 0;
 }
 
-
 static void bond_setup_by_slave(struct net_device *bond_dev,
 				struct net_device *slave_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
-	bond_dev->neigh_setup           = slave_dev->neigh_setup;
-	bond_dev->header_ops		= slave_dev->header_ops;
+	bond_dev->header_ops	    = slave_dev->header_ops;
 
 	bond_dev->type		    = slave_dev->type;
 	bond_dev->hard_header_len   = slave_dev->hard_header_len;
@@ -4124,6 +4122,20 @@ static void bond_set_multicast_list(struct net_device *bond_dev)
 	read_unlock(&bond->lock);
 }
 
+static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms)
+{
+	struct bonding *bond = netdev_priv(dev);
+	struct slave *slave = bond->first_slave;
+
+	if (slave) {
+		const struct net_device_ops *slave_ops
+			= slave->dev->netdev_ops;
+		if (slave_ops->ndo_neigh_setup)
+			return slave_ops->ndo_neigh_setup(dev, parms);
+	}
+	return 0;
+}
+
 /*
  * Change the MTU of all of a master's slaves to match the master
  */
@@ -4490,6 +4502,35 @@ static void bond_set_xmit_hash_policy(struct bonding *bond)
 	}
 }
 
+static int bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct bonding *bond = netdev_priv(dev);
+
+	switch (bond->params.mode) {
+	case BOND_MODE_ROUNDROBIN:
+		return bond_xmit_roundrobin(skb, dev);
+	case BOND_MODE_ACTIVEBACKUP:
+		return bond_xmit_activebackup(skb, dev);
+	case BOND_MODE_XOR:
+		return bond_xmit_xor(skb, dev);
+	case BOND_MODE_BROADCAST:
+		return bond_xmit_broadcast(skb, dev);
+	case BOND_MODE_8023AD:
+		return bond_3ad_xmit_xor(skb, dev);
+	case BOND_MODE_ALB:
+	case BOND_MODE_TLB:
+		return bond_alb_xmit(skb, dev);
+	default:
+		/* Should never happen, mode already checked */
+		printk(KERN_ERR DRV_NAME ": %s: Error: Unknown bonding mode %d\n",
+		     dev->name, bond->params.mode);
+		WARN_ON_ONCE(1);
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+}
+
+
 /*
  * set bond mode specific net device operations
  */
@@ -4499,28 +4540,22 @@ void bond_set_mode_ops(struct bonding *bond, int mode)
 
 	switch (mode) {
 	case BOND_MODE_ROUNDROBIN:
-		bond_dev->hard_start_xmit = bond_xmit_roundrobin;
 		break;
 	case BOND_MODE_ACTIVEBACKUP:
-		bond_dev->hard_start_xmit = bond_xmit_activebackup;
 		break;
 	case BOND_MODE_XOR:
-		bond_dev->hard_start_xmit = bond_xmit_xor;
 		bond_set_xmit_hash_policy(bond);
 		break;
 	case BOND_MODE_BROADCAST:
-		bond_dev->hard_start_xmit = bond_xmit_broadcast;
 		break;
 	case BOND_MODE_8023AD:
 		bond_set_master_3ad_flags(bond);
-		bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
 		bond_set_xmit_hash_policy(bond);
 		break;
 	case BOND_MODE_ALB:
 		bond_set_master_alb_flags(bond);
 		/* FALLTHRU */
 	case BOND_MODE_TLB:
-		bond_dev->hard_start_xmit = bond_alb_xmit;
 		break;
 	default:
 		/* Should never happen, mode already checked */
@@ -4553,12 +4588,13 @@ static const struct ethtool_ops bond_ethtool_ops = {
 static const struct net_device_ops bond_netdev_ops = {
 	.ndo_open		= bond_open,
 	.ndo_stop		= bond_close,
+	.ndo_start_xmit		= bond_start_xmit,
 	.ndo_get_stats		= bond_get_stats,
 	.ndo_do_ioctl		= bond_do_ioctl,
 	.ndo_set_multicast_list	= bond_set_multicast_list,
 	.ndo_change_mtu		= bond_change_mtu,
-	.ndo_validate_addr	= NULL,
 	.ndo_set_mac_address 	= bond_set_mac_address,
+	.ndo_neigh_setup	= bond_neigh_setup,
 	.ndo_vlan_rx_register	= bond_vlan_rx_register,
 	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c
index 482741797ebf..9b6011e7678e 100644
--- a/drivers/net/chelsio/cxgb2.c
+++ b/drivers/net/chelsio/cxgb2.c
@@ -915,7 +915,7 @@ static int t1_set_mac_addr(struct net_device *dev, void *p)
 }
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-static void vlan_rx_register(struct net_device *dev,
+static void t1_vlan_rx_register(struct net_device *dev,
 				   struct vlan_group *grp)
 {
 	struct adapter *adapter = dev->ml_priv;
@@ -1013,6 +1013,7 @@ void t1_fatal_err(struct adapter *adapter)
 static const struct net_device_ops cxgb_netdev_ops = {
 	.ndo_open		= cxgb_open,
 	.ndo_stop		= cxgb_close,
+	.ndo_start_xmit		= t1_start_xmit,
 	.ndo_get_stats		= t1_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_multicast_list	= t1_set_rxmode,
@@ -1020,7 +1021,7 @@ static const struct net_device_ops cxgb_netdev_ops = {
 	.ndo_change_mtu		= t1_change_mtu,
 	.ndo_set_mac_address	= t1_set_mac_addr,
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-	.ndo_vlan_rx_register	= vlan_rx_register,
+	.ndo_vlan_rx_register	= t1_vlan_rx_register,
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= t1_netpoll,
@@ -1157,7 +1158,6 @@ static int __devinit init_one(struct pci_dev *pdev,
 		}
 
 		netdev->netdev_ops = &cxgb_netdev_ops;
-		netdev->hard_start_xmit = t1_start_xmit;
 		netdev->hard_header_len += (adapter->flags & TSO_CAPABLE) ?
 			sizeof(struct cpl_tx_pkt_lso) : sizeof(struct cpl_tx_pkt);
 
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index a9479be53ec3..cd9fcaca70f3 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -2955,7 +2955,6 @@ static int __devinit init_one(struct pci_dev *pdev,
 
 		netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
 		netdev->netdev_ops = &cxgb_netdev_ops;
-		netdev->hard_start_xmit = t3_eth_xmit;
 		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
 	}
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 5894716de19f..2001a63794f5 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2615,6 +2615,7 @@ static int e100_close(struct net_device *netdev)
 static const struct net_device_ops e100_netdev_ops = {
 	.ndo_open		= e100_open,
 	.ndo_stop		= e100_close,
+	.ndo_start_xmit		= e100_xmit_frame,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_multicast_list	= e100_set_multicast_list,
 	.ndo_set_mac_address	= e100_set_mac_address,
@@ -2640,7 +2641,6 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	}
 
 	netdev->netdev_ops = &e100_netdev_ops;
-	netdev->hard_start_xmit = e100_xmit_frame;
 	SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
 	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index debbba390d40..5c098c9d584e 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -891,6 +891,7 @@ static int e1000_is_need_ioport(struct pci_dev *pdev)
 static const struct net_device_ops e1000_netdev_ops = {
 	.ndo_open		= e1000_open,
 	.ndo_stop		= e1000_close,
+	.ndo_start_xmit		= e1000_xmit_frame,
 	.ndo_get_stats		= e1000_get_stats,
 	.ndo_set_rx_mode	= e1000_set_rx_mode,
 	.ndo_set_mac_address	= e1000_set_mac,
@@ -1001,7 +1002,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 	}
 
 	netdev->netdev_ops = &e1000_netdev_ops;
-	netdev->hard_start_xmit = &e1000_xmit_frame;
 	e1000_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
 	netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index ced839e4cae8..cc0502bbb9ff 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -4707,6 +4707,7 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter)
 static const struct net_device_ops e1000e_netdev_ops = {
 	.ndo_open		= e1000_open,
 	.ndo_stop		= e1000_close,
+	.ndo_start_xmit		= e1000_xmit_frame,
 	.ndo_get_stats		= e1000_get_stats,
 	.ndo_set_multicast_list	= e1000_set_multi,
 	.ndo_set_mac_address	= e1000_set_mac,
@@ -4822,7 +4823,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 
 	/* construct the net_device struct */
 	netdev->netdev_ops		= &e1000e_netdev_ops;
-	netdev->hard_start_xmit		= &e1000_xmit_frame;
 	e1000e_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo		= 5 * HZ;
 	netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 40f8c88b166d..1c409df735d4 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1593,6 +1593,7 @@ static void enic_iounmap(struct enic *enic)
 static const struct net_device_ops enic_netdev_ops = {
 	.ndo_open		= enic_open,
 	.ndo_stop		= enic_stop,
+	.ndo_start_xmit		= enic_hard_start_xmit,
 	.ndo_get_stats		= enic_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_multicast_list	= enic_set_multicast_list,
@@ -1830,7 +1831,6 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 	}
 
 	netdev->netdev_ops = &enic_netdev_ops;
-	netdev->hard_start_xmit = enic_hard_start_xmit;
 	netdev->watchdog_timeo = 2 * HZ;
 	netdev->ethtool_ops = &enic_ethtool_ops;
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index dd2e1f670b0d..0d7e5750245a 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -5412,6 +5412,23 @@ static const struct net_device_ops nv_netdev_ops = {
 	.ndo_open		= nv_open,
 	.ndo_stop		= nv_close,
 	.ndo_get_stats		= nv_get_stats,
+	.ndo_start_xmit		= nv_start_xmit,
+	.ndo_tx_timeout		= nv_tx_timeout,
+	.ndo_change_mtu		= nv_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= nv_set_mac_address,
+	.ndo_set_multicast_list	= nv_set_multicast,
+	.ndo_vlan_rx_register	= nv_vlan_rx_register,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= nv_poll_controller,
+#endif
+};
+
+static const struct net_device_ops nv_netdev_ops_optimized = {
+	.ndo_open		= nv_open,
+	.ndo_stop		= nv_close,
+	.ndo_get_stats		= nv_get_stats,
+	.ndo_start_xmit		= nv_start_xmit_optimized,
 	.ndo_tx_timeout		= nv_tx_timeout,
 	.ndo_change_mtu		= nv_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -5592,11 +5609,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		goto out_freering;
 
 	if (!nv_optimized(np))
-		dev->hard_start_xmit = nv_start_xmit;
+		dev->netdev_ops = &nv_netdev_ops;
 	else
-		dev->hard_start_xmit = nv_start_xmit_optimized;
+		dev->netdev_ops = &nv_netdev_ops_optimized;
 
-	dev->netdev_ops = &nv_netdev_ops;
 #ifdef CONFIG_FORCEDETH_NAPI
 	netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP);
 #endif
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 363a166df8fb..60a263001933 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -138,15 +138,15 @@ resched:
 }
 
 static const struct net_device_ops ifb_netdev_ops = {
-	.ndo_validate_addr = eth_validate_addr,
 	.ndo_open	= ifb_open,
 	.ndo_stop	= ifb_close,
+	.ndo_start_xmit	= ifb_xmit,
+	.ndo_validate_addr = eth_validate_addr,
 };
 
 static void ifb_setup(struct net_device *dev)
 {
 	/* Initialize the device structure. */
-	dev->hard_start_xmit = ifb_xmit;
 	dev->destructor = free_netdev;
 	dev->netdev_ops = &ifb_netdev_ops;
 
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index ceb0a0458796..eca5684d5655 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -953,6 +953,7 @@ static int igb_is_need_ioport(struct pci_dev *pdev)
 static const struct net_device_ops igb_netdev_ops = {
 	.ndo_open 		= igb_open,
 	.ndo_stop		= igb_close,
+	.ndo_start_xmit		= igb_xmit_frame_adv,
 	.ndo_get_stats		= igb_get_stats,
 	.ndo_set_multicast_list	= igb_set_multi,
 	.ndo_set_mac_address	= igb_set_mac,
@@ -1080,7 +1081,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	netdev->netdev_ops = &igb_netdev_ops;
 	igb_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
-	netdev->hard_start_xmit = &igb_xmit_frame_adv;
 
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 3ca9daa70b38..a04e3892ddf4 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -324,6 +324,7 @@ ixgb_reset(struct ixgb_adapter *adapter)
 static const struct net_device_ops ixgb_netdev_ops = {
 	.ndo_open 		= ixgb_open,
 	.ndo_stop		= ixgb_close,
+	.ndo_start_xmit		= ixgb_xmit_frame,
 	.ndo_get_stats		= ixgb_get_stats,
 	.ndo_set_multicast_list	= ixgb_set_multi,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -414,7 +415,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	netdev->netdev_ops = &ixgb_netdev_ops;
-	netdev->hard_start_xmit = &ixgb_xmit_frame;
 	ixgb_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
 	netif_napi_add(netdev, &adapter->napi, ixgb_clean, 64);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 7ad07a00680a..40108523377f 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3728,6 +3728,7 @@ static int ixgbe_link_config(struct ixgbe_hw *hw)
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open 		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
+	.ndo_start_xmit		= ixgbe_xmit_frame,
 	.ndo_get_stats		= ixgbe_get_stats,
 	.ndo_set_multicast_list	= ixgbe_set_rx_mode,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -3824,7 +3825,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	}
 
 	netdev->netdev_ops = &ixgbe_netdev_ops;
-	netdev->hard_start_xmit = &ixgbe_xmit_frame;
 	ixgbe_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
 	strcpy(netdev->name, pci_name(pdev));
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 958450124dec..b7d438a367f3 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -145,6 +145,7 @@ static void loopback_dev_free(struct net_device *dev)
 
 static const struct net_device_ops loopback_ops = {
 	.ndo_init      = loopback_dev_init,
+	.ndo_start_xmit= loopback_xmit,
 	.ndo_get_stats = loopback_get_stats,
 };
 
@@ -155,7 +156,6 @@ static const struct net_device_ops loopback_ops = {
 static void loopback_setup(struct net_device *dev)
 {
 	dev->mtu		= (16 * 1024) + 20 + 20 + 12;
-	dev->hard_start_xmit	= loopback_xmit;
 	dev->hard_header_len	= ETH_HLEN;	/* 14	*/
 	dev->addr_len		= ETH_ALEN;	/* 6	*/
 	dev->tx_queue_len	= 0;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d00ea444e0a3..e8879217a1d2 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -140,7 +140,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 	return NULL;
 }
 
-static int macvlan_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct macvlan_dev *vlan = netdev_priv(dev);
 	unsigned int len = skb->len;
@@ -365,6 +365,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_init		= macvlan_init,
 	.ndo_open		= macvlan_open,
 	.ndo_stop		= macvlan_stop,
+	.ndo_start_xmit		= macvlan_start_xmit,
 	.ndo_change_mtu		= macvlan_change_mtu,
 	.ndo_change_rx_flags	= macvlan_change_rx_flags,
 	.ndo_set_mac_address	= macvlan_set_mac_address,
@@ -377,7 +378,6 @@ static void macvlan_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops		= &macvlan_netdev_ops;
-	dev->hard_start_xmit	= macvlan_hard_start_xmit;
 	dev->destructor		= free_netdev;
 	dev->header_ops		= &macvlan_hard_header_ops,
 	dev->ethtool_ops	= &macvlan_ethtool_ops;
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 318537efd583..a8d10630f804 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -8892,6 +8892,7 @@ static struct net_device * __devinit niu_alloc_and_init(
 static const struct net_device_ops niu_netdev_ops = {
 	.ndo_open		= niu_open,
 	.ndo_stop		= niu_close,
+	.ndo_start_xmit		= niu_start_xmit,
 	.ndo_get_stats		= niu_get_stats,
 	.ndo_set_multicast_list	= niu_set_rx_mode,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -8904,7 +8905,6 @@ static const struct net_device_ops niu_netdev_ops = {
 static void __devinit niu_assign_netdev_ops(struct net_device *dev)
 {
 	dev->netdev_ops = &niu_netdev_ops;
-	dev->hard_start_xmit = niu_start_xmit;
 	dev->ethtool_ops = &niu_ethtool_ops;
 	dev->watchdog_timeo = NIU_TX_TIMEOUT;
 }
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index bad99e8cac33..1b15a088a3ba 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -972,7 +972,8 @@ ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 static const struct net_device_ops ppp_netdev_ops = {
-	.ndo_do_ioctl = ppp_net_ioctl,
+	.ndo_start_xmit = ppp_start_xmit,
+	.ndo_do_ioctl   = ppp_net_ioctl,
 };
 
 static void ppp_setup(struct net_device *dev)
@@ -2437,8 +2438,6 @@ ppp_create_interface(int unit, int *retp)
 	skb_queue_head_init(&ppp->mrq);
 #endif /* CONFIG_PPP_MULTILINK */
 
-	dev->hard_start_xmit = ppp_start_xmit;
-
 	ret = -EEXIST;
 	mutex_lock(&all_ppp_mutex);
 	if (unit < 0)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index bac58ca628dd..dddf6aeff498 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1927,6 +1927,7 @@ static const struct net_device_ops rtl8169_netdev_ops = {
 	.ndo_open		= rtl8169_open,
 	.ndo_stop		= rtl8169_close,
 	.ndo_get_stats		= rtl8169_get_stats,
+	.ndo_start_xmit		= rtl8169_start_xmit,
 	.ndo_tx_timeout		= rtl8169_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_change_mtu		= rtl8169_change_mtu,
@@ -2125,7 +2126,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev->dev_addr[i] = RTL_R8(MAC0 + i);
 	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
-	dev->hard_start_xmit = rtl8169_start_xmit;
 	SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops);
 	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
 	dev->irq = pdev->irq;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 93c1b1d92962..f73ee7974003 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3805,6 +3805,7 @@ static __exit void skge_debug_cleanup(void)
 static const struct net_device_ops skge_netdev_ops = {
 	.ndo_open		= skge_up,
 	.ndo_stop		= skge_down,
+	.ndo_start_xmit		= skge_xmit_frame,
 	.ndo_do_ioctl		= skge_ioctl,
 	.ndo_get_stats		= skge_get_stats,
 	.ndo_tx_timeout		= skge_tx_timeout,
@@ -3831,7 +3832,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
 	}
 
 	SET_NETDEV_DEV(dev, &hw->pdev->dev);
-	dev->hard_start_xmit = skge_xmit_frame;
 	dev->netdev_ops = &skge_netdev_ops;
 	dev->ethtool_ops = &skge_ethtool_ops;
 	dev->watchdog_timeo = TX_WATCHDOG;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 251505125cb8..3668e81e474d 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -4047,6 +4047,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
   {
 	.ndo_open		= sky2_up,
 	.ndo_stop		= sky2_down,
+	.ndo_start_xmit		= sky2_xmit_frame,
 	.ndo_do_ioctl		= sky2_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= sky2_set_mac_address,
@@ -4063,6 +4064,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
   {
 	.ndo_open		= sky2_up,
 	.ndo_stop		= sky2_down,
+	.ndo_start_xmit		= sky2_xmit_frame,
 	.ndo_do_ioctl		= sky2_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= sky2_set_mac_address,
@@ -4090,7 +4092,6 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw,
 
 	SET_NETDEV_DEV(dev, &hw->pdev->dev);
 	dev->irq = hw->pdev->irq;
-	dev->hard_start_xmit = sky2_xmit_frame;
 	SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops);
 	dev->watchdog_timeo = TX_WATCHDOG;
 	dev->netdev_ops = &sky2_netdev_ops[port];
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 4b97cb601361..9ba18e1bc341 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12614,19 +12614,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	else
 		tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
 
-	/* All chips before 5787 can get confused if TX buffers
-	 * straddle the 4GB address boundary in some cases.
-	 */
-	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
-		tp->dev->hard_start_xmit = tg3_start_xmit;
-	else
-		tp->dev->hard_start_xmit = tg3_start_xmit_dma_bug;
-
 	tp->rx_offset = 2;
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 &&
 	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0)
@@ -13346,6 +13333,26 @@ static void __devinit tg3_init_coal(struct tg3 *tp)
 static const struct net_device_ops tg3_netdev_ops = {
 	.ndo_open		= tg3_open,
 	.ndo_stop		= tg3_close,
+	.ndo_start_xmit		= tg3_start_xmit,
+	.ndo_get_stats		= tg3_get_stats,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_multicast_list	= tg3_set_rx_mode,
+	.ndo_set_mac_address	= tg3_set_mac_addr,
+	.ndo_do_ioctl		= tg3_ioctl,
+	.ndo_tx_timeout		= tg3_tx_timeout,
+	.ndo_change_mtu		= tg3_change_mtu,
+#if TG3_VLAN_TAG_USED
+	.ndo_vlan_rx_register	= tg3_vlan_rx_register,
+#endif
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= tg3_poll_controller,
+#endif
+};
+
+static const struct net_device_ops tg3_netdev_ops_dma_bug = {
+	.ndo_open		= tg3_open,
+	.ndo_stop		= tg3_close,
+	.ndo_start_xmit		= tg3_start_xmit_dma_bug,
 	.ndo_get_stats		= tg3_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_multicast_list	= tg3_set_rx_mode,
@@ -13475,7 +13482,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	tp->rx_jumbo_pending = TG3_DEF_RX_JUMBO_RING_PENDING;
 	tp->tx_pending = TG3_DEF_TX_RING_PENDING;
 
-	dev->netdev_ops = &tg3_netdev_ops;
 	netif_napi_add(dev, &tp->napi, tg3_poll, 64);
 	dev->ethtool_ops = &tg3_ethtool_ops;
 	dev->watchdog_timeo = TG3_TX_TIMEOUT;
@@ -13488,6 +13494,17 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 		goto err_out_iounmap;
 	}
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
+		dev->netdev_ops = &tg3_netdev_ops;
+	else
+		dev->netdev_ops = &tg3_netdev_ops_dma_bug;
+
+
 	/* The EPB bridge inside 5714, 5715, and 5780 and any
 	 * device behind the EPB cannot support DMA addresses > 40-bit.
 	 * On 64-bit systems with IOMMU, use 40-bit dma_mask.
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b4c941444756..fd0b11ea5562 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -308,13 +308,14 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu)
 static const struct net_device_ops tun_netdev_ops = {
 	.ndo_open		= tun_net_open,
 	.ndo_stop		= tun_net_close,
+	.ndo_start_xmit		= tun_net_xmit,
 	.ndo_change_mtu		= tun_net_change_mtu,
-
 };
 
 static const struct net_device_ops tap_netdev_ops = {
 	.ndo_open		= tun_net_open,
 	.ndo_stop		= tun_net_close,
+	.ndo_start_xmit		= tun_net_xmit,
 	.ndo_change_mtu		= tun_net_change_mtu,
 	.ndo_set_multicast_list	= tun_net_mclist,
 	.ndo_set_mac_address	= eth_mac_addr,
@@ -691,7 +692,6 @@ static void tun_setup(struct net_device *dev)
 	tun->owner = -1;
 	tun->group = -1;
 
-	dev->hard_start_xmit = tun_net_xmit;
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = free_netdev;
 	dev->features |= NETIF_F_NETNS_LOCAL;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 4f93a55aaaa5..852d0e7c4e62 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -265,6 +265,7 @@ static void veth_dev_free(struct net_device *dev)
 static const struct net_device_ops veth_netdev_ops = {
 	.ndo_init	= veth_dev_init,
 	.ndo_open	= veth_open,
+	.ndo_start_xmit = veth_xmit,
 	.ndo_get_stats	= veth_get_stats,
 };
 
@@ -273,7 +274,6 @@ static void veth_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &veth_netdev_ops;
-	dev->hard_start_xmit = veth_xmit;
 	dev->ethtool_ops = &veth_ethtool_ops;
 	dev->features |= NETIF_F_LLTX;
 	dev->destructor = veth_dev_free;
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 033e63a68436..58e25d090ae0 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -852,6 +852,7 @@ static int velocity_soft_reset(struct velocity_info *vptr)
 static const struct net_device_ops velocity_netdev_ops = {
 	.ndo_open		= velocity_open,
 	.ndo_stop		= velocity_close,
+	.ndo_start_xmit		= velocity_xmit,
 	.ndo_get_stats		= velocity_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_multicast_list	= velocity_set_multi,
@@ -971,7 +972,6 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi
 	vptr->phy_id = MII_GET_PHY_ID(vptr->mac_regs);
 
 	dev->irq = pdev->irq;
-	dev->hard_start_xmit = velocity_xmit;
 	dev->netdev_ops = &velocity_netdev_ops;
 	dev->ethtool_ops = &velocity_ethtool_ops;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 981a089d5149..d8fb23679ee3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -454,8 +454,8 @@ struct netdev_queue {
 
 /*
  * This structure defines the management hooks for network devices.
- * The following hooks can bed defined and are optonal (can be null)
- * unless otherwise noted.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
  *
  * int (*ndo_init)(struct net_device *dev);
  *     This function is called once when network device is registered.
@@ -475,6 +475,15 @@ struct netdev_queue {
  *     This function is called when network device transistions to the down
  *     state.
  *
+ * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev);
+ *	Called when a packet needs to be transmitted.
+ *	Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED,
+ *	Required can not be NULL.
+ *
+ * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb);
+ *	Called to decide which queue to when device supports multiple
+ *	transmit queues.
+ *
  * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
  *	This function is called to allow device receiver to make
  *	changes to configuration when multicast or promiscious is enabled.
@@ -508,7 +517,7 @@ struct netdev_queue {
  *	of a device. If not defined, any request to change MTU will
  *	will return an error.
  *
- * void (*ndo_tx_timeout) (struct net_device *dev);
+ * void (*ndo_tx_timeout)(struct net_device *dev);
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
@@ -538,6 +547,10 @@ struct net_device_ops {
 	void			(*ndo_uninit)(struct net_device *dev);
 	int			(*ndo_open)(struct net_device *dev);
 	int			(*ndo_stop)(struct net_device *dev);
+	int			(*ndo_start_xmit) (struct sk_buff *skb,
+						   struct net_device *dev);
+	u16			(*ndo_select_queue)(struct net_device *dev,
+						    struct sk_buff *skb);
 #define HAVE_CHANGE_RX_FLAGS
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
@@ -557,8 +570,10 @@ struct net_device_ops {
 	int			(*ndo_set_config)(struct net_device *dev,
 					          struct ifmap *map);
 #define HAVE_CHANGE_MTU
-	int			(*ndo_change_mtu)(struct net_device *dev, int new_mtu);
-
+	int			(*ndo_change_mtu)(struct net_device *dev,
+						  int new_mtu);
+	int			(*ndo_neigh_setup)(struct net_device *dev,
+						   struct neigh_parms *);
 #define HAVE_TX_TIMEOUT
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
@@ -761,18 +776,12 @@ struct net_device
 	/* Number of TX queues currently active in device  */
 	unsigned int		real_num_tx_queues;
 
-	/* Map buffer to appropriate transmit queue */
-	u16			(*select_queue)(struct net_device *dev,
-						struct sk_buff *skb);
-
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 /*
  * One part is mostly used on xmit path (device)
  */
 	void			*priv;	/* pointer to private data	*/
-	int			(*hard_start_xmit) (struct sk_buff *skb,
-						    struct net_device *dev);
 	/* These may be needed for future network-power-down code. */
 	unsigned long		trans_start;	/* Time (in jiffies) of last Tx	*/
 
@@ -800,8 +809,6 @@ struct net_device
 	/* Called from unregister, can be used to call free_netdev */
 	void (*destructor)(struct net_device *dev);
 
-	int (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
-
 #ifdef CONFIG_NETPOLL
 	struct netpoll_info	*npinfo;
 #endif
@@ -842,6 +849,10 @@ struct net_device
 		void			(*uninit)(struct net_device *dev);
 		int			(*open)(struct net_device *dev);
 		int			(*stop)(struct net_device *dev);
+		int			(*hard_start_xmit) (struct sk_buff *skb,
+							    struct net_device *dev);
+		u16			(*select_queue)(struct net_device *dev,
+							struct sk_buff *skb);
 		void			(*change_rx_flags)(struct net_device *dev,
 							   int flags);
 		void			(*set_rx_mode)(struct net_device *dev);
@@ -854,6 +865,8 @@ struct net_device
 		int			(*set_config)(struct net_device *dev,
 						      struct ifmap *map);
 		int			(*change_mtu)(struct net_device *dev, int new_mtu);
+		int			(*neigh_setup)(struct net_device *dev,
+						       struct neigh_parms *);
 		void			(*tx_timeout) (struct net_device *dev);
 		struct net_device_stats* (*get_stats)(struct net_device *dev);
 		void			(*vlan_rx_register)(struct net_device *dev,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 920ce3348398..18538d7460d7 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -163,10 +163,11 @@ static const struct ethtool_ops br_ethtool_ops = {
 static const struct net_device_ops br_netdev_ops = {
 	.ndo_open		 = br_dev_open,
 	.ndo_stop		 = br_dev_stop,
-	.ndo_set_mac_address = br_set_mac_address,
-	.ndo_set_multicast_list = br_dev_set_multicast_list,
-	.ndo_change_mtu	 = br_change_mtu,
-	.ndo_do_ioctl	= br_dev_ioctl,
+	.ndo_start_xmit		 = br_dev_xmit,
+	.ndo_set_mac_address	 = br_set_mac_address,
+	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
+	.ndo_change_mtu		 = br_change_mtu,
+	.ndo_do_ioctl		 = br_dev_ioctl,
 };
 
 void br_dev_setup(struct net_device *dev)
@@ -175,7 +176,6 @@ void br_dev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->hard_start_xmit = br_dev_xmit;
 	dev->destructor = free_netdev;
 	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
 	dev->tx_queue_len = 0;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ee3a8dd13f55..727c5c510a60 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -373,7 +373,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
 		return -EINVAL;
 
-	if (dev->hard_start_xmit == br_dev_xmit)
+	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
 		return -ELOOP;
 
 	if (dev->br_port != NULL)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8843f4e3f5e1..4615e9a443aa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1660,6 +1660,9 @@ static int dev_gso_segment(struct sk_buff *skb)
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	prefetch(&dev->netdev_ops->ndo_start_xmit);
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
@@ -1671,7 +1674,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				goto gso;
 		}
 
-		return dev->hard_start_xmit(skb, dev);
+		return ops->ndo_start_xmit(skb, dev);
 	}
 
 gso:
@@ -1681,7 +1684,7 @@ gso:
 
 		skb->next = nskb->next;
 		nskb->next = NULL;
-		rc = dev->hard_start_xmit(nskb, dev);
+		rc = ops->ndo_start_xmit(nskb, dev);
 		if (unlikely(rc)) {
 			nskb->next = skb->next;
 			skb->next = nskb;
@@ -1755,10 +1758,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	u16 queue_index = 0;
 
-	if (dev->select_queue)
-		queue_index = dev->select_queue(dev, skb);
+	if (ops->ndo_select_queue)
+		queue_index = ops->ndo_select_queue(dev, skb);
 	else if (dev->real_num_tx_queues > 1)
 		queue_index = simple_tx_hash(dev, skb);
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cca6a55909eb..9c3717a23cf7 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1327,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 				      struct neigh_table *tbl)
 {
 	struct neigh_parms *p, *ref;
-	struct net *net;
+	struct net *net = dev_net(dev);
+	const struct net_device_ops *ops = dev->netdev_ops;
 
-	net = dev_net(dev);
 	ref = lookup_neigh_params(tbl, net, 0);
 	if (!ref)
 		return NULL;
@@ -1341,7 +1341,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
 
-		if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
 			kfree(p);
 			return NULL;
 		}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 630df6034444..96fb0519eb7a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work)
 
 	while ((skb = skb_dequeue(&npinfo->txq))) {
 		struct net_device *dev = skb->dev;
+		const struct net_device_ops *ops = dev->netdev_ops;
 		struct netdev_queue *txq;
 
 		if (!netif_device_present(dev) || !netif_running(dev)) {
@@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work)
 		__netif_tx_lock(txq, smp_processor_id());
 		if (netif_tx_queue_stopped(txq) ||
 		    netif_tx_queue_frozen(txq) ||
-		    dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
 			local_irq_restore(flags);
@@ -273,6 +274,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
 	struct net_device *dev = np->dev;
+	const struct net_device_ops *ops = dev->netdev_ops;
 	struct netpoll_info *npinfo = np->dev->npinfo;
 
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -293,7 +295,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq))
-					status = dev->hard_start_xmit(skb, dev);
+					status = ops->ndo_start_xmit(skb, dev);
 				__netif_tx_unlock(txq);
 
 				if (status == NETDEV_TX_OK)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4e77914c4d42..15e0c2c7aacf 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3352,14 +3352,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
 
 static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
-	struct net_device *odev = NULL;
+	struct net_device *odev = pkt_dev->odev;
+	int (*xmit)(struct sk_buff *, struct net_device *)
+		= odev->netdev_ops->ndo_start_xmit;
 	struct netdev_queue *txq;
 	__u64 idle_start = 0;
 	u16 queue_map;
 	int ret;
 
-	odev = pkt_dev->odev;
-
 	if (pkt_dev->delay_us || pkt_dev->delay_ns) {
 		u64 now;
 
@@ -3440,7 +3440,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 		atomic_inc(&(pkt_dev->skb->users));
 	      retry_now:
-		ret = odev->hard_start_xmit(pkt_dev->skb, odev);
+		ret = (*xmit)(pkt_dev->skb, odev);
 		if (likely(ret == NETDEV_TX_OK)) {
 			pkt_dev->last_ok = 1;
 			pkt_dev->sofar++;
-- 
cgit v1.2.3


From 145186a39570244aead77dc2efc559e5cac90548 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 20 Nov 2008 20:29:48 -0800
Subject: fddi: convert to new network device ops

Similar to ethernet. Convert infrastructure and the one lone FDDI
driver (for the one lone user of that hardware??). Compile tested only.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/skfp/skfddi.c  | 19 ++++++++++++-------
 include/linux/fddidevice.h |  1 +
 net/802/fddi.c             |  8 ++++++--
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 282bb47deccc..7fbd4f84f272 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -168,6 +168,17 @@ static int num_boards;	/* total number of adapters configured */
 #define PRINTK(s, args...)
 #endif				// DRIVERDEBUG
 
+static const struct net_device_ops skfp_netdev_ops = {
+	.ndo_open		= skfp_open,
+	.ndo_stop		= skfp_close,
+	.ndo_start_xmit		= skfp_send_pkt,
+	.ndo_get_stats		= skfp_ctl_get_stats,
+	.ndo_change_mtu		= fddi_change_mtu,
+	.ndo_set_multicast_list = skfp_ctl_set_multicast_list,
+	.ndo_set_mac_address	= skfp_ctl_set_mac_address,
+	.ndo_do_ioctl		= skfp_ioctl,
+};
+
 /*
  * =================
  * = skfp_init_one =
@@ -253,13 +264,7 @@ static int skfp_init_one(struct pci_dev *pdev,
 	}
 
 	dev->irq = pdev->irq;
-	dev->get_stats = &skfp_ctl_get_stats;
-	dev->open = &skfp_open;
-	dev->stop = &skfp_close;
-	dev->hard_start_xmit = &skfp_send_pkt;
-	dev->set_multicast_list = &skfp_ctl_set_multicast_list;
-	dev->set_mac_address = &skfp_ctl_set_mac_address;
-	dev->do_ioctl = &skfp_ioctl;
+	dev->netdev_ops = &skfp_netdev_ops;
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index e61e42dfd317..155bafd9e886 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -27,6 +27,7 @@
 #ifdef __KERNEL__
 extern __be16	fddi_type_trans(struct sk_buff *skb,
 				struct net_device *dev);
+extern int fddi_change_mtu(struct net_device *dev, int new_mtu);
 extern struct net_device *alloc_fddidev(int sizeof_priv);
 #endif
 
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 0549317b9356..f1611a1e06a7 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -167,23 +167,27 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 EXPORT_SYMBOL(fddi_type_trans);
 
-static int fddi_change_mtu(struct net_device *dev, int new_mtu)
+int fddi_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
 		return(-EINVAL);
 	dev->mtu = new_mtu;
 	return(0);
 }
+EXPORT_SYMBOL(fddi_change_mtu);
 
 static const struct header_ops fddi_header_ops = {
 	.create		= fddi_header,
 	.rebuild	= fddi_rebuild_header,
 };
 
+
 static void fddi_setup(struct net_device *dev)
 {
-	dev->change_mtu		= fddi_change_mtu;
 	dev->header_ops		= &fddi_header_ops;
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+	dev->change_mtu		= fddi_change_mtu,
+#endif
 
 	dev->type		= ARPHRD_FDDI;
 	dev->hard_header_len	= FDDI_K_SNAP_HLEN+3;	/* Assume 802.2 SNAP hdr len + 3 pad bytes */
-- 
cgit v1.2.3


From 748ff68fad9600593c6abe47856037602bd5d133 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 20 Nov 2008 20:32:15 -0800
Subject: hippi: convert driver to net_device_ops

Convert the HIPPI infrastructure for use with net_device_ops.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/rrunner.c       | 15 +++++++++++----
 include/linux/hippidevice.h |  4 +++-
 net/802/hippi.c             | 14 +++++++++-----
 3 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c
index 6e4131f9a933..b4e3ddd0b59c 100644
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -63,6 +63,16 @@ MODULE_LICENSE("GPL");
 
 static char version[] __devinitdata = "rrunner.c: v0.50 11/11/2002  Jes Sorensen (jes@wildopensource.com)\n";
 
+
+static const struct net_device_ops rr_netdev_ops = {
+	.ndo_open 		= rr_open,
+	.ndo_stop		= rr_close,
+	.ndo_do_ioctl		= rr_ioctl,
+	.ndo_start_xmit		= rr_start_xmit,
+	.ndo_change_mtu		= hippi_change_mtu,
+	.ndo_set_mac_address	= hippi_mac_addr,
+};
+
 /*
  * Implementation notes:
  *
@@ -115,10 +125,7 @@ static int __devinit rr_init_one(struct pci_dev *pdev,
 	spin_lock_init(&rrpriv->lock);
 
 	dev->irq = pdev->irq;
-	dev->open = &rr_open;
-	dev->hard_start_xmit = &rr_start_xmit;
-	dev->stop = &rr_close;
-	dev->do_ioctl = &rr_ioctl;
+	dev->netdev_ops = &rr_netdev_ops;
 
 	dev->base_addr = pci_resource_start(pdev, 0);
 
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index bab303dafd6e..f148e4908410 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -32,7 +32,9 @@ struct hippi_cb {
 };
 
 extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
-
+extern int hippi_change_mtu(struct net_device *dev, int new_mtu);
+extern int hippi_mac_addr(struct net_device *dev, void *p);
+extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
 extern struct net_device *alloc_hippi_dev(int sizeof_priv);
 #endif
 
diff --git a/net/802/hippi.c b/net/802/hippi.c
index e35dc1e0915d..313b9ebf92ee 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -144,7 +144,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 EXPORT_SYMBOL(hippi_type_trans);
 
-static int hippi_change_mtu(struct net_device *dev, int new_mtu)
+int hippi_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/*
 	 * HIPPI's got these nice large MTUs.
@@ -154,12 +154,13 @@ static int hippi_change_mtu(struct net_device *dev, int new_mtu)
 	dev->mtu = new_mtu;
 	return(0);
 }
+EXPORT_SYMBOL(hippi_change_mtu);
 
 /*
  * For HIPPI we will actually use the lower 4 bytes of the hardware
  * address as the I-FIELD rather than the actual hardware address.
  */
-static int hippi_mac_addr(struct net_device *dev, void *p)
+int hippi_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr = p;
 	if (netif_running(dev))
@@ -167,8 +168,9 @@ static int hippi_mac_addr(struct net_device *dev, void *p)
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	return 0;
 }
+EXPORT_SYMBOL(hippi_mac_addr);
 
-static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
+int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
 {
 	/* Never send broadcast/multicast ARP messages */
 	p->mcast_probes = 0;
@@ -181,6 +183,7 @@ static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
 		p->ucast_probes = 0;
 	return 0;
 }
+EXPORT_SYMBOL(hippi_neigh_setup_dev);
 
 static const struct header_ops hippi_header_ops = {
 	.create		= hippi_header,
@@ -190,11 +193,12 @@ static const struct header_ops hippi_header_ops = {
 
 static void hippi_setup(struct net_device *dev)
 {
-	dev->set_multicast_list		= NULL;
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 	dev->change_mtu			= hippi_change_mtu;
-	dev->header_ops			= &hippi_header_ops;
 	dev->set_mac_address 		= hippi_mac_addr;
 	dev->neigh_setup 		= hippi_neigh_setup_dev;
+#endif
+	dev->header_ops			= &hippi_header_ops;
 
 	/*
 	 * We don't support HIPPI `ARP' for the time being, and probably
-- 
cgit v1.2.3


From 9db66bdcc83749affe61c61eb8ff3cf08f42afec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 20 Nov 2008 20:39:09 -0800
Subject: net: convert TCP/DCCP ehash rwlocks to spinlocks

Now TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks.

/proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'.

This should speedup writers, since spin_lock()/spin_unlock()
only use one atomic operation instead of two for write_lock()/write_unlock()

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 14 +++++++-------
 net/ipv4/inet_hashtables.c    | 21 ++++++++++-----------
 net/ipv4/inet_timewait_sock.c | 22 +++++++++++-----------
 net/ipv4/tcp_ipv4.c           | 12 ++++++------
 net/ipv6/inet6_hashtables.c   | 15 +++++++--------
 5 files changed, 41 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 62d2dd0d7860..28b3ee3e8d6d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -116,7 +116,7 @@ struct inet_hashinfo {
 	 * TIME_WAIT sockets use a separate chain (twchain).
 	 */
 	struct inet_ehash_bucket	*ehash;
-	rwlock_t			*ehash_locks;
+	spinlock_t			*ehash_locks;
 	unsigned int			ehash_size;
 	unsigned int			ehash_locks_mask;
 
@@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
-static inline rwlock_t *inet_ehash_lockp(
+static inline spinlock_t *inet_ehash_lockp(
 	struct inet_hashinfo *hashinfo,
 	unsigned int hash)
 {
@@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
 		size = 4096;
 	if (sizeof(rwlock_t) != 0) {
 #ifdef CONFIG_NUMA
-		if (size * sizeof(rwlock_t) > PAGE_SIZE)
-			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+		if (size * sizeof(spinlock_t) > PAGE_SIZE)
+			hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
 		else
 #endif
-		hashinfo->ehash_locks =	kmalloc(size * sizeof(rwlock_t),
+		hashinfo->ehash_locks =	kmalloc(size * sizeof(spinlock_t),
 						GFP_KERNEL);
 		if (!hashinfo->ehash_locks)
 			return ENOMEM;
 		for (i = 0; i < size; i++)
-			rwlock_init(&hashinfo->ehash_locks[i]);
+			spin_lock_init(&hashinfo->ehash_locks[i]);
 	}
 	hashinfo->ehash_locks_mask = size - 1;
 	return 0;
@@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
 	if (hashinfo->ehash_locks) {
 #ifdef CONFIG_NUMA
 		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
-							sizeof(rwlock_t);
+							sizeof(spinlock_t);
 		if (size > PAGE_SIZE)
 			vfree(hashinfo->ehash_locks);
 		else
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 377d004e5723..4c273a9981a6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
-	prefetch(head->chain.first);
-	write_lock(lock);
+	spin_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
 	sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -308,8 +307,8 @@ unique:
 	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 
 	if (twp) {
 		*twp = tw;
@@ -325,7 +324,7 @@ unique:
 	return 0;
 
 not_unique:
-	write_unlock(lock);
+	spin_unlock(lock);
 	return -EADDRNOTAVAIL;
 }
 
@@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
-	rwlock_t *lock;
+	spinlock_t *lock;
 	struct inet_ehash_bucket *head;
 
 	WARN_ON(!sk_unhashed(sk));
@@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk)
 	list = &head->chain;
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-	write_lock(lock);
+	spin_lock(lock);
 	__sk_nulls_add_node_rcu(sk, list);
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
@@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk)
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 		spin_unlock_bh(&ilb->lock);
 	} else {
-		rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-		write_lock_bh(lock);
+		spin_lock_bh(lock);
 		if (__sk_nulls_del_node_init_rcu(sk))
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-		write_unlock_bh(lock);
+		spin_unlock_bh(lock);
 	}
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 60689951ecdb..8554d0ea1719 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	struct inet_bind_hashbucket *bhead;
 	struct inet_bind_bucket *tb;
 	/* Unlink from established hashes. */
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
 
-	write_lock(lock);
+	spin_lock(lock);
 	if (hlist_nulls_unhashed(&tw->tw_node)) {
-		write_unlock(lock);
+		spin_unlock(lock);
 		return;
 	}
 	hlist_nulls_del_rcu(&tw->tw_node);
 	sk_nulls_node_init(&tw->tw_node);
-	write_unlock(lock);
+	spin_unlock(lock);
 
 	/* Disassociate with bind bucket. */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
@@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	const struct inet_sock *inet = inet_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+	spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 	struct inet_bind_hashbucket *bhead;
 	/* Step 1: Put TW into bind hash. Original socket stays there too.
 	   Note, that any socket with inet->num != 0 MUST be bound in
@@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
 	spin_unlock(&bhead->lock);
 
-	write_lock(lock);
+	spin_lock(lock);
 
 	/*
 	 * Step 2: Hash TW into TIMEWAIT chain.
@@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	if (__sk_nulls_del_node_init_rcu(sk))
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-	write_unlock(lock);
+	spin_unlock(lock);
 }
 
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
 	for (h = 0; h < (hashinfo->ehash_size); h++) {
 		struct inet_ehash_bucket *head =
 			inet_ehash_bucket(hashinfo, h);
-		rwlock_t *lock = inet_ehash_lockp(hashinfo, h);
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
 restart:
-		write_lock(lock);
+		spin_lock(lock);
 		sk_nulls_for_each(sk, node, &head->twchain) {
 
 			tw = inet_twsk(sk);
@@ -438,13 +438,13 @@ restart:
 				continue;
 
 			atomic_inc(&tw->tw_refcnt);
-			write_unlock(lock);
+			spin_unlock(lock);
 			inet_twsk_deschedule(tw, twdr);
 			inet_twsk_put(tw);
 
 			goto restart;
 		}
-		write_unlock(lock);
+		spin_unlock(lock);
 	}
 	local_bh_enable();
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 330b08a12274..a81caa1be0cf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq)
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
-		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
 		/* Lockless fast path for the common case of empty buckets */
 		if (empty_bucket(st))
 			continue;
 
-		read_lock_bh(lock);
+		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
 			    !net_eq(sock_net(sk), net)) {
@@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)
 			rc = tw;
 			goto out;
 		}
-		read_unlock_bh(lock);
+		spin_unlock_bh(lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 	}
 out:
@@ -2023,7 +2023,7 @@ get_tw:
 			cur = tw;
 			goto out;
 		}
-		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		/* Look for next non empty bucket */
@@ -2033,7 +2033,7 @@ get_tw:
 		if (st->bucket >= tcp_hashinfo.ehash_size)
 			return NULL;
 
-		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
 		sk = sk_nulls_next(sk);
@@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
 		if (v)
-			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		break;
 	}
 }
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21544b9be259..e0fd68187f83 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk)
 	} else {
 		unsigned int hash;
 		struct hlist_nulls_head *list;
-		rwlock_t *lock;
+		spinlock_t *lock;
 
 		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
 		list = &inet_ehash_bucket(hashinfo, hash)->chain;
 		lock = inet_ehash_lockp(hashinfo, hash);
-		write_lock(lock);
+		spin_lock(lock);
 		__sk_nulls_add_node_rcu(sk, list);
-		write_unlock(lock);
+		spin_unlock(lock);
 	}
 
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
 						inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
-	prefetch(head->chain.first);
-	write_lock(lock);
+	spin_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
 	sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -230,8 +229,8 @@ unique:
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
 	sk->sk_hash = hash;
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 
 	if (twp != NULL) {
 		*twp = tw;
@@ -246,7 +245,7 @@ unique:
 	return 0;
 
 not_unique:
-	write_unlock(lock);
+	spin_unlock(lock);
 	return -EADDRNOTAVAIL;
 }
 
-- 
cgit v1.2.3


From 2f90b8657ec942d1880f720e0177ee71df7c8e3c Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 20 Nov 2008 20:52:10 -0800
Subject: ixgbe: this patch adds support for DCB to the kernel and ixgbe driver

This adds support for Data Center Bridging (DCB) features in the ixgbe
driver and adds an rtnetlink interface for configuring DCB to the
kernel.  The DCB feature support included are Priority Grouping (PG) -
which allows bandwidth guarantees to be allocated to groups to traffic
based on the 802.1q priority, and Priority Based Flow Control (PFC) -
which introduces a new MAC control PAUSE frame which works at
granularity of the 802.1p priority instead of the link (IEEE 802.3x).

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig                 |  10 +
 drivers/net/ixgbe/Makefile          |   2 +
 drivers/net/ixgbe/ixgbe.h           |  25 +-
 drivers/net/ixgbe/ixgbe_dcb.c       | 332 +++++++++++++++++
 drivers/net/ixgbe/ixgbe_dcb.h       | 157 ++++++++
 drivers/net/ixgbe/ixgbe_dcb_82598.c | 398 ++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_dcb_82598.h |  94 +++++
 drivers/net/ixgbe/ixgbe_dcb_nl.c    | 356 ++++++++++++++++++
 drivers/net/ixgbe/ixgbe_ethtool.c   |  30 +-
 drivers/net/ixgbe/ixgbe_main.c      | 200 +++++++++-
 include/linux/dcbnl.h               | 230 ++++++++++++
 include/linux/netdevice.h           |   8 +
 include/linux/rtnetlink.h           |   5 +
 include/net/dcbnl.h                 |  44 +++
 net/Kconfig                         |   1 +
 net/Makefile                        |   3 +
 net/dcb/Kconfig                     |  12 +
 net/dcb/Makefile                    |   1 +
 net/dcb/dcbnl.c                     | 704 ++++++++++++++++++++++++++++++++++++
 19 files changed, 2593 insertions(+), 19 deletions(-)
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb.c
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb.h
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.c
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.h
 create mode 100644 drivers/net/ixgbe/ixgbe_dcb_nl.c
 create mode 100644 include/linux/dcbnl.h
 create mode 100644 include/net/dcbnl.h
 create mode 100644 net/dcb/Kconfig
 create mode 100644 net/dcb/Makefile
 create mode 100644 net/dcb/dcbnl.c

(limited to 'include')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index afa206590ada..efd461d7c2bb 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2451,6 +2451,16 @@ config IXGBE_DCA
 	  driver.  DCA is a method for warming the CPU cache before data
 	  is used, with the intent of lessening the impact of cache misses.
 
+config IXGBE_DCBNL
+	bool "Data Center Bridging (DCB) Support"
+	default n
+	depends on IXGBE && DCBNL
+	---help---
+	  Say Y here if you want to use Data Center Bridging (DCB) in the
+	  driver.
+
+	  If unsure, say N.
+
 config IXGB
 	tristate "Intel(R) PRO/10GbE support"
 	depends on PCI
diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
index ccd83d9f579e..3228e508e628 100644
--- a/drivers/net/ixgbe/Makefile
+++ b/drivers/net/ixgbe/Makefile
@@ -34,3 +34,5 @@ obj-$(CONFIG_IXGBE) += ixgbe.o
 
 ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
               ixgbe_82598.o ixgbe_phy.o
+
+ixgbe-$(CONFIG_IXGBE_DCBNL) +=  ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 132854f646ba..796f189f3879 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -35,7 +35,7 @@
 
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
-
+#include "ixgbe_dcb.h"
 #ifdef CONFIG_IXGBE_DCA
 #include <linux/dca.h>
 #endif
@@ -84,6 +84,7 @@
 #define IXGBE_TX_FLAGS_TSO		(u32)(1 << 2)
 #define IXGBE_TX_FLAGS_IPV4		(u32)(1 << 3)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK   0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT	16
 
 #define IXGBE_MAX_LRO_DESCRIPTORS       8
@@ -134,7 +135,7 @@ struct ixgbe_ring {
 
 	u16 reg_idx; /* holds the special value that gets the hardware register
 		      * offset associated with this ring, which is different
-		      * for DCE and RSS modes */
+		      * for DCB and RSS modes */
 
 #ifdef CONFIG_IXGBE_DCA
 	/* cpu for tx queue */
@@ -152,8 +153,10 @@ struct ixgbe_ring {
 	u16 rx_buf_len;
 };
 
+#define RING_F_DCB  0
 #define RING_F_VMDQ 1
 #define RING_F_RSS  2
+#define IXGBE_MAX_DCB_INDICES   8
 #define IXGBE_MAX_RSS_INDICES  16
 #define IXGBE_MAX_VMDQ_INDICES 16
 struct ixgbe_ring_feature {
@@ -164,6 +167,10 @@ struct ixgbe_ring_feature {
 #define MAX_RX_QUEUES 64
 #define MAX_TX_QUEUES 32
 
+#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+                              ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
+
 /* MAX_MSIX_Q_VECTORS of these are allocated,
  * but we only use one per queue-specific vector.
  */
@@ -215,6 +222,9 @@ struct ixgbe_adapter {
 	struct work_struct reset_task;
 	struct ixgbe_q_vector q_vector[MAX_MSIX_Q_VECTORS];
 	char name[MAX_MSIX_COUNT][IFNAMSIZ + 5];
+	struct ixgbe_dcb_config dcb_cfg;
+	struct ixgbe_dcb_config temp_dcb_cfg;
+	u8 dcb_set_bitmap;
 
 	/* Interrupt Throttle Rate */
 	u32 itr_setting;
@@ -270,6 +280,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG_FAN_FAIL_CAPABLE             (u32)(1 << 20)
 #define IXGBE_FLAG_NEED_LINK_UPDATE             (u32)(1 << 22)
 #define IXGBE_FLAG_IN_WATCHDOG_TASK             (u32)(1 << 23)
+#define IXGBE_FLAG_DCB_ENABLED                  (u32)(1 << 24)
 
 /* default to trying for four seconds */
 #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
@@ -313,6 +324,12 @@ enum ixgbe_boards {
 };
 
 extern struct ixgbe_info ixgbe_82598_info;
+#ifdef CONFIG_IXGBE_DCBNL
+extern struct dcbnl_rtnl_ops dcbnl_ops;
+extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
+                              struct ixgbe_dcb_config *dst_dcb_cfg,
+                              int tc_max);
+#endif
 
 extern char ixgbe_driver_name[];
 extern const char ixgbe_driver_version[];
@@ -327,5 +344,9 @@ extern int ixgbe_setup_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *)
 extern void ixgbe_free_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
 extern void ixgbe_free_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
 extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
+extern void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter);
+extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
+void ixgbe_napi_add_all(struct ixgbe_adapter *adapter);
+void ixgbe_napi_del_all(struct ixgbe_adapter *adapter);
 
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_dcb.c b/drivers/net/ixgbe/ixgbe_dcb.c
new file mode 100644
index 000000000000..e2e28ac63dec
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb.c
@@ -0,0 +1,332 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@intel.com>
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_config - Struct containing DCB settings.
+ * @dcb_config: Pointer to DCB config structure
+ *
+ * This function checks DCB rules for DCB settings.
+ * The following rules are checked:
+ * 1. The sum of bandwidth percentages of all Bandwidth Groups must total 100%.
+ * 2. The sum of bandwidth percentages of all Traffic Classes within a Bandwidth
+ *    Group must total 100.
+ * 3. A Traffic Class should not be set to both Link Strict Priority
+ *    and Group Strict Priority.
+ * 4. Link strict Bandwidth Groups can only have link strict traffic classes
+ *    with zero bandwidth.
+ */
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	s32 ret_val = 0;
+	u8 i, j, bw = 0, bw_id;
+	u8 bw_sum[2][MAX_BW_GROUP];
+	bool link_strict[2][MAX_BW_GROUP];
+
+	memset(bw_sum, 0, sizeof(bw_sum));
+	memset(link_strict, 0, sizeof(link_strict));
+
+	/* First Tx, then Rx */
+	for (i = 0; i < 2; i++) {
+		/* Check each traffic class for rule violation */
+		for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+			p = &dcb_config->tc_config[j].path[i];
+
+			bw = p->bwg_percent;
+			bw_id = p->bwg_id;
+
+			if (bw_id >= MAX_BW_GROUP) {
+				ret_val = DCB_ERR_CONFIG;
+				goto err_config;
+			}
+			if (p->prio_type == prio_link) {
+				link_strict[i][bw_id] = true;
+				/* Link strict should have zero bandwidth */
+				if (bw) {
+					ret_val = DCB_ERR_LS_BW_NONZERO;
+					goto err_config;
+				}
+			} else if (!bw) {
+				/*
+				 * Traffic classes without link strict
+				 * should have non-zero bandwidth.
+				 */
+				ret_val = DCB_ERR_TC_BW_ZERO;
+				goto err_config;
+			}
+			bw_sum[i][bw_id] += bw;
+		}
+
+		bw = 0;
+
+		/* Check each bandwidth group for rule violation */
+		for (j = 0; j < MAX_BW_GROUP; j++) {
+			bw += dcb_config->bw_percentage[i][j];
+			/*
+			 * Sum of bandwidth percentages of all traffic classes
+			 * within a Bandwidth Group must total 100 except for
+			 * link strict group (zero bandwidth).
+			 */
+			if (link_strict[i][j]) {
+				if (bw_sum[i][j]) {
+					/*
+					 * Link strict group should have zero
+					 * bandwidth.
+					 */
+					ret_val = DCB_ERR_LS_BWG_NONZERO;
+					goto err_config;
+				}
+			} else if (bw_sum[i][j] != BW_PERCENT &&
+				   bw_sum[i][j] != 0) {
+				ret_val = DCB_ERR_TC_BW;
+				goto err_config;
+			}
+		}
+
+		if (bw != BW_PERCENT) {
+			ret_val = DCB_ERR_BW_GROUP;
+			goto err_config;
+		}
+	}
+
+err_config:
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits
+ * @ixgbe_dcb_config: Struct containing DCB settings.
+ * @direction: Configuring either Tx or Rx.
+ *
+ * This function calculates the credits allocated to each traffic class.
+ * It should be called only after the rules are checked by
+ * ixgbe_dcb_check_config().
+ */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *dcb_config,
+                                   u8 direction)
+{
+	struct tc_bw_alloc *p;
+	s32 ret_val = 0;
+	/* Initialization values default for Tx settings */
+	u32 credit_refill       = 0;
+	u32 credit_max          = 0;
+	u16 link_percentage     = 0;
+	u8  bw_percent          = 0;
+	u8  i;
+
+	if (dcb_config == NULL) {
+		ret_val = DCB_ERR_CONFIG;
+		goto out;
+	}
+
+	/* Find out the link percentage for each TC first */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[direction];
+		bw_percent = dcb_config->bw_percentage[direction][p->bwg_id];
+
+		link_percentage = p->bwg_percent;
+		/* Must be careful of integer division for very small nums */
+		link_percentage = (link_percentage * bw_percent) / 100;
+		if (p->bwg_percent > 0 && link_percentage == 0)
+			link_percentage = 1;
+
+		/* Save link_percentage for reference */
+		p->link_percent = (u8)link_percentage;
+
+		/* Calculate credit refill and save it */
+		credit_refill = link_percentage * MINIMUM_CREDIT_REFILL;
+		p->data_credits_refill = (u16)credit_refill;
+
+		/* Calculate maximum credit for the TC */
+		credit_max = (link_percentage * MAX_CREDIT) / 100;
+
+		/*
+		 * Adjustment based on rule checking, if the percentage
+		 * of a TC is too small, the maximum credit may not be
+		 * enough to send out a jumbo frame in data plane arbitration.
+		 */
+		if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_JUMBO))
+			credit_max = MINIMUM_CREDIT_FOR_JUMBO;
+
+		if (direction == DCB_TX_CONFIG) {
+			/*
+			 * Adjustment based on rule checking, if the
+			 * percentage of a TC is too small, the maximum
+			 * credit may not be enough to send out a TSO
+			 * packet in descriptor plane arbitration.
+			 */
+			if (credit_max &&
+			    (credit_max < MINIMUM_CREDIT_FOR_TSO))
+				credit_max = MINIMUM_CREDIT_FOR_TSO;
+
+			dcb_config->tc_config[i].desc_credits_max =
+				(u16)credit_max;
+		}
+
+		p->data_credits_max = (u16)credit_max;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_get_tc_stats - Returns status of each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of elements in bwg_array.
+ *
+ * This function returns the status data for each of the Traffic Classes in use.
+ */
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
+                           u8 tc_count)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_get_tc_stats_82598(hw, stats, tc_count);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_get_pfc_stats - Returns CBFC status of each traffic class
+ * hw - pointer to hardware structure
+ * stats - pointer to statistics structure
+ * tc_count -  Number of elements in bwg_array.
+ *
+ * This function returns the CBFC status data for each of the Traffic Classes.
+ */
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
+                            u8 tc_count)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_get_pfc_stats_82598(hw, stats, tc_count);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_rx_arbiter - Config Rx arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Rx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw,
+                                struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter - Config Tx Desc arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw,
+                                     struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw,
+                                     struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_pfc - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Priority Flow Control for each traffic class.
+ */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw,
+                         struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_pfc_82598(hw, dcb_config);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats - Config traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers, all queues belonging to same traffic
+ * class uses a single set of queue statistics counters.
+ */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_config_tc_stats_82598(hw);
+	return ret;
+}
+
+/**
+ * ixgbe_dcb_hw_config - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
+                        struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret = 0;
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ret = ixgbe_dcb_hw_config_82598(hw, dcb_config);
+	return ret;
+}
+
diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h
new file mode 100644
index 000000000000..62dfd243bedc
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb.h
@@ -0,0 +1,157 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@intel.com>
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _DCB_CONFIG_H_
+#define _DCB_CONFIG_H_
+
+#include "ixgbe_type.h"
+
+/* DCB data structures */
+
+#define IXGBE_MAX_PACKET_BUFFERS 8
+#define MAX_USER_PRIORITY        8
+#define MAX_TRAFFIC_CLASS        8
+#define MAX_BW_GROUP             8
+#define BW_PERCENT               100
+
+#define DCB_TX_CONFIG            0
+#define DCB_RX_CONFIG            1
+
+/* DCB error Codes */
+#define DCB_SUCCESS              0
+#define DCB_ERR_CONFIG           -1
+#define DCB_ERR_PARAM            -2
+
+/* Transmit and receive Errors */
+/* Error in bandwidth group allocation */
+#define DCB_ERR_BW_GROUP        -3
+/* Error in traffic class bandwidth allocation */
+#define DCB_ERR_TC_BW           -4
+/* Traffic class has both link strict and group strict enabled */
+#define DCB_ERR_LS_GS           -5
+/* Link strict traffic class has non zero bandwidth */
+#define DCB_ERR_LS_BW_NONZERO   -6
+/* Link strict bandwidth group has non zero bandwidth */
+#define DCB_ERR_LS_BWG_NONZERO  -7
+/*  Traffic class has zero bandwidth */
+#define DCB_ERR_TC_BW_ZERO      -8
+
+#define DCB_NOT_IMPLEMENTED      0x7FFFFFFF
+
+struct dcb_pfc_tc_debug {
+	u8  tc;
+	u8  pause_status;
+	u64 pause_quanta;
+};
+
+enum strict_prio_type {
+	prio_none = 0,
+	prio_group,
+	prio_link
+};
+
+/* Traffic class bandwidth allocation per direction */
+struct tc_bw_alloc {
+	u8 bwg_id;		  /* Bandwidth Group (BWG) ID */
+	u8 bwg_percent;		  /* % of BWG's bandwidth */
+	u8 link_percent;	  /* % of link bandwidth */
+	u8 up_to_tc_bitmap;	  /* User Priority to Traffic Class mapping */
+	u16 data_credits_refill;  /* Credit refill amount in 64B granularity */
+	u16 data_credits_max;	  /* Max credits for a configured packet buffer
+				   * in 64B granularity.*/
+	enum strict_prio_type prio_type; /* Link or Group Strict Priority */
+};
+
+enum dcb_pfc_type {
+	pfc_disabled = 0,
+	pfc_enabled_full,
+	pfc_enabled_tx,
+	pfc_enabled_rx
+};
+
+/* Traffic class configuration */
+struct tc_configuration {
+	struct tc_bw_alloc path[2]; /* One each for Tx/Rx */
+	enum dcb_pfc_type  dcb_pfc; /* Class based flow control setting */
+
+	u16 desc_credits_max; /* For Tx Descriptor arbitration */
+	u8 tc; /* Traffic class (TC) */
+};
+
+enum dcb_rx_pba_cfg {
+	pba_equal,     /* PBA[0-7] each use 64KB FIFO */
+	pba_80_48      /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
+};
+
+struct ixgbe_dcb_config {
+	struct tc_configuration tc_config[MAX_TRAFFIC_CLASS];
+	u8     bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */
+
+	bool  round_robin_enable;
+
+	enum dcb_rx_pba_cfg rx_pba_cfg;
+
+	u32  dcb_cfg_version; /* Not used...OS-specific? */
+	u32  link_speed; /* For bandwidth allocation validation purpose */
+};
+
+/* DCB driver APIs */
+
+/* DCB rule checking function.*/
+s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *config);
+
+/* DCB credits calculation */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *, u8);
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, struct ixgbe_dcb_config *g);
+s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB traffic class stats */
+s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *,
+                                     struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *,
+                                     struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+/* DCB definitions for credit calculation */
+#define MAX_CREDIT_REFILL       511  /* 0x1FF * 64B = 32704B */
+#define MINIMUM_CREDIT_REFILL   5    /* 5*64B = 320B */
+#define MINIMUM_CREDIT_FOR_JUMBO 145  /* 145= UpperBound((9*1024+54)/64B) for 9KB jumbo frame */
+#define DCB_MAX_TSO_SIZE        (32*1024) /* MAX TSO packet size supported in DCB mode */
+#define MINIMUM_CREDIT_FOR_TSO  (DCB_MAX_TSO_SIZE/64 + 1) /* 513 for 32KB TSO packet */
+#define MAX_CREDIT              4095 /* Maximum credit supported: 256KB * 1204 / 64B */
+
+#endif /* _DCB_CONFIG_H */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ixgbe/ixgbe_dcb_82598.c
new file mode 100644
index 000000000000..fce6867a4517
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_82598.c
@@ -0,0 +1,398 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@intel.com>
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_get_tc_stats_82598 - Return status data for each traffic class
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of elements in bwg_array.
+ *
+ * This function returns the status data for each of the Traffic Classes in use.
+ */
+s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw,
+                                 struct ixgbe_hw_stats *stats,
+                                 u8 tc_count)
+{
+	int tc;
+
+	if (tc_count > MAX_TRAFFIC_CLASS)
+		return DCB_ERR_PARAM;
+
+	/* Statistics pertaining to each traffic class */
+	for (tc = 0; tc < tc_count; tc++) {
+		/* Transmitted Packets */
+		stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc));
+		/* Transmitted Bytes */
+		stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc));
+		/* Received Packets */
+		stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc));
+		/* Received Bytes */
+		stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc));
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_get_pfc_stats_82598 - Returns CBFC status data
+ * @hw: pointer to hardware structure
+ * @stats: pointer to statistics structure
+ * @tc_count:  Number of elements in bwg_array.
+ *
+ * This function returns the CBFC status data for each of the Traffic Classes.
+ */
+s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw,
+                                  struct ixgbe_hw_stats *stats,
+                                  u8 tc_count)
+{
+	int tc;
+
+	if (tc_count > MAX_TRAFFIC_CLASS)
+		return DCB_ERR_PARAM;
+
+	for (tc = 0; tc < tc_count; tc++) {
+		/* Priority XOFF Transmitted */
+		stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc));
+		/* Priority XOFF Received */
+		stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(tc));
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_packet_buffers_82598 - Configure packet buffers
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure packet buffers for DCB mode.
+ */
+s32 ixgbe_dcb_config_packet_buffers_82598(struct ixgbe_hw *hw,
+                                          struct ixgbe_dcb_config *dcb_config)
+{
+	s32 ret_val = 0;
+	u32 value = IXGBE_RXPBSIZE_64KB;
+	u8  i = 0;
+
+	/* Setup Rx packet buffer sizes */
+	switch (dcb_config->rx_pba_cfg) {
+	case pba_80_48:
+		/* Setup the first four at 80KB */
+		value = IXGBE_RXPBSIZE_80KB;
+		for (; i < 4; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
+		/* Setup the last four at 48KB...don't re-init i */
+		value = IXGBE_RXPBSIZE_48KB;
+		/* Fall Through */
+	case pba_equal:
+	default:
+		for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
+
+		/* Setup Tx packet buffer sizes */
+		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
+			IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i),
+					IXGBE_TXPBSIZE_40KB);
+		}
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ * ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Rx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
+                                      struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc    *p;
+	u32    reg           = 0;
+	u32    credit_refill = 0;
+	u32    credit_max    = 0;
+	u8     i             = 0;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA;
+	IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	/* Enable Arbiter */
+	reg &= ~IXGBE_RMCS_ARBDIS;
+	/* Enable Receive Recycle within the BWG */
+	reg |= IXGBE_RMCS_RRM;
+	/* Enable Deficit Fixed Priority arbitration*/
+	reg |= IXGBE_RMCS_DFP;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG];
+		credit_refill = p->data_credits_refill;
+		credit_max    = p->data_credits_max;
+
+		reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT);
+
+		if (p->prio_type == prio_link)
+			reg |= IXGBE_RT2CR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg);
+	}
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+	reg |= IXGBE_RDRXCTL_RDMTS_1_2;
+	reg |= IXGBE_RDRXCTL_MPBEN;
+	reg |= IXGBE_RDRXCTL_MCEN;
+	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	/* Make sure there is enough descriptors before arbitration */
+	reg &= ~IXGBE_RXCTRL_DMBYPS;
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
+                                           struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	u32    reg, max_credits;
+	u8     i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+
+	/* Enable arbiter */
+	reg &= ~IXGBE_DPMCS_ARBDIS;
+	if (!(dcb_config->round_robin_enable)) {
+		/* Enable DFP and Recycle mode */
+		reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM);
+	}
+	reg |= IXGBE_DPMCS_TSOEF;
+	/* Configure Max TSO packet size 34KB including payload and headers */
+	reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT);
+
+	IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
+		max_credits = dcb_config->tc_config[i].desc_credits_max;
+		reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT;
+		reg |= p->data_credits_refill;
+		reg |= (u32)(p->bwg_id) << IXGBE_TDTQ2TCCR_BWG_SHIFT;
+
+		if (p->prio_type == prio_group)
+			reg |= IXGBE_TDTQ2TCCR_GSP;
+
+		if (p->prio_type == prio_link)
+			reg |= IXGBE_TDTQ2TCCR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Tx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
+                                           struct ixgbe_dcb_config *dcb_config)
+{
+	struct tc_bw_alloc *p;
+	u32 reg;
+	u8 i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
+	/* Enable Data Plane Arbiter */
+	reg &= ~IXGBE_PDPMCS_ARBDIS;
+	/* Enable DFP and Transmit Recycle Mode */
+	reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM);
+
+	IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
+		reg = p->data_credits_refill;
+		reg |= (u32)(p->data_credits_max) << IXGBE_TDPT2TCCR_MCL_SHIFT;
+		reg |= (u32)(p->bwg_id) << IXGBE_TDPT2TCCR_BWG_SHIFT;
+
+		if (p->prio_type == prio_group)
+			reg |= IXGBE_TDPT2TCCR_GSP;
+
+		if (p->prio_type == prio_link)
+			reg |= IXGBE_TDPT2TCCR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg);
+	}
+
+	/* Enable Tx packet buffer division */
+	reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+	reg |= IXGBE_DTXCTL_ENDBUBD;
+	IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_pfc_82598 - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure Priority Flow Control for each traffic class.
+ */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw,
+                               struct ixgbe_dcb_config *dcb_config)
+{
+	u32 reg, rx_pba_size;
+	u8  i;
+
+	/* Enable Transmit Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	reg &= ~IXGBE_RMCS_TFCE_802_3X;
+	/* correct the reporting of our flow control status */
+	hw->fc.type = ixgbe_fc_none;
+	reg |= IXGBE_RMCS_TFCE_PRIORITY;
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+	/* Enable Receive Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	reg &= ~IXGBE_FCTRL_RFCE;
+	reg |= IXGBE_FCTRL_RPFCE;
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
+
+	/*
+	 * Configure flow control thresholds and enable priority flow control
+	 * for each traffic class.
+	 */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if (dcb_config->rx_pba_cfg == pba_equal) {
+			rx_pba_size = IXGBE_RXPBSIZE_64KB;
+		} else {
+			rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB
+					      : IXGBE_RXPBSIZE_48KB;
+		}
+
+		reg = ((rx_pba_size >> 5) &  0xFFF0);
+		if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
+		    dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
+			reg |= IXGBE_FCRTL_XONE;
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg);
+
+		reg = ((rx_pba_size >> 2) & 0xFFF0);
+		if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
+		    dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
+			reg |= IXGBE_FCRTH_FCEN;
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg);
+	}
+
+	/* Configure pause time */
+	for (i = 0; i < (MAX_TRAFFIC_CLASS >> 1); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), 0x68006800);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, 0x3400);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers, all queues belonging to same traffic
+ * class uses a single set of queue statistics counters.
+ */
+s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
+{
+	u32 reg = 0;
+	u8  i   = 0;
+	u8  j   = 0;
+
+	/* Receive Queues stats setting -  8 queues per statistics reg */
+	for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i));
+		reg |= ((0x1010101) * j);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
+		reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1));
+		reg |= ((0x1010101) * j);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg);
+	}
+	/* Transmit Queues stats setting -  4 queues per statistics reg */
+	for (i = 0; i < 8; i++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i));
+		reg |= ((0x1010101) * i);
+		IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_hw_config_82598 - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw,
+                              struct ixgbe_dcb_config *dcb_config)
+{
+	ixgbe_dcb_config_packet_buffers_82598(hw, dcb_config);
+	ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
+	ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
+	ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
+	ixgbe_dcb_config_pfc_82598(hw, dcb_config);
+	ixgbe_dcb_config_tc_stats_82598(hw);
+
+	return 0;
+}
diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ixgbe/ixgbe_dcb_82598.h
new file mode 100644
index 000000000000..1e6a313719d7
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_82598.h
@@ -0,0 +1,94 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2007 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@intel.com>
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _DCB_82598_CONFIG_H_
+#define _DCB_82598_CONFIG_H_
+
+/* DCB register definitions */
+
+#define IXGBE_DPMCS_MTSOS_SHIFT 16
+#define IXGBE_DPMCS_TDPAC       0x00000001 /* 0 Round Robin, 1 DFP - Deficit Fixed Priority */
+#define IXGBE_DPMCS_TRM         0x00000010 /* Transmit Recycle Mode */
+#define IXGBE_DPMCS_ARBDIS      0x00000040 /* DCB arbiter disable */
+#define IXGBE_DPMCS_TSOEF       0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */
+
+#define IXGBE_RUPPBMR_MQA       0x80000000 /* Enable UP to queue mapping */
+
+#define IXGBE_RT2CR_MCL_SHIFT   12 /* Offset to Max Credit Limit setting */
+#define IXGBE_RT2CR_LSP         0x80000000 /* LSP enable bit */
+
+#define IXGBE_RDRXCTL_MPBEN     0x00000010 /* DMA config for multiple packet buffers enable */
+#define IXGBE_RDRXCTL_MCEN      0x00000040 /* DMA config for multiple cores (RSS) enable */
+
+#define IXGBE_TDTQ2TCCR_MCL_SHIFT   12
+#define IXGBE_TDTQ2TCCR_BWG_SHIFT   9
+#define IXGBE_TDTQ2TCCR_GSP     0x40000000
+#define IXGBE_TDTQ2TCCR_LSP     0x80000000
+
+#define IXGBE_TDPT2TCCR_MCL_SHIFT   12
+#define IXGBE_TDPT2TCCR_BWG_SHIFT   9
+#define IXGBE_TDPT2TCCR_GSP     0x40000000
+#define IXGBE_TDPT2TCCR_LSP     0x80000000
+
+#define IXGBE_PDPMCS_TPPAC      0x00000020 /* 0 Round Robin, 1 for DFP - Deficit Fixed Priority */
+#define IXGBE_PDPMCS_ARBDIS     0x00000040 /* Arbiter disable */
+#define IXGBE_PDPMCS_TRM        0x00000100 /* Transmit Recycle Mode enable */
+
+#define IXGBE_DTXCTL_ENDBUBD    0x00000004 /* Enable DBU buffer division */
+
+#define IXGBE_TXPBSIZE_40KB     0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB     0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB     0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB     0x00014000 /* 80KB Packet Buffer */
+
+#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000
+
+/* DCB hardware-specific driver APIs */
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *,
+                                  u8);
+
+/* DCB traffic class stats */
+s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *);
+s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *,
+                                 u8);
+
+/* DCB config arbiters */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *,
+                                           struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *,
+                                           struct ixgbe_dcb_config *);
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *,
+                                      struct ixgbe_dcb_config *);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+#endif /* _DCB_82598_CONFIG_H */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
new file mode 100644
index 000000000000..50bff2af6b04
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -0,0 +1,356 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 1999 - 2008 Intel Corporation.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@intel.com>
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include "ixgbe.h"
+#include <linux/dcbnl.h>
+
+/* Callbacks for DCB netlink in the kernel */
+#define BIT_DCB_MODE	0x01
+#define BIT_PFC		0x02
+#define BIT_PG_RX	0x04
+#define BIT_PG_TX	0x08
+
+int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
+                       struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max)
+{
+	struct tc_configuration *src_tc_cfg = NULL;
+	struct tc_configuration *dst_tc_cfg = NULL;
+	int i;
+
+	if (!src_dcb_cfg || !dst_dcb_cfg)
+		return -EINVAL;
+
+	for (i = DCB_PG_ATTR_TC_0; i < tc_max + DCB_PG_ATTR_TC_0; i++) {
+		src_tc_cfg = &src_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0];
+		dst_tc_cfg = &dst_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0];
+
+		dst_tc_cfg->path[DCB_TX_CONFIG].prio_type =
+				src_tc_cfg->path[DCB_TX_CONFIG].prio_type;
+
+		dst_tc_cfg->path[DCB_TX_CONFIG].bwg_id =
+				src_tc_cfg->path[DCB_TX_CONFIG].bwg_id;
+
+		dst_tc_cfg->path[DCB_TX_CONFIG].bwg_percent =
+				src_tc_cfg->path[DCB_TX_CONFIG].bwg_percent;
+
+		dst_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap =
+				src_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap;
+
+		dst_tc_cfg->path[DCB_RX_CONFIG].prio_type =
+				src_tc_cfg->path[DCB_RX_CONFIG].prio_type;
+
+		dst_tc_cfg->path[DCB_RX_CONFIG].bwg_id =
+				src_tc_cfg->path[DCB_RX_CONFIG].bwg_id;
+
+		dst_tc_cfg->path[DCB_RX_CONFIG].bwg_percent =
+				src_tc_cfg->path[DCB_RX_CONFIG].bwg_percent;
+
+		dst_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap =
+				src_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap;
+	}
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i < DCB_PG_ATTR_BW_ID_MAX; i++) {
+		dst_dcb_cfg->bw_percentage[DCB_TX_CONFIG]
+			[i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage
+				[DCB_TX_CONFIG][i-DCB_PG_ATTR_BW_ID_0];
+		dst_dcb_cfg->bw_percentage[DCB_RX_CONFIG]
+			[i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage
+				[DCB_RX_CONFIG][i-DCB_PG_ATTR_BW_ID_0];
+	}
+
+	for (i = DCB_PFC_UP_ATTR_0; i < DCB_PFC_UP_ATTR_MAX; i++) {
+		dst_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc =
+			src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc;
+	}
+
+	return 0;
+}
+
+static u8 ixgbe_dcbnl_get_state(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	DPRINTK(DRV, INFO, "Get DCB Admin Mode.\n");
+
+	return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
+}
+
+static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	/* All traffic should default to class 0 */
+	return 0;
+}
+
+static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n");
+
+	if (state > 0) {
+		/* Turn on DCB */
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			return;
+		} else {
+			if (netif_running(netdev))
+				netdev->stop(netdev);
+			ixgbe_reset_interrupt_capability(adapter);
+			ixgbe_napi_del_all(adapter);
+			kfree(adapter->tx_ring);
+			kfree(adapter->rx_ring);
+			adapter->tx_ring = NULL;
+			adapter->rx_ring = NULL;
+			netdev->select_queue = &ixgbe_dcb_select_queue;
+
+			adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+			adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
+			ixgbe_init_interrupt_scheme(adapter);
+			ixgbe_napi_add_all(adapter);
+			if (netif_running(netdev))
+				netdev->open(netdev);
+		}
+	} else {
+		/* Turn off DCB */
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			if (netif_running(netdev))
+				netdev->stop(netdev);
+			ixgbe_reset_interrupt_capability(adapter);
+			ixgbe_napi_del_all(adapter);
+			kfree(adapter->tx_ring);
+			kfree(adapter->rx_ring);
+			adapter->tx_ring = NULL;
+			adapter->rx_ring = NULL;
+			netdev->select_queue = NULL;
+
+			adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+			adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+			ixgbe_init_interrupt_scheme(adapter);
+			ixgbe_napi_add_all(adapter);
+			if (netif_running(netdev))
+				netdev->open(netdev);
+		} else {
+			return;
+		}
+	}
+}
+
+static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev,
+					 u8 *perm_addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	for (i = 0; i < netdev->addr_len; i++)
+		perm_addr[i] = adapter->hw.mac.perm_addr[i];
+}
+
+static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+                                         u8 prio, u8 bwg_id, u8 bw_pct,
+                                         u8 up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (prio != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio;
+	if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id = bwg_id;
+	if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent =
+			bw_pct;
+	if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap =
+			up_map;
+
+	if ((adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type !=
+	     adapter->dcb_cfg.tc_config[tc].path[0].prio_type) ||
+	    (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id !=
+	     adapter->dcb_cfg.tc_config[tc].path[0].bwg_id) ||
+	    (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent !=
+	     adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent) ||
+	    (adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap !=
+	     adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap))
+		adapter->dcb_set_bitmap |= BIT_PG_TX;
+}
+
+static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+                                          u8 bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct;
+
+	if (adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] !=
+	    adapter->dcb_cfg.bw_percentage[0][bwg_id])
+		adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+                                         u8 prio, u8 bwg_id, u8 bw_pct,
+                                         u8 up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (prio != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio;
+	if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id = bwg_id;
+	if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent =
+			bw_pct;
+	if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap =
+			up_map;
+
+	if ((adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type !=
+	     adapter->dcb_cfg.tc_config[tc].path[1].prio_type) ||
+	    (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id !=
+	     adapter->dcb_cfg.tc_config[tc].path[1].bwg_id) ||
+	    (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent !=
+	     adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent) ||
+	    (adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap !=
+	     adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap))
+		adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+                                          u8 bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct;
+
+	if (adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] !=
+	    adapter->dcb_cfg.bw_percentage[1][bwg_id])
+		adapter->dcb_set_bitmap |= BIT_PG_RX;
+}
+
+static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+                                         u8 *prio, u8 *bwg_id, u8 *bw_pct,
+                                         u8 *up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*prio = adapter->dcb_cfg.tc_config[tc].path[0].prio_type;
+	*bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id;
+	*bw_pct = adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent;
+	*up_map = adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap;
+}
+
+static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+                                          u8 *bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id];
+}
+
+static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+                                         u8 *prio, u8 *bwg_id, u8 *bw_pct,
+                                         u8 *up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type;
+	*bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id;
+	*bw_pct = adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent;
+	*up_map = adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap;
+}
+
+static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+                                          u8 *bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id];
+}
+
+static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
+                                    u8 setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting;
+	if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc !=
+	    adapter->dcb_cfg.tc_config[priority].dcb_pfc)
+		adapter->dcb_set_bitmap |= BIT_PFC;
+}
+
+static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
+                                    u8 *setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc;
+}
+
+static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int ret;
+
+	if (!adapter->dcb_set_bitmap)
+		return 1;
+
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		msleep(1);
+
+	if (netif_running(netdev))
+		ixgbe_down(adapter);
+
+	ret = ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
+				 adapter->ring_feature[RING_F_DCB].indices);
+	if (ret) {
+		clear_bit(__IXGBE_RESETTING, &adapter->state);
+		return ret;
+	}
+
+	if (netif_running(netdev))
+		ixgbe_up(adapter);
+
+	adapter->dcb_set_bitmap = 0x00;
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+	return ret;
+}
+
+struct dcbnl_rtnl_ops dcbnl_ops = {
+	.getstate	= ixgbe_dcbnl_get_state,
+	.setstate	= ixgbe_dcbnl_set_state,
+	.getpermhwaddr	= ixgbe_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx	= ixgbe_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx	= ixgbe_dcbnl_set_pg_bwg_cfg_tx,
+	.setpgtccfgrx	= ixgbe_dcbnl_set_pg_tc_cfg_rx,
+	.setpgbwgcfgrx	= ixgbe_dcbnl_set_pg_bwg_cfg_rx,
+	.getpgtccfgtx	= ixgbe_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx	= ixgbe_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx	= ixgbe_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx	= ixgbe_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg	= ixgbe_dcbnl_set_pfc_cfg,
+	.getpfccfg	= ixgbe_dcbnl_get_pfc_cfg,
+	.setall		= ixgbe_dcbnl_set_all
+};
+
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index a610016a0172..aaa4404e7c5f 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -97,9 +97,18 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \
 	((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \
 	(sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN)
 #define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats)
-#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN)
+#define IXGBE_PB_STATS_LEN ( \
+                 (((struct ixgbe_adapter *)netdev->priv)->flags & \
+                 IXGBE_FLAG_DCB_ENABLED) ? \
+                 (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
+                  sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
+                  sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
+                  sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
+                  / sizeof(u64) : 0)
+#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
+                         IXGBE_PB_STATS_LEN + \
+                         IXGBE_QUEUE_STATS_LEN)
 
 static int ixgbe_get_settings(struct net_device *netdev,
                               struct ethtool_cmd *ecmd)
@@ -831,6 +840,16 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
 			data[i + k] = queue_stat[k];
 		i += k;
 	}
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) {
+			data[i++] = adapter->stats.pxontxc[j];
+			data[i++] = adapter->stats.pxofftxc[j];
+		}
+		for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) {
+			data[i++] = adapter->stats.pxonrxc[j];
+			data[i++] = adapter->stats.pxoffrxc[j];
+		}
+	}
 }
 
 static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
@@ -859,6 +878,13 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
 			sprintf(p, "rx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+				sprintf(p, "tx_pb_%u_pxon", i);
+			}
+			for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) {
+			}
+		}
 		/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
 	}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 40108523377f..91dde9cdab66 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -404,7 +404,7 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter,
 
 	if (adapter->netdev->features & NETIF_F_LRO &&
 	    skb->ip_summed == CHECKSUM_UNNECESSARY) {
-		if (adapter->vlgrp && is_vlan)
+		if (adapter->vlgrp && is_vlan && (tag != 0))
 			lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb,
 			                             adapter->vlgrp, tag,
 			                             rx_desc);
@@ -413,12 +413,12 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter,
 		ring->lro_used = true;
 	} else {
 		if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) {
-			if (adapter->vlgrp && is_vlan)
+			if (adapter->vlgrp && is_vlan && (tag != 0))
 				vlan_hwaccel_receive_skb(skb, adapter->vlgrp, tag);
 			else
 				netif_receive_skb(skb);
 		} else {
-			if (adapter->vlgrp && is_vlan)
+			if (adapter->vlgrp && is_vlan && (tag != 0))
 				vlan_hwaccel_rx(skb, adapter->vlgrp, tag);
 			else
 				netif_rx(skb);
@@ -1670,10 +1670,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
 	 * effects of setting this bit are only that SRRCTL must be
 	 * fully programmed [0..15]
 	 */
-	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
-	rdrxctl |= IXGBE_RDRXCTL_MVMEN;
-	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
-
+	if (adapter->flags &
+	    (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED)) {
+		rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+		rdrxctl |= IXGBE_RDRXCTL_MVMEN;
+		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
+	}
 
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
 		/* Fill out redirection table */
@@ -1732,6 +1734,16 @@ static void ixgbe_vlan_rx_register(struct net_device *netdev,
 		ixgbe_irq_disable(adapter);
 	adapter->vlgrp = grp;
 
+	/*
+	 * For a DCB driver, always enable VLAN tag stripping so we can
+	 * still receive traffic from a DCB-enabled host even if we're
+	 * not in DCB mode.
+	 */
+	ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
+	ctrl |= IXGBE_VLNCTRL_VME;
+	ctrl &= ~IXGBE_VLNCTRL_CFIEN;
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_VLNCTRL, ctrl);
+
 	if (grp) {
 		/* enable VLAN tag insert/strip */
 		ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL);
@@ -1896,6 +1908,44 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
 	}
 }
 
+#ifdef CONFIG_IXGBE_DCBNL
+/*
+ * ixgbe_configure_dcb - Configure DCB hardware
+ * @adapter: ixgbe adapter struct
+ *
+ * This is called by the driver on open to configure the DCB hardware.
+ * This is also called by the gennetlink interface when reconfiguring
+ * the DCB state.
+ */
+static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 txdctl, vlnctrl;
+	int i, j;
+
+	ixgbe_dcb_check_config(&adapter->dcb_cfg);
+	ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_TX_CONFIG);
+	ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_RX_CONFIG);
+
+	/* reconfigure the hardware */
+	ixgbe_dcb_hw_config(&adapter->hw, &adapter->dcb_cfg);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		j = adapter->tx_ring[i].reg_idx;
+		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j));
+		/* PThresh workaround for Tx hang with DFP enabled. */
+		txdctl |= 32;
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl);
+	}
+	/* Enable VLAN tag insert/strip */
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	vlnctrl |= IXGBE_VLNCTRL_VME | IXGBE_VLNCTRL_VFE;
+	vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+	hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
+}
+
+#endif
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -1904,6 +1954,16 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 	ixgbe_set_rx_mode(netdev);
 
 	ixgbe_restore_vlan(adapter);
+#ifdef CONFIG_IXGBE_DCBNL
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		netif_set_gso_max_size(netdev, 32768);
+		ixgbe_configure_dcb(adapter);
+	} else {
+		netif_set_gso_max_size(netdev, 65536);
+	}
+#else
+	netif_set_gso_max_size(netdev, 65536);
+#endif
 
 	ixgbe_configure_tx(adapter);
 	ixgbe_configure_rx(adapter);
@@ -1995,9 +2055,6 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
 
 	ixgbe_irq_enable(adapter);
 
-	/* enable transmits */
-	netif_tx_start_all_queues(netdev);
-
 	/* bring the link up in the watchdog, this could race with our first
 	 * link up interrupt but shouldn't be a problem */
 	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -2260,6 +2317,11 @@ static void ixgbe_reset_task(struct work_struct *work)
 	struct ixgbe_adapter *adapter;
 	adapter = container_of(work, struct ixgbe_adapter, reset_task);
 
+	/* If we're already down or resetting, just bail */
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return;
+
 	adapter->tx_timeout_count++;
 
 	ixgbe_reinit_locked(adapter);
@@ -2269,15 +2331,31 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 {
 	int nrq = 1, ntq = 1;
 	int feature_mask = 0, rss_i, rss_m;
+	int dcb_i, dcb_m;
 
 	/* Number of supported queues */
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82598EB:
+		dcb_i = adapter->ring_feature[RING_F_DCB].indices;
+		dcb_m = 0;
 		rss_i = adapter->ring_feature[RING_F_RSS].indices;
 		rss_m = 0;
 		feature_mask |= IXGBE_FLAG_RSS_ENABLED;
+		feature_mask |= IXGBE_FLAG_DCB_ENABLED;
 
 		switch (adapter->flags & feature_mask) {
+		case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED):
+			dcb_m = 0x7 << 3;
+			rss_i = min(8, rss_i);
+			rss_m = 0x7;
+			nrq = dcb_i * rss_i;
+			ntq = min(MAX_TX_QUEUES, dcb_i * rss_i);
+			break;
+		case (IXGBE_FLAG_DCB_ENABLED):
+			dcb_m = 0x7 << 3;
+			nrq = dcb_i;
+			ntq = dcb_i;
+			break;
 		case (IXGBE_FLAG_RSS_ENABLED):
 			rss_m = 0xF;
 			nrq = rss_i;
@@ -2285,6 +2363,8 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 			break;
 		case 0:
 		default:
+			dcb_i = 0;
+			dcb_m = 0;
 			rss_i = 0;
 			rss_m = 0;
 			nrq = 1;
@@ -2292,6 +2372,12 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 			break;
 		}
 
+		/* Sanity check, we should never have zero queues */
+		nrq = (nrq ?:1);
+		ntq = (ntq ?:1);
+
+		adapter->ring_feature[RING_F_DCB].indices = dcb_i;
+		adapter->ring_feature[RING_F_DCB].mask = dcb_m;
 		adapter->ring_feature[RING_F_RSS].indices = rss_i;
 		adapter->ring_feature[RING_F_RSS].mask = rss_m;
 		break;
@@ -2343,6 +2429,7 @@ static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
 		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
 		kfree(adapter->msix_entries);
 		adapter->msix_entries = NULL;
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 		ixgbe_set_num_queues(adapter);
 	} else {
@@ -2362,15 +2449,42 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
 {
 	int feature_mask = 0, rss_i;
 	int i, txr_idx, rxr_idx;
+	int dcb_i;
 
 	/* Number of supported queues */
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82598EB:
+		dcb_i = adapter->ring_feature[RING_F_DCB].indices;
 		rss_i = adapter->ring_feature[RING_F_RSS].indices;
 		txr_idx = 0;
 		rxr_idx = 0;
+		feature_mask |= IXGBE_FLAG_DCB_ENABLED;
 		feature_mask |= IXGBE_FLAG_RSS_ENABLED;
 		switch (adapter->flags & feature_mask) {
+		case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED):
+			for (i = 0; i < dcb_i; i++) {
+				int j;
+				/* Rx first */
+				for (j = 0; j < adapter->num_rx_queues; j++) {
+					adapter->rx_ring[rxr_idx].reg_idx =
+						i << 3 | j;
+					rxr_idx++;
+				}
+				/* Tx now */
+				for (j = 0; j < adapter->num_tx_queues; j++) {
+					adapter->tx_ring[txr_idx].reg_idx =
+						i << 2 | (j >> 1);
+					if (j & 1)
+						txr_idx++;
+				}
+			}
+		case (IXGBE_FLAG_DCB_ENABLED):
+			/* the number of queues is assumed to be symmetric */
+			for (i = 0; i < dcb_i; i++) {
+				adapter->rx_ring[i].reg_idx = i << 3;
+				adapter->tx_ring[i].reg_idx = i << 2;
+			}
+			break;
 		case (IXGBE_FLAG_RSS_ENABLED):
 			for (i = 0; i < adapter->num_rx_queues; i++)
 				adapter->rx_ring[i].reg_idx = i;
@@ -2395,7 +2509,7 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
  * number of queues at compile-time.  The polling_netdev array is
  * intended for Multiqueue, but should work fine with a single queue.
  **/
-static int __devinit ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
+static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter)
 {
 	int i;
 
@@ -2465,6 +2579,7 @@ static int __devinit ixgbe_set_interrupt_capability(struct ixgbe_adapter
 	adapter->msix_entries = kcalloc(v_budget,
 	                                sizeof(struct msix_entry), GFP_KERNEL);
 	if (!adapter->msix_entries) {
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 		ixgbe_set_num_queues(adapter);
 		kfree(adapter->tx_ring);
@@ -2505,7 +2620,7 @@ out:
 	return err;
 }
 
-static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
+void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
 {
 	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
 		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
@@ -2529,7 +2644,7 @@ static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
  * - Hardware queue count (num_*_queues)
  *   - defined by miscellaneous hardware support/features (RSS, etc.)
  **/
-static int __devinit ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
+int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
 {
 	int err;
 
@@ -2577,6 +2692,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
 	unsigned int rss;
+#ifdef CONFIG_IXGBE_DCBNL
+	int j;
+	struct tc_configuration *tc;
+#endif
 
 	/* PCI config space info */
 
@@ -2590,6 +2709,27 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus());
 	adapter->ring_feature[RING_F_RSS].indices = rss;
 	adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+	adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES;
+
+#ifdef CONFIG_IXGBE_DCBNL
+	/* Configure DCB traffic classes */
+	for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+		tc = &adapter->dcb_cfg.tc_config[j];
+		tc->path[DCB_TX_CONFIG].bwg_id = 0;
+		tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1);
+		tc->path[DCB_RX_CONFIG].bwg_id = 0;
+		tc->path[DCB_RX_CONFIG].bwg_percent = 12 + (j & 1);
+		tc->dcb_pfc = pfc_disabled;
+	}
+	adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100;
+	adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100;
+	adapter->dcb_cfg.rx_pba_cfg = pba_equal;
+	adapter->dcb_cfg.round_robin_enable = false;
+	adapter->dcb_set_bitmap = 0x00;
+	ixgbe_copy_dcb_cfg(&adapter->dcb_cfg, &adapter->temp_dcb_cfg,
+	                   adapter->ring_feature[RING_F_DCB].indices);
+
+#endif
 	if (hw->mac.ops.get_media_type &&
 	    (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper))
 		adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
@@ -2967,7 +3107,7 @@ static int ixgbe_close(struct net_device *netdev)
  * @adapter: private struct
  * helper function to napi_add each possible q_vector->napi
  */
-static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
+void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
 {
 	int q_idx, q_vectors;
 	int (*poll)(struct napi_struct *, int);
@@ -2988,7 +3128,7 @@ static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter)
 	}
 }
 
-static void ixgbe_napi_del_all(struct ixgbe_adapter *adapter)
+void ixgbe_napi_del_all(struct ixgbe_adapter *adapter)
 {
 	int q_idx;
 	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
@@ -3109,6 +3249,18 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		adapter->stats.mpc[i] += mpc;
 		total_mpc += adapter->stats.mpc[i];
 		adapter->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+		adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+		adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+		adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+		adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+		adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw,
+		                                            IXGBE_PXONRXC(i));
+		adapter->stats.pxontxc[i] += IXGBE_READ_REG(hw,
+		                                            IXGBE_PXONTXC(i));
+		adapter->stats.pxoffrxc[i] += IXGBE_READ_REG(hw,
+		                                            IXGBE_PXOFFRXC(i));
+		adapter->stats.pxofftxc[i] += IXGBE_READ_REG(hw,
+		                                            IXGBE_PXOFFTXC(i));
 	}
 	adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
 	/* work around hardware counting issue */
@@ -3248,6 +3400,7 @@ static void ixgbe_watchdog_task(struct work_struct *work)
 			         (FLOW_TX ? "TX" : "None"))));
 
 			netif_carrier_on(netdev);
+			netif_tx_wake_all_queues(netdev);
 		} else {
 			/* Force detection of hung controller */
 			adapter->detect_tx_hung = true;
@@ -3258,6 +3411,7 @@ static void ixgbe_watchdog_task(struct work_struct *work)
 		if (netif_carrier_ok(netdev)) {
 			DPRINTK(LINK, INFO, "NIC Link is Down\n");
 			netif_carrier_off(netdev);
+			netif_tx_stop_all_queues(netdev);
 		}
 	}
 
@@ -3604,6 +3758,14 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 	if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
 		tx_flags |= vlan_tx_tag_get(skb);
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+			tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
+			tx_flags |= (skb->queue_mapping << 13);
+		}
+		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_VLAN;
+	} else if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		tx_flags |= (skb->queue_mapping << 13);
 		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
 		tx_flags |= IXGBE_TX_FLAGS_VLAN;
 	}
@@ -3878,6 +4040,13 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	netdev->vlan_features |= NETIF_F_IP_CSUM;
 	netdev->vlan_features |= NETIF_F_SG;
 
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+
+#ifdef CONFIG_IXGBE_DCBNL
+	netdev->dcbnl_ops = &dcbnl_ops;
+#endif
+
 	if (pci_using_dac)
 		netdev->features |= NETIF_F_HIGHDMA;
 
@@ -3946,6 +4115,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	}
 
 	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
 
 	ixgbe_napi_add_all(adapter);
 
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
new file mode 100644
index 000000000000..32d32c1ee410
--- /dev/null
+++ b/include/linux/dcbnl.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#ifndef __LINUX_DCBNL_H__
+#define __LINUX_DCBNL_H__
+
+#define DCB_PROTO_VERSION 1
+
+struct dcbmsg {
+	unsigned char      dcb_family;
+	__u8               cmd;
+	__u16              dcb_pad;
+};
+
+/**
+ * enum dcbnl_commands - supported DCB commands
+ *
+ * @DCB_CMD_UNDEFINED: unspecified command to catch errors
+ * @DCB_CMD_GSTATE: request the state of DCB in the device
+ * @DCB_CMD_SSTATE: set the state of DCB in the device
+ * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx
+ * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx
+ * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx
+ * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx
+ * @DCB_CMD_PFC_GCFG: request the priority flow control configuration
+ * @DCB_CMD_PFC_SCFG: set the priority flow control configuration
+ * @DCB_CMD_SET_ALL: apply all changes to the underlying device
+ * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying
+ *                        device.  Only useful when using bonding.
+ */
+enum dcbnl_commands {
+	DCB_CMD_UNDEFINED,
+
+	DCB_CMD_GSTATE,
+	DCB_CMD_SSTATE,
+
+	DCB_CMD_PGTX_GCFG,
+	DCB_CMD_PGTX_SCFG,
+	DCB_CMD_PGRX_GCFG,
+	DCB_CMD_PGRX_SCFG,
+
+	DCB_CMD_PFC_GCFG,
+	DCB_CMD_PFC_SCFG,
+
+	DCB_CMD_SET_ALL,
+	DCB_CMD_GPERM_HWADDR,
+
+	__DCB_CMD_ENUM_MAX,
+	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
+};
+
+
+/**
+ * enum dcbnl_attrs - DCB top-level netlink attributes
+ *
+ * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING)
+ * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8)
+ * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8)
+ * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED)
+ * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8)
+ * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED)
+ * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8)
+ * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED)
+ */
+enum dcbnl_attrs {
+	DCB_ATTR_UNDEFINED,
+
+	DCB_ATTR_IFNAME,
+	DCB_ATTR_STATE,
+	DCB_ATTR_PFC_STATE,
+	DCB_ATTR_PFC_CFG,
+	DCB_ATTR_NUM_TC,
+	DCB_ATTR_PG_CFG,
+	DCB_ATTR_SET_ALL,
+	DCB_ATTR_PERM_HWADDR,
+
+	__DCB_ATTR_ENUM_MAX,
+	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs
+ *
+ * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8)
+ * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined
+ * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG)
+ *
+ */
+enum dcbnl_pfc_up_attrs {
+	DCB_PFC_UP_ATTR_UNDEFINED,
+
+	DCB_PFC_UP_ATTR_0,
+	DCB_PFC_UP_ATTR_1,
+	DCB_PFC_UP_ATTR_2,
+	DCB_PFC_UP_ATTR_3,
+	DCB_PFC_UP_ATTR_4,
+	DCB_PFC_UP_ATTR_5,
+	DCB_PFC_UP_ATTR_6,
+	DCB_PFC_UP_ATTR_7,
+	DCB_PFC_UP_ATTR_ALL,
+
+	__DCB_PFC_UP_ATTR_ENUM_MAX,
+	DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_pg_attrs - DCB Priority Group attributes
+ *
+ * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED)
+ * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined
+ * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED)
+ * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8)
+ * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined
+ * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG)
+ *
+ */
+enum dcbnl_pg_attrs {
+	DCB_PG_ATTR_UNDEFINED,
+
+	DCB_PG_ATTR_TC_0,
+	DCB_PG_ATTR_TC_1,
+	DCB_PG_ATTR_TC_2,
+	DCB_PG_ATTR_TC_3,
+	DCB_PG_ATTR_TC_4,
+	DCB_PG_ATTR_TC_5,
+	DCB_PG_ATTR_TC_6,
+	DCB_PG_ATTR_TC_7,
+	DCB_PG_ATTR_TC_MAX,
+	DCB_PG_ATTR_TC_ALL,
+
+	DCB_PG_ATTR_BW_ID_0,
+	DCB_PG_ATTR_BW_ID_1,
+	DCB_PG_ATTR_BW_ID_2,
+	DCB_PG_ATTR_BW_ID_3,
+	DCB_PG_ATTR_BW_ID_4,
+	DCB_PG_ATTR_BW_ID_5,
+	DCB_PG_ATTR_BW_ID_6,
+	DCB_PG_ATTR_BW_ID_7,
+	DCB_PG_ATTR_BW_ID_MAX,
+	DCB_PG_ATTR_BW_ID_ALL,
+
+	__DCB_PG_ATTR_ENUM_MAX,
+	DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcbnl_tc_attrs - DCB Traffic Class attributes
+ *
+ * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to
+ *                          Valid values are:  0-7
+ * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map
+ *                                Some devices may not support changing the
+ *                                user priority map of a TC.
+ * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting
+ *                                 0 - none
+ *                                 1 - group strict
+ *                                 2 - link strict
+ * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and
+ *                            not configured to use link strict priority,
+ *                            this is the percentage of bandwidth of the
+ *                            priority group this traffic class belongs to
+ * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters
+ *
+ */
+enum dcbnl_tc_attrs {
+	DCB_TC_ATTR_PARAM_UNDEFINED,
+
+	DCB_TC_ATTR_PARAM_PGID,
+	DCB_TC_ATTR_PARAM_UP_MAPPING,
+	DCB_TC_ATTR_PARAM_STRICT_PRIO,
+	DCB_TC_ATTR_PARAM_BW_PCT,
+	DCB_TC_ATTR_PARAM_ALL,
+
+	__DCB_TC_ATTR_PARAM_ENUM_MAX,
+	DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1,
+};
+
+/**
+ * enum dcb_general_attr_values - general DCB attribute values
+ *
+ * @DCB_ATTR_UNDEFINED: value used to indicate an attribute is not supported
+ *
+ */
+enum dcb_general_attr_values {
+	DCB_ATTR_VALUE_UNDEFINED = 0xff
+};
+
+
+#endif /* __LINUX_DCBNL_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8fb23679ee3..6095af572dfd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,9 @@
 
 #include <net/net_namespace.h>
 #include <net/dsa.h>
+#ifdef CONFIG_DCBNL
+#include <net/dcbnl.h>
+#endif
 
 struct vlan_group;
 struct ethtool_ops;
@@ -843,6 +846,11 @@ struct net_device
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
 
+#ifdef CONFIG_DCBNL
+	/* Data Center Bridging netlink ops */
+	struct dcbnl_rtnl_ops *dcbnl_ops;
+#endif
+
 #ifdef CONFIG_COMPAT_NET_DEV_OPS
 	struct {
 		int			(*init)(struct net_device *dev);
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2b3d51c6ec9c..e88f7058b3a1 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -107,6 +107,11 @@ enum {
 	RTM_GETADDRLABEL,
 #define RTM_GETADDRLABEL RTM_GETADDRLABEL
 
+	RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+	RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
new file mode 100644
index 000000000000..0ef0c5a46d8b
--- /dev/null
+++ b/include/net/dcbnl.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#ifndef __NET_DCBNL_H__
+#define __NET_DCBNL_H__
+
+/*
+ * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
+ * the netdevice struct.
+ */
+struct dcbnl_rtnl_ops {
+	u8   (*getstate)(struct net_device *);
+	void (*setstate)(struct net_device *, u8);
+	void (*getpermhwaddr)(struct net_device *, u8 *);
+	void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8);
+	void (*setpgbwgcfgtx)(struct net_device *, int, u8);
+	void (*setpgtccfgrx)(struct net_device *, int, u8, u8, u8, u8);
+	void (*setpgbwgcfgrx)(struct net_device *, int, u8);
+	void (*getpgtccfgtx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *);
+	void (*getpgbwgcfgtx)(struct net_device *, int, u8 *);
+	void (*getpgtccfgrx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *);
+	void (*getpgbwgcfgrx)(struct net_device *, int, u8 *);
+	void (*setpfccfg)(struct net_device *, int, u8);
+	void (*getpfccfg)(struct net_device *, int, u8 *);
+	u8   (*setall)(struct net_device *);
+};
+
+#endif /* __NET_DCBNL_H__ */
diff --git a/net/Kconfig b/net/Kconfig
index 4e2e40ba8ba6..c7d01c3a23c5 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -194,6 +194,7 @@ source "net/lapb/Kconfig"
 source "net/econet/Kconfig"
 source "net/wanrouter/Kconfig"
 source "net/sched/Kconfig"
+source "net/dcb/Kconfig"
 
 menu "Network testing"
 
diff --git a/net/Makefile b/net/Makefile
index 27d1f10dc0e0..83b064651f1d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -57,6 +57,9 @@ obj-$(CONFIG_NETLABEL)		+= netlabel/
 obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
+ifeq ($(CONFIG_DCBNL),y)
+obj-$(CONFIG_DCB)		+= dcb/
+endif
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
new file mode 100644
index 000000000000..bdf38802d339
--- /dev/null
+++ b/net/dcb/Kconfig
@@ -0,0 +1,12 @@
+config DCB
+        tristate "Data Center Bridging support"
+
+config DCBNL
+	bool "Data Center Bridging netlink interface support"
+	depends on DCB
+	default n
+	---help---
+	  This option turns on the netlink interface
+	  (dcbnl) for Data Center Bridging capable devices.
+
+	  If unsure, say N.
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
new file mode 100644
index 000000000000..9930f4cde818
--- /dev/null
+++ b/net/dcb/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DCB) += dcbnl.o
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
new file mode 100644
index 000000000000..516e8be83d72
--- /dev/null
+++ b/net/dcb/dcbnl.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Lucy Liu <lucy.liu@intel.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <net/rtnetlink.h>
+#include <linux/dcbnl.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+
+/**
+ * Data Center Bridging (DCB) is a collection of Ethernet enhancements
+ * intended to allow network traffic with differing requirements
+ * (highly reliable, no drops vs. best effort vs. low latency) to operate
+ * and co-exist on Ethernet.  Current DCB features are:
+ *
+ * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a
+ *   framework for assigning bandwidth guarantees to traffic classes.
+ *
+ * Priority-based Flow Control (PFC) - provides a flow control mechanism which
+ *   can work independently for each 802.1p priority.
+ *
+ * Congestion Notification - provides a mechanism for end-to-end congestion
+ *   control for protocols which do not have built-in congestion management.
+ *
+ * More information about the emerging standards for these Ethernet features
+ * can be found at: http://www.ieee802.org/1/pages/dcbridges.html
+ *
+ * This file implements an rtnetlink interface to allow configuration of DCB
+ * features for capable devices.
+ */
+
+MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
+MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
+MODULE_LICENSE("GPL");
+
+/**************** DCB attribute policies *************************************/
+
+/* DCB netlink attributes policy */
+static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
+	[DCB_ATTR_IFNAME]    = {.type = NLA_STRING, .len = IFNAMSIZ - 1},
+	[DCB_ATTR_STATE]     = {.type = NLA_U8},
+	[DCB_ATTR_PFC_CFG]   = {.type = NLA_NESTED},
+	[DCB_ATTR_PG_CFG]    = {.type = NLA_NESTED},
+	[DCB_ATTR_SET_ALL]   = {.type = NLA_U8},
+	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
+};
+
+/* DCB priority flow control to User Priority nested attributes */
+static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = {
+	[DCB_PFC_UP_ATTR_0]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_1]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_2]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_3]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_4]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_5]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_6]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_7]   = {.type = NLA_U8},
+	[DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB priority grouping nested attributes */
+static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = {
+	[DCB_PG_ATTR_TC_0]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_1]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_2]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_3]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_4]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_5]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_6]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_7]      = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_TC_ALL]    = {.type = NLA_NESTED},
+	[DCB_PG_ATTR_BW_ID_0]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_1]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_2]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_3]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_4]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_5]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_6]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_7]   = {.type = NLA_U8},
+	[DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG},
+};
+
+/* DCB traffic class nested attributes. */
+static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
+	[DCB_TC_ATTR_PARAM_PGID]            = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_UP_MAPPING]      = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_STRICT_PRIO]     = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_BW_PCT]          = {.type = NLA_U8},
+	[DCB_TC_ATTR_PARAM_ALL]             = {.type = NLA_FLAG},
+};
+
+
+/* standard netlink reply call */
+static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
+                       u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct dcbmsg *dcb;
+	struct nlmsghdr *nlh;
+	int ret = -EINVAL;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		return ret;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = cmd;
+	dcb->dcb_pad = 0;
+
+	ret = nla_put_u8(dcbnl_skb, attr, value);
+	if (ret)
+		goto err;
+
+	/* end the message, assign the nlmsg_len. */
+	nlmsg_end(dcbnl_skb, nlh);
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+	return ret;
+}
+
+static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb,
+                          u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	/* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */
+	if (!netdev->dcbnl_ops->getstate)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB,
+	                  DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb,
+                           u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+	                       tb[DCB_ATTR_PFC_CFG],
+	                       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_PFC_GCFG;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG);
+	if (!nest)
+		goto err;
+
+	if (data[DCB_PFC_UP_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0,
+		                             &value);
+		ret = nla_put_u8(dcbnl_skb, i, value);
+
+		if (ret) {
+			nla_nest_cancel(dcbnl_skb, nest);
+			goto err;
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb,
+                                u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	u8 perm_addr[MAX_ADDR_LEN];
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getpermhwaddr)
+		return ret;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GPERM_HWADDR;
+
+	netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr);
+
+	ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr),
+	              perm_addr);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
+static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *pg_nest, *param_nest, *data;
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	u8 prio, pgid, tc_pct, up_map;
+	int ret  = -EINVAL;
+	int getall = 0;
+	int i;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->getpgtccfgtx ||
+	    !netdev->dcbnl_ops->getpgtccfgrx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->getpgbwgcfgrx)
+		return ret;
+
+	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+	                       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG;
+
+	pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG);
+	if (!pg_nest)
+		goto err;
+
+	if (pg_tb[DCB_PG_ATTR_TC_ALL])
+		getall = 1;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		if (pg_tb[DCB_PG_ATTR_TC_ALL])
+			data = pg_tb[DCB_PG_ATTR_TC_ALL];
+		else
+			data = pg_tb[i];
+		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+				       data, dcbnl_tc_param_nest);
+		if (ret)
+			goto err_pg;
+
+		param_nest = nla_nest_start(dcbnl_skb, i);
+		if (!param_nest)
+			goto err_pg;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgtccfgrx(netdev,
+						i - DCB_PG_ATTR_TC_0, &prio,
+						&pgid, &tc_pct, &up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgtccfgtx(netdev,
+						i - DCB_PG_ATTR_TC_0, &prio,
+						&pgid, &tc_pct, &up_map);
+		}
+
+		if (param_tb[DCB_TC_ATTR_PARAM_PGID] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+			                 DCB_TC_ATTR_PARAM_PGID, pgid);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+			                 DCB_TC_ATTR_PARAM_UP_MAPPING, up_map);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb,
+			                 DCB_TC_ATTR_PARAM_STRICT_PRIO, prio);
+			if (ret)
+				goto err_param;
+		}
+		if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] ||
+		    param_tb[DCB_TC_ATTR_PARAM_ALL]) {
+			ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT,
+			                 tc_pct);
+			if (ret)
+				goto err_param;
+		}
+		nla_nest_end(dcbnl_skb, param_nest);
+	}
+
+	if (pg_tb[DCB_PG_ATTR_BW_ID_ALL])
+		getall = 1;
+	else
+		getall = 0;
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!getall && !pg_tb[i])
+			continue;
+
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->getpgbwgcfgrx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->getpgbwgcfgtx(netdev,
+					i - DCB_PG_ATTR_BW_ID_0, &tc_pct);
+		}
+		ret = nla_put_u8(dcbnl_skb, i, tc_pct);
+
+		if (ret)
+			goto err_pg;
+	}
+
+	nla_nest_end(dcbnl_skb, pg_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err_param:
+	nla_nest_cancel(dcbnl_skb, param_nest);
+err_pg:
+	nla_nest_cancel(dcbnl_skb, pg_nest);
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	ret  = -EINVAL;
+	return ret;
+}
+
+static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
+                          u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_STATE]);
+
+	netdev->dcbnl_ops->setstate(netdev, value);
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+	                  pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb,
+                           u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1];
+	int i;
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX,
+	                       tb[DCB_ATTR_PFC_CFG],
+	                       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) {
+		if (data[i] == NULL)
+			continue;
+		value = nla_get_u8(data[i]);
+		netdev->dcbnl_ops->setpfccfg(netdev,
+			data[i]->nla_type - DCB_PFC_UP_ATTR_0, value);
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG,
+	                  pid, seq, flags);
+err:
+	return ret;
+}
+
+static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb,
+                        u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB,
+	                  DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags);
+
+	return ret;
+}
+
+static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags, int dir)
+{
+	struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1];
+	struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1];
+	int ret = -EINVAL;
+	int i;
+	u8 pgid;
+	u8 up_map;
+	u8 prio;
+	u8 tc_pct;
+
+	if (!tb[DCB_ATTR_PG_CFG] ||
+	    !netdev->dcbnl_ops->setpgtccfgtx ||
+	    !netdev->dcbnl_ops->setpgtccfgrx ||
+	    !netdev->dcbnl_ops->setpgbwgcfgtx ||
+	    !netdev->dcbnl_ops->setpgbwgcfgrx)
+		return ret;
+
+	ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX,
+	                       tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) {
+		if (!pg_tb[i])
+			continue;
+
+		ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX,
+		                       pg_tb[i], dcbnl_tc_param_nest);
+		if (ret)
+			goto err;
+
+		pgid = DCB_ATTR_VALUE_UNDEFINED;
+		prio = DCB_ATTR_VALUE_UNDEFINED;
+		tc_pct = DCB_ATTR_VALUE_UNDEFINED;
+		up_map = DCB_ATTR_VALUE_UNDEFINED;
+
+		if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO])
+			prio =
+			    nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_PGID])
+			pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT])
+			tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]);
+
+		if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING])
+			up_map =
+			     nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]);
+
+		/* dir: Tx = 0, Rx = 1 */
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->setpgtccfgrx(netdev,
+				i - DCB_PG_ATTR_TC_0,
+				prio, pgid, tc_pct, up_map);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->setpgtccfgtx(netdev,
+				i - DCB_PG_ATTR_TC_0,
+				prio, pgid, tc_pct, up_map);
+		}
+	}
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) {
+		if (!pg_tb[i])
+			continue;
+
+		tc_pct = nla_get_u8(pg_tb[i]);
+
+		/* dir: Tx = 0, Rx = 1 */
+		if (dir) {
+			/* Rx */
+			netdev->dcbnl_ops->setpgbwgcfgrx(netdev,
+					 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+		} else {
+			/* Tx */
+			netdev->dcbnl_ops->setpgbwgcfgtx(netdev,
+					 i - DCB_PG_ATTR_BW_ID_0, tc_pct);
+		}
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB,
+			  (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG),
+			  DCB_ATTR_PG_CFG, pid, seq, flags);
+
+err:
+	return ret;
+}
+
+static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0);
+}
+
+static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1);
+}
+
+static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	struct dcbmsg  *dcb = (struct dcbmsg *)NLMSG_DATA(nlh);
+	struct nlattr *tb[DCB_ATTR_MAX + 1];
+	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+	int ret = -EINVAL;
+
+	if (net != &init_net)
+		return -EINVAL;
+
+	ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
+			  dcbnl_rtnl_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[DCB_ATTR_IFNAME])
+		return -EINVAL;
+
+	netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME]));
+	if (!netdev)
+		return -EINVAL;
+
+	if (!netdev->dcbnl_ops)
+		goto errout;
+
+	switch (dcb->cmd) {
+	case DCB_CMD_GSTATE:
+		ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq,
+		                     nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PFC_GCFG:
+		ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                      nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_GPERM_HWADDR:
+		ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq,
+		                           nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGTX_GCFG:
+		ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGRX_GCFG:
+		ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SSTATE:
+		ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq,
+		                     nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PFC_SCFG:
+		ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                      nlh->nlmsg_flags);
+		goto out;
+
+	case DCB_CMD_SET_ALL:
+		ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq,
+		                   nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGTX_SCFG:
+		ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PGRX_SCFG:
+		ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	default:
+		goto errout;
+	}
+errout:
+	ret = -EINVAL;
+out:
+	dev_put(netdev);
+	return ret;
+}
+
+static int __init dcbnl_init(void)
+{
+	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
+	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
+
+	return 0;
+}
+module_init(dcbnl_init);
+
+static void __exit dcbnl_exit(void)
+{
+	rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
+	rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
+}
+module_exit(dcbnl_exit);
+
+
-- 
cgit v1.2.3


From 46132188bf72e22ef097f16ed5c969ee8cea1e8b Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 20 Nov 2008 21:05:08 -0800
Subject: DCB: Add interface to query for the DCB capabilities of an device.

Adds to the netlink interface for Data Center Bridging (DCB), allowing
the DCB capabilities supported by a device to be queried.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c | 42 +++++++++++++++++++-
 include/linux/dcbnl.h            | 37 ++++++++++++++++++
 include/net/dcbnl.h              |  1 +
 net/dcb/dcbnl.c                  | 82 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 161 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 50bff2af6b04..eb3a6cea1aaa 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -337,6 +337,45 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
 	return ret;
 }
 
+static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	u8 rval = 0;
+
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		switch (capid) {
+		case DCB_CAP_ATTR_PG:
+			*cap = true;
+			break;
+		case DCB_CAP_ATTR_PFC:
+			*cap = true;
+			break;
+		case DCB_CAP_ATTR_UP2TC:
+			*cap = false;
+			break;
+		case DCB_CAP_ATTR_PG_TCS:
+			*cap = 0x80;
+			break;
+		case DCB_CAP_ATTR_PFC_TCS:
+			*cap = 0x80;
+			break;
+		case DCB_CAP_ATTR_GSP:
+			*cap = true;
+			break;
+		case DCB_CAP_ATTR_BCN:
+			*cap = false;
+			break;
+		default:
+			rval = -EINVAL;
+			break;
+		}
+	} else {
+		rval = -EINVAL;
+	}
+
+	return rval;
+}
+
 struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getstate	= ixgbe_dcbnl_get_state,
 	.setstate	= ixgbe_dcbnl_set_state,
@@ -351,6 +390,7 @@ struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getpgbwgcfgrx	= ixgbe_dcbnl_get_pg_bwg_cfg_rx,
 	.setpfccfg	= ixgbe_dcbnl_set_pfc_cfg,
 	.getpfccfg	= ixgbe_dcbnl_get_pfc_cfg,
-	.setall		= ixgbe_dcbnl_set_all
+	.setall		= ixgbe_dcbnl_set_all,
+	.getcap		= ixgbe_dcbnl_getcap
 };
 
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 32d32c1ee410..13f0c638a695 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -43,6 +43,7 @@ struct dcbmsg {
  * @DCB_CMD_SET_ALL: apply all changes to the underlying device
  * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying
  *                        device.  Only useful when using bonding.
+ * @DCB_CMD_GCAP: request the DCB capabilities of the device
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -60,6 +61,7 @@ enum dcbnl_commands {
 
 	DCB_CMD_SET_ALL,
 	DCB_CMD_GPERM_HWADDR,
+	DCB_CMD_GCAP,
 
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
@@ -78,6 +80,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED)
  * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8)
  * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED)
+ * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -90,6 +93,7 @@ enum dcbnl_attrs {
 	DCB_ATTR_PG_CFG,
 	DCB_ATTR_SET_ALL,
 	DCB_ATTR_PERM_HWADDR,
+	DCB_ATTR_CAP,
 
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
@@ -216,6 +220,39 @@ enum dcbnl_tc_attrs {
 	DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1,
 };
 
+/**
+ * enum dcbnl_cap_attrs - DCB Capability attributes
+ *
+ * @DCB_CAP_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_CAP_ATTR_ALL: (NLA_FLAG) all capability parameters
+ * @DCB_CAP_ATTR_PG: (NLA_U8) device supports Priority Groups
+ * @DCB_CAP_ATTR_PFC: (NLA_U8) device supports Priority Flow Control
+ * @DCB_CAP_ATTR_UP2TC: (NLA_U8) device supports user priority to
+ *                               traffic class mapping
+ * @DCB_CAP_ATTR_PG_TCS: (NLA_U8) bitmap where each bit represents a
+ *                                number of traffic classes the device
+ *                                can be configured to use for Priority Groups
+ * @DCB_CAP_ATTR_PFC_TCS: (NLA_U8) bitmap where each bit represents a
+ *                                 number of traffic classes the device can be
+ *                                 configured to use for Priority Flow Control
+ * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority
+ * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion
+ *                             Notification
+ */
+enum dcbnl_cap_attrs {
+	DCB_CAP_ATTR_UNDEFINED,
+	DCB_CAP_ATTR_ALL,
+	DCB_CAP_ATTR_PG,
+	DCB_CAP_ATTR_PFC,
+	DCB_CAP_ATTR_UP2TC,
+	DCB_CAP_ATTR_PG_TCS,
+	DCB_CAP_ATTR_PFC_TCS,
+	DCB_CAP_ATTR_GSP,
+	DCB_CAP_ATTR_BCN,
+
+	__DCB_CAP_ATTR_ENUM_MAX,
+	DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1,
+};
 /**
  * enum dcb_general_attr_values - general DCB attribute values
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 0ef0c5a46d8b..183ed040cf4c 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -39,6 +39,7 @@ struct dcbnl_rtnl_ops {
 	void (*setpfccfg)(struct net_device *, int, u8);
 	void (*getpfccfg)(struct net_device *, int, u8 *);
 	u8   (*setall)(struct net_device *);
+	u8   (*getcap)(struct net_device *, int, u8 *);
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 516e8be83d72..de61d64d102d 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -61,6 +61,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_PG_CFG]    = {.type = NLA_NESTED},
 	[DCB_ATTR_SET_ALL]   = {.type = NLA_U8},
 	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
+	[DCB_ATTR_CAP]       = {.type = NLA_NESTED},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -107,6 +108,17 @@ static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = {
 	[DCB_TC_ATTR_PARAM_ALL]             = {.type = NLA_FLAG},
 };
 
+/* DCB capabilities nested attributes. */
+static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = {
+	[DCB_CAP_ATTR_ALL]     = {.type = NLA_FLAG},
+	[DCB_CAP_ATTR_PG]      = {.type = NLA_U8},
+	[DCB_CAP_ATTR_PFC]     = {.type = NLA_U8},
+	[DCB_CAP_ATTR_UP2TC]   = {.type = NLA_U8},
+	[DCB_CAP_ATTR_PG_TCS]  = {.type = NLA_U8},
+	[DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8},
+	[DCB_CAP_ATTR_GSP]     = {.type = NLA_U8},
+	[DCB_CAP_ATTR_BCN]     = {.type = NLA_U8},
+};
 
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
@@ -269,6 +281,72 @@ err_out:
 	return -EINVAL;
 }
 
+static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb,
+                        u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_CAP_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_CAP] || !netdev->dcbnl_ops->getcap)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP],
+	                       dcbnl_cap_nest);
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GCAP;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_CAP);
+	if (!nest)
+		goto err;
+
+	if (data[DCB_CAP_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_CAP_ATTR_ALL+1; i <= DCB_CAP_ATTR_MAX; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		if (!netdev->dcbnl_ops->getcap(netdev, i, &value)) {
+			ret = nla_put_u8(dcbnl_skb, i, value);
+
+			if (ret) {
+				nla_nest_cancel(dcbnl_skb, nest);
+				goto err;
+			}
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	return -EINVAL;
+}
+
 static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
                              u32 pid, u32 seq, u16 flags, int dir)
 {
@@ -675,6 +753,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
 		                        nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_GCAP:
+		ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq,
+		                   nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From 33dbabc4a7f7bd72313c73a3c199f31f3900336f Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 20 Nov 2008 21:08:19 -0800
Subject: DCB: Add interface to query # of TCs supported by device

Adds interface for Data Center Bridging (DCB) to query (and set if
supported) the number of traffic classes currently supported by the
device for the two (DCB) features: priority groups (PG) and priority
flow control (PFC).

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c |  33 +++++++++-
 include/linux/dcbnl.h            |  27 ++++++++
 include/net/dcbnl.h              |   2 +
 net/dcb/dcbnl.c                  | 132 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 193 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index eb3a6cea1aaa..5921795f8403 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -376,6 +376,35 @@ static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap)
 	return rval;
 }
 
+static u8 ixgbe_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	u8 rval = 0;
+
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		switch (tcid) {
+		case DCB_NUMTCS_ATTR_PG:
+			*num = MAX_TRAFFIC_CLASS;
+			break;
+		case DCB_NUMTCS_ATTR_PFC:
+			*num = MAX_TRAFFIC_CLASS;
+			break;
+		default:
+			rval = -EINVAL;
+			break;
+		}
+	} else {
+		rval = -EINVAL;
+	}
+
+	return rval;
+}
+
+static u8 ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num)
+{
+	return -EINVAL;
+}
+
 struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getstate	= ixgbe_dcbnl_get_state,
 	.setstate	= ixgbe_dcbnl_set_state,
@@ -391,6 +420,8 @@ struct dcbnl_rtnl_ops dcbnl_ops = {
 	.setpfccfg	= ixgbe_dcbnl_set_pfc_cfg,
 	.getpfccfg	= ixgbe_dcbnl_get_pfc_cfg,
 	.setall		= ixgbe_dcbnl_set_all,
-	.getcap		= ixgbe_dcbnl_getcap
+	.getcap		= ixgbe_dcbnl_getcap,
+	.getnumtcs	= ixgbe_dcbnl_getnumtcs,
+	.setnumtcs	= ixgbe_dcbnl_setnumtcs
 };
 
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 13f0c638a695..1077fba1dadc 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -44,6 +44,8 @@ struct dcbmsg {
  * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying
  *                        device.  Only useful when using bonding.
  * @DCB_CMD_GCAP: request the DCB capabilities of the device
+ * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported
+ * @DCB_CMD_SNUMTCS: set the number of traffic classes
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -62,6 +64,8 @@ enum dcbnl_commands {
 	DCB_CMD_SET_ALL,
 	DCB_CMD_GPERM_HWADDR,
 	DCB_CMD_GCAP,
+	DCB_CMD_GNUMTCS,
+	DCB_CMD_SNUMTCS,
 
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
@@ -81,6 +85,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8)
  * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED)
  * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED)
+ * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -94,6 +99,7 @@ enum dcbnl_attrs {
 	DCB_ATTR_SET_ALL,
 	DCB_ATTR_PERM_HWADDR,
 	DCB_ATTR_CAP,
+	DCB_ATTR_NUMTCS,
 
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
@@ -253,6 +259,27 @@ enum dcbnl_cap_attrs {
 	__DCB_CAP_ATTR_ENUM_MAX,
 	DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1,
 };
+
+/**
+ * enum dcbnl_numtcs_attrs - number of traffic classes
+ *
+ * @DCB_NUMTCS_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_NUMTCS_ATTR_ALL: (NLA_FLAG) all traffic class attributes
+ * @DCB_NUMTCS_ATTR_PG: (NLA_U8) number of traffic classes used for
+ *                               priority groups
+ * @DCB_NUMTCS_ATTR_PFC: (NLA_U8) number of traffic classes which can
+ *                                support priority flow control
+ */
+enum dcbnl_numtcs_attrs {
+	DCB_NUMTCS_ATTR_UNDEFINED,
+	DCB_NUMTCS_ATTR_ALL,
+	DCB_NUMTCS_ATTR_PG,
+	DCB_NUMTCS_ATTR_PFC,
+
+	__DCB_NUMTCS_ATTR_ENUM_MAX,
+	DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1,
+};
+
 /**
  * enum dcb_general_attr_values - general DCB attribute values
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 183ed040cf4c..f0a65281ea73 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -40,6 +40,8 @@ struct dcbnl_rtnl_ops {
 	void (*getpfccfg)(struct net_device *, int, u8 *);
 	u8   (*setall)(struct net_device *);
 	u8   (*getcap)(struct net_device *, int, u8 *);
+	u8   (*getnumtcs)(struct net_device *, int, u8 *);
+	u8   (*setnumtcs)(struct net_device *, int, u8);
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index de61d64d102d..5ff7e3c0c172 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -120,6 +120,13 @@ static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = {
 	[DCB_CAP_ATTR_BCN]     = {.type = NLA_U8},
 };
 
+/* DCB capabilities nested attributes. */
+static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = {
+	[DCB_NUMTCS_ATTR_ALL]     = {.type = NLA_FLAG},
+	[DCB_NUMTCS_ATTR_PG]      = {.type = NLA_U8},
+	[DCB_NUMTCS_ATTR_PFC]     = {.type = NLA_U8},
+};
+
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
                        u32 seq, u16 flags)
@@ -347,6 +354,123 @@ err_out:
 	return -EINVAL;
 }
 
+static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb,
+                           u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->getnumtcs)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
+	                       dcbnl_numtcs_nest);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GNUMTCS;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_NUMTCS);
+	if (!nest) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (data[DCB_NUMTCS_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		ret = netdev->dcbnl_ops->getnumtcs(netdev, i, &value);
+		if (!ret) {
+			ret = nla_put_u8(dcbnl_skb, i, value);
+
+			if (ret) {
+				nla_nest_cancel(dcbnl_skb, nest);
+				ret = -EINVAL;
+				goto err;
+			}
+		} else {
+			goto err;
+		}
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	return ret;
+}
+
+static int dcbnl_setnumtcs(struct net_device *netdev, struct nlattr **tb,
+                           u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1];
+	int ret = -EINVAL;
+	u8 value;
+	int i;
+
+	if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->setstate)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS],
+	                       dcbnl_numtcs_nest);
+
+	if (ret) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) {
+		if (data[i] == NULL)
+			continue;
+
+		value = nla_get_u8(data[i]);
+
+		ret = netdev->dcbnl_ops->setnumtcs(netdev, i, value);
+
+		if (ret)
+			goto operr;
+	}
+
+operr:
+	ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SNUMTCS,
+	                  DCB_ATTR_NUMTCS, pid, seq, flags);
+
+err:
+	return ret;
+}
+
 static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
                              u32 pid, u32 seq, u16 flags, int dir)
 {
@@ -757,6 +881,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq,
 		                   nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_GNUMTCS:
+		ret = dcbnl_getnumtcs(netdev, tb, pid, nlh->nlmsg_seq,
+		                      nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SNUMTCS:
+		ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq,
+		                      nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From 0eb3aa9bab20217fb42244ccdcb5bf8a002f504c Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 20 Nov 2008 21:09:23 -0800
Subject: DCB: Add interface to query the state of PFC feature.

Adds a netlink interface for Data Center Bridging (DCB) to get and set
the enable state of the Priority Flow Control (PFC) feature.
Primarily, this is a way to turn off PFC in the driver while DCB
remains enabled.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c | 16 ++++++++++++++-
 include/linux/dcbnl.h            |  2 ++
 include/net/dcbnl.h              |  2 ++
 net/dcb/dcbnl.c                  | 43 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 5921795f8403..dd940a8f9357 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -405,6 +405,18 @@ static u8 ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num)
 	return -EINVAL;
 }
 
+static u8 ixgbe_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
+}
+
+static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+	return;
+}
+
 struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getstate	= ixgbe_dcbnl_get_state,
 	.setstate	= ixgbe_dcbnl_set_state,
@@ -422,6 +434,8 @@ struct dcbnl_rtnl_ops dcbnl_ops = {
 	.setall		= ixgbe_dcbnl_set_all,
 	.getcap		= ixgbe_dcbnl_getcap,
 	.getnumtcs	= ixgbe_dcbnl_getnumtcs,
-	.setnumtcs	= ixgbe_dcbnl_setnumtcs
+	.setnumtcs	= ixgbe_dcbnl_setnumtcs,
+	.getpfcstate	= ixgbe_dcbnl_getpfcstate,
+	.setpfcstate	= ixgbe_dcbnl_setpfcstate
 };
 
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 1077fba1dadc..6cc4560bc376 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -66,6 +66,8 @@ enum dcbnl_commands {
 	DCB_CMD_GCAP,
 	DCB_CMD_GNUMTCS,
 	DCB_CMD_SNUMTCS,
+	DCB_CMD_PFC_GSTATE,
+	DCB_CMD_PFC_SSTATE,
 
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index f0a65281ea73..c7d87caf3f99 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -42,6 +42,8 @@ struct dcbnl_rtnl_ops {
 	u8   (*getcap)(struct net_device *, int, u8 *);
 	u8   (*getnumtcs)(struct net_device *, int, u8 *);
 	u8   (*setnumtcs)(struct net_device *, int, u8);
+	u8   (*getpfcstate)(struct net_device *);
+	void (*setpfcstate)(struct net_device *, u8);
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 5ff7e3c0c172..758419c6f59b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -62,6 +62,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_SET_ALL]   = {.type = NLA_U8},
 	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
 	[DCB_ATTR_CAP]       = {.type = NLA_NESTED},
+	[DCB_ATTR_PFC_STATE] = {.type = NLA_U8},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -471,6 +472,40 @@ err:
 	return ret;
 }
 
+static int dcbnl_getpfcstate(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getpfcstate)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getpfcstate(netdev), RTM_GETDCB,
+	                  DCB_CMD_PFC_GSTATE, DCB_ATTR_PFC_STATE,
+	                  pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_setpfcstate(struct net_device *netdev, struct nlattr **tb,
+                             u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_PFC_STATE] || !netdev->dcbnl_ops->setpfcstate)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_PFC_STATE]);
+
+	netdev->dcbnl_ops->setpfcstate(netdev, value);
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SSTATE, DCB_ATTR_PFC_STATE,
+	                  pid, seq, flags);
+
+	return ret;
+}
+
 static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb,
                              u32 pid, u32 seq, u16 flags, int dir)
 {
@@ -889,6 +924,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq,
 		                      nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_PFC_GSTATE:
+		ret = dcbnl_getpfcstate(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_PFC_SSTATE:
+		ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq,
+		                        nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From 859ee3c43812051e21816c6d6d4cc04fb7ce9b2e Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 20 Nov 2008 21:10:23 -0800
Subject: DCB: Add support for DCB BCN

Adds an interface to configure the Backward Congestion Notification
(BCN) feature.  In a BCN capabale network, congestion notifications
from congested points out in the network can cause the end station
limit the rate of a given traffic flow.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_dcb.h    |  27 ++++++
 drivers/net/ixgbe/ixgbe_dcb_nl.c | 176 ++++++++++++++++++++++++++++++++++++++-
 include/linux/dcbnl.h            |  44 +++++++++-
 include/net/dcbnl.h              |   4 +
 net/dcb/dcbnl.c                  | 174 ++++++++++++++++++++++++++++++++++++--
 5 files changed, 416 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h
index 62dfd243bedc..75f6efe1e369 100644
--- a/drivers/net/ixgbe/ixgbe_dcb.h
+++ b/drivers/net/ixgbe/ixgbe_dcb.h
@@ -108,7 +108,34 @@ enum dcb_rx_pba_cfg {
 	pba_80_48      /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
 };
 
+/*
+ * This structure contains many values encoded as fixed-point
+ * numbers, meaning that some of bits are dedicated to the
+ * magnitude and others to the fraction part. In the comments
+ * this is shown as f=n, where n is the number of fraction bits.
+ * These fraction bits are always the low-order bits. The size
+ * of the magnitude is not specified.
+ */
+struct bcn_config {
+	u32 rp_admin_mode[MAX_TRAFFIC_CLASS]; /* BCN enabled, per TC */
+	u32 bcna_option[2]; /* BCNA Port + MAC Addr */
+	u32 rp_w;        /* Derivative Weight, f=3 */
+	u32 rp_gi;       /* Increase Gain, f=12 */
+	u32 rp_gd;       /* Decrease Gain, f=12 */
+	u32 rp_ru;       /* Rate Unit */
+	u32 rp_alpha;    /* Max Decrease Factor, f=12 */
+	u32 rp_beta;     /* Max Increase Factor, f=12 */
+	u32 rp_ri;       /* Initial Rate */
+	u32 rp_td;       /* Drift Interval Timer */
+	u32 rp_rd;       /* Drift Increase */
+	u32 rp_tmax;     /* Severe Congestion Backoff Timer Range */
+	u32 rp_rmin;     /* Severe Congestion Restart Rate */
+	u32 rp_wrtt;     /* RTT Moving Average Weight */
+};
+
 struct ixgbe_dcb_config {
+	struct bcn_config bcn;
+
 	struct tc_configuration tc_config[MAX_TRAFFIC_CLASS];
 	u8     bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */
 
diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index dd940a8f9357..615c2803202a 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -34,6 +34,7 @@
 #define BIT_PFC		0x02
 #define BIT_PG_RX	0x04
 #define BIT_PG_TX	0x08
+#define BIT_BCN         0x10
 
 int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
                        struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max)
@@ -88,6 +89,23 @@ int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
 			src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc;
 	}
 
+	for (i = DCB_BCN_ATTR_RP_0; i < DCB_BCN_ATTR_RP_ALL; i++) {
+		dst_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0] =
+			src_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0];
+	}
+	dst_dcb_cfg->bcn.rp_alpha = src_dcb_cfg->bcn.rp_alpha;
+	dst_dcb_cfg->bcn.rp_beta = src_dcb_cfg->bcn.rp_beta;
+	dst_dcb_cfg->bcn.rp_gd = src_dcb_cfg->bcn.rp_gd;
+	dst_dcb_cfg->bcn.rp_gi = src_dcb_cfg->bcn.rp_gi;
+	dst_dcb_cfg->bcn.rp_tmax = src_dcb_cfg->bcn.rp_tmax;
+	dst_dcb_cfg->bcn.rp_td = src_dcb_cfg->bcn.rp_td;
+	dst_dcb_cfg->bcn.rp_rmin = src_dcb_cfg->bcn.rp_rmin;
+	dst_dcb_cfg->bcn.rp_w = src_dcb_cfg->bcn.rp_w;
+	dst_dcb_cfg->bcn.rp_rd = src_dcb_cfg->bcn.rp_rd;
+	dst_dcb_cfg->bcn.rp_ru = src_dcb_cfg->bcn.rp_ru;
+	dst_dcb_cfg->bcn.rp_wrtt = src_dcb_cfg->bcn.rp_wrtt;
+	dst_dcb_cfg->bcn.rp_ri = src_dcb_cfg->bcn.rp_ri;
+
 	return 0;
 }
 
@@ -313,6 +331,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	int ret;
 
+	adapter->dcb_set_bitmap &= ~BIT_BCN;	/* no set for BCN */
 	if (!adapter->dcb_set_bitmap)
 		return 1;
 
@@ -417,6 +436,157 @@ static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
 	return;
 }
 
+static void ixgbe_dcbnl_getbcnrp(struct net_device *netdev, int priority,
+				  u8 *setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*setting = adapter->dcb_cfg.bcn.rp_admin_mode[priority];
+}
+
+
+static void ixgbe_dcbnl_getbcncfg(struct net_device *netdev, int enum_index,
+				  u32 *setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	switch (enum_index) {
+	case DCB_BCN_ATTR_ALPHA:
+		*setting = adapter->dcb_cfg.bcn.rp_alpha;
+		break;
+	case DCB_BCN_ATTR_BETA:
+		*setting = adapter->dcb_cfg.bcn.rp_beta;
+		break;
+	case DCB_BCN_ATTR_GD:
+		*setting = adapter->dcb_cfg.bcn.rp_gd;
+		break;
+	case DCB_BCN_ATTR_GI:
+		*setting = adapter->dcb_cfg.bcn.rp_gi;
+		break;
+	case DCB_BCN_ATTR_TMAX:
+		*setting = adapter->dcb_cfg.bcn.rp_tmax;
+		break;
+	case DCB_BCN_ATTR_TD:
+		*setting = adapter->dcb_cfg.bcn.rp_td;
+		break;
+	case DCB_BCN_ATTR_RMIN:
+		*setting = adapter->dcb_cfg.bcn.rp_rmin;
+		break;
+	case DCB_BCN_ATTR_W:
+		*setting = adapter->dcb_cfg.bcn.rp_w;
+		break;
+	case DCB_BCN_ATTR_RD:
+		*setting = adapter->dcb_cfg.bcn.rp_rd;
+		break;
+	case DCB_BCN_ATTR_RU:
+		*setting = adapter->dcb_cfg.bcn.rp_ru;
+		break;
+	case DCB_BCN_ATTR_WRTT:
+		*setting = adapter->dcb_cfg.bcn.rp_wrtt;
+		break;
+	case DCB_BCN_ATTR_RI:
+		*setting = adapter->dcb_cfg.bcn.rp_ri;
+		break;
+	default:
+		*setting = -1;
+	}
+}
+
+static void ixgbe_dcbnl_setbcnrp(struct net_device *netdev, int priority,
+				 u8 setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.bcn.rp_admin_mode[priority] = setting;
+
+	if (adapter->temp_dcb_cfg.bcn.rp_admin_mode[priority] !=
+	    adapter->dcb_cfg.bcn.rp_admin_mode[priority])
+		adapter->dcb_set_bitmap |= BIT_BCN;
+}
+
+static void ixgbe_dcbnl_setbcncfg(struct net_device *netdev, int enum_index,
+				 u32 setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	switch (enum_index) {
+	case DCB_BCN_ATTR_ALPHA:
+		adapter->temp_dcb_cfg.bcn.rp_alpha = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_alpha !=
+		    adapter->dcb_cfg.bcn.rp_alpha)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_BETA:
+		adapter->temp_dcb_cfg.bcn.rp_beta = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_beta !=
+		    adapter->dcb_cfg.bcn.rp_beta)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_GD:
+		adapter->temp_dcb_cfg.bcn.rp_gd = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_gd !=
+		    adapter->dcb_cfg.bcn.rp_gd)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_GI:
+		adapter->temp_dcb_cfg.bcn.rp_gi = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_gi !=
+		    adapter->dcb_cfg.bcn.rp_gi)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_TMAX:
+		adapter->temp_dcb_cfg.bcn.rp_tmax = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_tmax !=
+		    adapter->dcb_cfg.bcn.rp_tmax)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_TD:
+		adapter->temp_dcb_cfg.bcn.rp_td = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_td !=
+		    adapter->dcb_cfg.bcn.rp_td)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_RMIN:
+		adapter->temp_dcb_cfg.bcn.rp_rmin = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_rmin !=
+		    adapter->dcb_cfg.bcn.rp_rmin)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_W:
+		adapter->temp_dcb_cfg.bcn.rp_w = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_w !=
+		    adapter->dcb_cfg.bcn.rp_w)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_RD:
+		adapter->temp_dcb_cfg.bcn.rp_rd = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_rd !=
+		    adapter->dcb_cfg.bcn.rp_rd)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_RU:
+		adapter->temp_dcb_cfg.bcn.rp_ru = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_ru !=
+		    adapter->dcb_cfg.bcn.rp_ru)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_WRTT:
+		adapter->temp_dcb_cfg.bcn.rp_wrtt = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_wrtt !=
+		    adapter->dcb_cfg.bcn.rp_wrtt)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_RI:
+		adapter->temp_dcb_cfg.bcn.rp_ri = setting;
+		if (adapter->temp_dcb_cfg.bcn.rp_ri !=
+		    adapter->dcb_cfg.bcn.rp_ri)
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	default:
+		break;
+	}
+}
+
 struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getstate	= ixgbe_dcbnl_get_state,
 	.setstate	= ixgbe_dcbnl_set_state,
@@ -436,6 +606,10 @@ struct dcbnl_rtnl_ops dcbnl_ops = {
 	.getnumtcs	= ixgbe_dcbnl_getnumtcs,
 	.setnumtcs	= ixgbe_dcbnl_setnumtcs,
 	.getpfcstate	= ixgbe_dcbnl_getpfcstate,
-	.setpfcstate	= ixgbe_dcbnl_setpfcstate
+	.setpfcstate	= ixgbe_dcbnl_setpfcstate,
+	.getbcncfg      = ixgbe_dcbnl_getbcncfg,
+	.getbcnrp       = ixgbe_dcbnl_getbcnrp,
+	.setbcncfg      = ixgbe_dcbnl_setbcncfg,
+	.setbcnrp       = ixgbe_dcbnl_setbcnrp
 };
 
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 6cc4560bc376..e73a61449ad6 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -46,6 +46,8 @@ struct dcbmsg {
  * @DCB_CMD_GCAP: request the DCB capabilities of the device
  * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported
  * @DCB_CMD_SNUMTCS: set the number of traffic classes
+ * @DCB_CMD_GBCN: set backward congestion notification configuration
+ * @DCB_CMD_SBCN: get backward congestion notification configration.
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -62,18 +64,24 @@ enum dcbnl_commands {
 	DCB_CMD_PFC_SCFG,
 
 	DCB_CMD_SET_ALL,
+
 	DCB_CMD_GPERM_HWADDR,
+
 	DCB_CMD_GCAP,
+
 	DCB_CMD_GNUMTCS,
 	DCB_CMD_SNUMTCS,
+
 	DCB_CMD_PFC_GSTATE,
 	DCB_CMD_PFC_SSTATE,
 
+	DCB_CMD_BCN_GCFG,
+	DCB_CMD_BCN_SCFG,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
 
-
 /**
  * enum dcbnl_attrs - DCB top-level netlink attributes
  *
@@ -88,6 +96,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED)
  * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED)
  * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
+ * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -102,6 +111,7 @@ enum dcbnl_attrs {
 	DCB_ATTR_PERM_HWADDR,
 	DCB_ATTR_CAP,
 	DCB_ATTR_NUMTCS,
+	DCB_ATTR_BCN,
 
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
@@ -282,6 +292,38 @@ enum dcbnl_numtcs_attrs {
 	DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1,
 };
 
+enum dcbnl_bcn_attrs{
+	DCB_BCN_ATTR_UNDEFINED = 0,
+
+	DCB_BCN_ATTR_RP_0,
+	DCB_BCN_ATTR_RP_1,
+	DCB_BCN_ATTR_RP_2,
+	DCB_BCN_ATTR_RP_3,
+	DCB_BCN_ATTR_RP_4,
+	DCB_BCN_ATTR_RP_5,
+	DCB_BCN_ATTR_RP_6,
+	DCB_BCN_ATTR_RP_7,
+	DCB_BCN_ATTR_RP_ALL,
+
+	DCB_BCN_ATTR_ALPHA,
+	DCB_BCN_ATTR_BETA,
+	DCB_BCN_ATTR_GD,
+	DCB_BCN_ATTR_GI,
+	DCB_BCN_ATTR_TMAX,
+	DCB_BCN_ATTR_TD,
+	DCB_BCN_ATTR_RMIN,
+	DCB_BCN_ATTR_W,
+	DCB_BCN_ATTR_RD,
+	DCB_BCN_ATTR_RU,
+	DCB_BCN_ATTR_WRTT,
+	DCB_BCN_ATTR_RI,
+	DCB_BCN_ATTR_C,
+	DCB_BCN_ATTR_ALL,
+
+	__DCB_BCN_ATTR_ENUM_MAX,
+	DCB_BCN_ATTR_MAX = __DCB_BCN_ATTR_ENUM_MAX - 1,
+};
+
 /**
  * enum dcb_general_attr_values - general DCB attribute values
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index c7d87caf3f99..91e0a3d7faf2 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -44,6 +44,10 @@ struct dcbnl_rtnl_ops {
 	u8   (*setnumtcs)(struct net_device *, int, u8);
 	u8   (*getpfcstate)(struct net_device *);
 	void (*setpfcstate)(struct net_device *, u8);
+	void (*getbcncfg)(struct net_device *, int, u32 *);
+	void (*setbcncfg)(struct net_device *, int, u32);
+	void (*getbcnrp)(struct net_device *, int, u8 *);
+	void (*setbcnrp)(struct net_device *, int, u8);
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 758419c6f59b..b2bda3f610df 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -55,14 +55,15 @@ MODULE_LICENSE("GPL");
 
 /* DCB netlink attributes policy */
 static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
-	[DCB_ATTR_IFNAME]    = {.type = NLA_STRING, .len = IFNAMSIZ - 1},
-	[DCB_ATTR_STATE]     = {.type = NLA_U8},
-	[DCB_ATTR_PFC_CFG]   = {.type = NLA_NESTED},
-	[DCB_ATTR_PG_CFG]    = {.type = NLA_NESTED},
-	[DCB_ATTR_SET_ALL]   = {.type = NLA_U8},
+	[DCB_ATTR_IFNAME]      = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1},
+	[DCB_ATTR_STATE]       = {.type = NLA_U8},
+	[DCB_ATTR_PFC_CFG]     = {.type = NLA_NESTED},
+	[DCB_ATTR_PG_CFG]      = {.type = NLA_NESTED},
+	[DCB_ATTR_SET_ALL]     = {.type = NLA_U8},
 	[DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG},
-	[DCB_ATTR_CAP]       = {.type = NLA_NESTED},
-	[DCB_ATTR_PFC_STATE] = {.type = NLA_U8},
+	[DCB_ATTR_CAP]         = {.type = NLA_NESTED},
+	[DCB_ATTR_PFC_STATE]   = {.type = NLA_U8},
+	[DCB_ATTR_BCN]         = {.type = NLA_NESTED},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -128,6 +129,33 @@ static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = {
 	[DCB_NUMTCS_ATTR_PFC]     = {.type = NLA_U8},
 };
 
+/* DCB BCN nested attributes. */
+static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = {
+	[DCB_BCN_ATTR_RP_0]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_1]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_2]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_3]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_4]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_5]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_6]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_7]         = {.type = NLA_U8},
+	[DCB_BCN_ATTR_RP_ALL]       = {.type = NLA_FLAG},
+	[DCB_BCN_ATTR_ALPHA]        = {.type = NLA_U32},
+	[DCB_BCN_ATTR_BETA]         = {.type = NLA_U32},
+	[DCB_BCN_ATTR_GD]           = {.type = NLA_U32},
+	[DCB_BCN_ATTR_GI]           = {.type = NLA_U32},
+	[DCB_BCN_ATTR_TMAX]         = {.type = NLA_U32},
+	[DCB_BCN_ATTR_TD]           = {.type = NLA_U32},
+	[DCB_BCN_ATTR_RMIN]         = {.type = NLA_U32},
+	[DCB_BCN_ATTR_W]            = {.type = NLA_U32},
+	[DCB_BCN_ATTR_RD]           = {.type = NLA_U32},
+	[DCB_BCN_ATTR_RU]           = {.type = NLA_U32},
+	[DCB_BCN_ATTR_WRTT]         = {.type = NLA_U32},
+	[DCB_BCN_ATTR_RI]           = {.type = NLA_U32},
+	[DCB_BCN_ATTR_C]            = {.type = NLA_U32},
+	[DCB_BCN_ATTR_ALL]          = {.type = NLA_FLAG},
+};
+
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
                        u32 seq, u16 flags)
@@ -843,6 +871,130 @@ static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb,
 	return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1);
 }
 
+static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb,
+                            u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *bcn_nest;
+	struct nlattr *bcn_tb[DCB_BCN_ATTR_MAX + 1];
+	u8 value_byte;
+	u32 value_integer;
+	int ret  = -EINVAL;
+	bool getall = false;
+	int i;
+
+	if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->getbcnrp ||
+	    !netdev->dcbnl_ops->getbcncfg)
+		return ret;
+
+	ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX,
+	                       tb[DCB_ATTR_BCN], dcbnl_bcn_nest);
+
+	if (ret)
+		goto err_out;
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb)
+		goto err_out;
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_BCN_GCFG;
+
+	bcn_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_BCN);
+	if (!bcn_nest)
+		goto err;
+
+	if (bcn_tb[DCB_BCN_ATTR_ALL])
+		getall = true;
+
+	for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+		if (!getall && !bcn_tb[i])
+			continue;
+
+		netdev->dcbnl_ops->getbcnrp(netdev, i - DCB_BCN_ATTR_RP_0,
+		                            &value_byte);
+		ret = nla_put_u8(dcbnl_skb, i, value_byte);
+		if (ret)
+			goto err_bcn;
+	}
+
+	for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) {
+		if (!getall && !bcn_tb[i])
+			continue;
+
+		netdev->dcbnl_ops->getbcncfg(netdev, i,
+		                             &value_integer);
+		ret = nla_put_u32(dcbnl_skb, i, value_integer);
+		if (ret)
+			goto err_bcn;
+	}
+
+	nla_nest_end(dcbnl_skb, bcn_nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err_bcn:
+	nla_nest_cancel(dcbnl_skb, bcn_nest);
+nlmsg_failure:
+err:
+	kfree(dcbnl_skb);
+err_out:
+	ret  = -EINVAL;
+	return ret;
+}
+
+static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb,
+                            u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_BCN_ATTR_MAX + 1];
+	int i;
+	int ret = -EINVAL;
+	u8 value_byte;
+	u32 value_int;
+
+	if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg
+	    || !netdev->dcbnl_ops->setbcnrp)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX,
+	                       tb[DCB_ATTR_BCN],
+	                       dcbnl_pfc_up_nest);
+	if (ret)
+		goto err;
+
+	for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+		if (data[i] == NULL)
+			continue;
+		value_byte = nla_get_u8(data[i]);
+		netdev->dcbnl_ops->setbcnrp(netdev,
+			data[i]->nla_type - DCB_BCN_ATTR_RP_0, value_byte);
+	}
+
+	for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) {
+		if (data[i] == NULL)
+			continue;
+		value_int = nla_get_u32(data[i]);
+		netdev->dcbnl_ops->setbcncfg(netdev,
+	                                     i, value_int);
+	}
+
+	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_BCN_SCFG, DCB_ATTR_BCN,
+	                  pid, seq, flags);
+err:
+	return ret;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -891,6 +1043,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
 		                        nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_BCN_GCFG:
+		ret = dcbnl_bcn_getcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                       nlh->nlmsg_flags);
+		goto out;
 	case DCB_CMD_SSTATE:
 		ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq,
 		                     nlh->nlmsg_flags);
@@ -932,6 +1088,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq,
 		                        nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_BCN_SCFG:
+		ret = dcbnl_bcn_setcfg(netdev, tb, pid, nlh->nlmsg_seq,
+		                       nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From 875065491fba8eb13219f16c36e79a6fb4e15c68 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 18 Nov 2008 20:50:34 +0000
Subject: ASoC: Rename snd_soc_card to snd_soc_machine

One of the issues with the ASoC v1 API which has been addressed in the
ASoC v2 work that Liam Girdwood has done is that the ALSA card provided
by ASoC is distributed around the ASoC structures. For example, machine
wide data such as the struct snd_card are maintained as part of the
CODEC data structure, preventing the use of multiple codecs. This has
been addressed by refactoring the data structures so that all the data
for the ALSA card is contained in a single structure snd_soc_card which
replaces the existing snd_soc_machine and snd_soc_device.

Begin the process of backporting this by renaming struct snd_soc_machine
to struct snd_soc_card, better reflecting its function and bringing it
closer to standard ALSA terminology.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 Documentation/sound/alsa/soc/machine.txt |  8 +--
 include/sound/soc.h                      |  8 +--
 sound/soc/atmel/playpaq_wm8510.c         |  4 +-
 sound/soc/atmel/sam9g20_wm8731.c         |  4 +-
 sound/soc/au1x/sample-ac97.c             |  4 +-
 sound/soc/blackfin/bf5xx-ad1980.c        |  6 +--
 sound/soc/blackfin/bf5xx-ad73311.c       |  6 +--
 sound/soc/blackfin/bf5xx-ssm2602.c       |  6 +--
 sound/soc/davinci/davinci-evm.c          |  4 +-
 sound/soc/davinci/davinci-i2s.c          |  8 +--
 sound/soc/davinci/davinci-sffsdr.c       |  4 +-
 sound/soc/fsl/fsl_dma.c                  |  2 +-
 sound/soc/fsl/mpc8610_hpcd.c             |  6 +--
 sound/soc/fsl/soc-of-simple.c            | 10 ++--
 sound/soc/omap/n810.c                    |  4 +-
 sound/soc/omap/omap2evm.c                |  4 +-
 sound/soc/omap/omap3beagle.c             |  4 +-
 sound/soc/omap/osk5912.c                 |  4 +-
 sound/soc/omap/overo.c                   |  4 +-
 sound/soc/pxa/corgi.c                    |  4 +-
 sound/soc/pxa/e800_wm9712.c              |  6 +--
 sound/soc/pxa/em-x270.c                  |  4 +-
 sound/soc/pxa/palm27x.c                  |  4 +-
 sound/soc/pxa/poodle.c                   |  4 +-
 sound/soc/pxa/spitz.c                    |  4 +-
 sound/soc/pxa/tosa.c                     |  6 +--
 sound/soc/s3c24xx/ln2440sbc_alc650.c     |  6 +--
 sound/soc/s3c24xx/neo1973_wm8753.c       |  6 +--
 sound/soc/s3c24xx/s3c24xx_uda134x.c      |  4 +-
 sound/soc/s3c24xx/smdk2443_wm9710.c      |  6 +--
 sound/soc/sh/sh7760-ac97.c               |  4 +-
 sound/soc/soc-core.c                     | 90 ++++++++++++++++----------------
 sound/soc/soc-dapm.c                     |  6 +--
 33 files changed, 127 insertions(+), 127 deletions(-)

(limited to 'include')

diff --git a/Documentation/sound/alsa/soc/machine.txt b/Documentation/sound/alsa/soc/machine.txt
index f370e7db86af..4a9f51e2905c 100644
--- a/Documentation/sound/alsa/soc/machine.txt
+++ b/Documentation/sound/alsa/soc/machine.txt
@@ -9,7 +9,7 @@ the audio subsystem with the kernel as a platform device and is represented by
 the following struct:-
 
 /* SoC machine */
-struct snd_soc_machine {
+struct snd_soc_card {_
 	char *name;
 
 	int (*probe)(struct platform_device *pdev);
@@ -67,10 +67,10 @@ static struct snd_soc_dai_link corgi_dai = {
 	.ops = &corgi_ops,
 };
 
-struct snd_soc_machine then sets up the machine with it's DAIs. e.g.
+struct snd_soc_card then sets up the machine with it's DAIs. e.g.
 
 /* corgi audio machine driver */
-static struct snd_soc_machine snd_soc_machine_corgi = {
+static struct snd_soc_card snd_soc_corgi = {
 	.name = "Corgi",
 	.dai_link = &corgi_dai,
 	.num_links = 1,
@@ -90,7 +90,7 @@ static struct wm8731_setup_data corgi_wm8731_setup = {
 
 /* corgi audio subsystem */
 static struct snd_soc_device corgi_snd_devdata = {
-	.machine = &snd_soc_machine_corgi,
+	.machine = &snd_soc_corgi,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &corgi_wm8731_setup,
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 077dfe4e51f0..3be17b3c650c 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -482,8 +482,8 @@ struct snd_soc_dai_link  {
 	struct snd_pcm *pcm;
 };
 
-/* SoC machine */
-struct snd_soc_machine {
+/* SoC card */
+struct snd_soc_card {
 	char *name;
 
 	int (*probe)(struct platform_device *pdev);
@@ -497,7 +497,7 @@ struct snd_soc_machine {
 	int (*resume_post)(struct platform_device *pdev);
 
 	/* callbacks */
-	int (*set_bias_level)(struct snd_soc_machine *,
+	int (*set_bias_level)(struct snd_soc_card *,
 			      enum snd_soc_bias_level level);
 
 	/* CPU <--> Codec DAI links  */
@@ -508,7 +508,7 @@ struct snd_soc_machine {
 /* SoC Device - the audio subsystem */
 struct snd_soc_device {
 	struct device *dev;
-	struct snd_soc_machine *machine;
+	struct snd_soc_card *card;
 	struct snd_soc_platform *platform;
 	struct snd_soc_codec *codec;
 	struct snd_soc_codec_device *codec_dev;
diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c
index ea7935d2a66d..d40b5a52a8d2 100644
--- a/sound/soc/atmel/playpaq_wm8510.c
+++ b/sound/soc/atmel/playpaq_wm8510.c
@@ -361,7 +361,7 @@ static struct snd_soc_dai_link playpaq_wm8510_dai = {
 
 
-static struct snd_soc_machine snd_soc_machine_playpaq = {
+static struct snd_soc_card snd_soc_playpaq = {
 	.name = "LRS_PlayPaq_WM8510",
 	.dai_link = &playpaq_wm8510_dai,
 	.num_links = 1,
@@ -377,7 +377,7 @@ static struct wm8510_setup_data playpaq_wm8510_setup = {
 
 
 static struct snd_soc_device playpaq_wm8510_snd_devdata = {
-	.machine = &snd_soc_machine_playpaq,
+	.card = &snd_soc_playpaq,
 	.platform = &at32_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8510,
 	.codec_data = &playpaq_wm8510_setup,
diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c
index 710addcc66b3..fdc1d0206e0b 100644
--- a/sound/soc/atmel/sam9g20_wm8731.c
+++ b/sound/soc/atmel/sam9g20_wm8731.c
@@ -242,7 +242,7 @@ static struct snd_soc_dai_link at91sam9g20ek_dai = {
 	.ops = &at91sam9g20ek_ops,
 };
 
-static struct snd_soc_machine snd_soc_machine_at91sam9g20ek = {
+static struct snd_soc_card snd_soc_at91sam9g20ek = {
 	.name = "WM8731",
 	.dai_link = &at91sam9g20ek_dai,
 	.num_links = 1,
@@ -254,7 +254,7 @@ static struct wm8731_setup_data at91sam9g20ek_wm8731_setup = {
 };
 
 static struct snd_soc_device at91sam9g20ek_snd_devdata = {
-	.machine = &snd_soc_machine_at91sam9g20ek,
+	.card = &snd_soc_at91sam9g20ek,
 	.platform = &atmel_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &at91sam9g20ek_wm8731_setup,
diff --git a/sound/soc/au1x/sample-ac97.c b/sound/soc/au1x/sample-ac97.c
index f75ae7f62c3d..27683eb7905e 100644
--- a/sound/soc/au1x/sample-ac97.c
+++ b/sound/soc/au1x/sample-ac97.c
@@ -42,14 +42,14 @@ static struct snd_soc_dai_link au1xpsc_sample_ac97_dai = {
 	.ops		= NULL,
 };
 
-static struct snd_soc_machine au1xpsc_sample_ac97_machine = {
+static struct snd_soc_card au1xpsc_sample_ac97_machine = {
 	.name		= "Au1xxx PSC AC97 Audio",
 	.dai_link	= &au1xpsc_sample_ac97_dai,
 	.num_links	= 1,
 };
 
 static struct snd_soc_device au1xpsc_sample_ac97_devdata = {
-	.machine	= &au1xpsc_sample_ac97_machine,
+	.card		= &au1xpsc_sample_ac97_machine,
 	.platform	= &au1xpsc_soc_platform, /* see dbdma2.c */
 	.codec_dev	= &soc_codec_dev_ac97,
 };
diff --git a/sound/soc/blackfin/bf5xx-ad1980.c b/sound/soc/blackfin/bf5xx-ad1980.c
index 124425d22320..36c569a43ce1 100644
--- a/sound/soc/blackfin/bf5xx-ad1980.c
+++ b/sound/soc/blackfin/bf5xx-ad1980.c
@@ -43,7 +43,7 @@
 #include "bf5xx-ac97-pcm.h"
 #include "bf5xx-ac97.h"
 
-static struct snd_soc_machine bf5xx_board;
+static struct snd_soc_card bf5xx_board;
 
 static int bf5xx_board_startup(struct snd_pcm_substream *substream)
 {
@@ -67,14 +67,14 @@ static struct snd_soc_dai_link bf5xx_board_dai = {
 	.ops = &bf5xx_board_ops,
 };
 
-static struct snd_soc_machine bf5xx_board = {
+static struct snd_soc_card bf5xx_board = {
 	.name = "bf5xx-board",
 	.dai_link = &bf5xx_board_dai,
 	.num_links = 1,
 };
 
 static struct snd_soc_device bf5xx_board_snd_devdata = {
-	.machine = &bf5xx_board,
+	.card = &bf5xx_board,
 	.platform = &bf5xx_ac97_soc_platform,
 	.codec_dev = &soc_codec_dev_ad1980,
 };
diff --git a/sound/soc/blackfin/bf5xx-ad73311.c b/sound/soc/blackfin/bf5xx-ad73311.c
index 47da49b9aeac..57da14799375 100644
--- a/sound/soc/blackfin/bf5xx-ad73311.c
+++ b/sound/soc/blackfin/bf5xx-ad73311.c
@@ -65,7 +65,7 @@
 
 #define GPIO_SE CONFIG_SND_BFIN_AD73311_SE
 
-static struct snd_soc_machine bf5xx_ad73311;
+static struct snd_soc_card bf5xx_ad73311;
 
 static int snd_ad73311_startup(void)
 {
@@ -190,7 +190,7 @@ static struct snd_soc_dai_link bf5xx_ad73311_dai = {
 	.ops = &bf5xx_ad73311_ops,
 };
 
-static struct snd_soc_machine bf5xx_ad73311 = {
+static struct snd_soc_card bf5xx_ad73311 = {
 	.name = "bf5xx_ad73311",
 	.probe = bf5xx_probe,
 	.dai_link = &bf5xx_ad73311_dai,
@@ -198,7 +198,7 @@ static struct snd_soc_machine bf5xx_ad73311 = {
 };
 
 static struct snd_soc_device bf5xx_ad73311_snd_devdata = {
-	.machine = &bf5xx_ad73311,
+	.card = &bf5xx_ad73311,
 	.platform = &bf5xx_i2s_soc_platform,
 	.codec_dev = &soc_codec_dev_ad73311,
 };
diff --git a/sound/soc/blackfin/bf5xx-ssm2602.c b/sound/soc/blackfin/bf5xx-ssm2602.c
index 744a90e765d9..0078dfcd95b9 100644
--- a/sound/soc/blackfin/bf5xx-ssm2602.c
+++ b/sound/soc/blackfin/bf5xx-ssm2602.c
@@ -44,7 +44,7 @@
 #include "bf5xx-i2s-pcm.h"
 #include "bf5xx-i2s.h"
 
-static struct snd_soc_machine bf5xx_ssm2602;
+static struct snd_soc_card bf5xx_ssm2602;
 
 static int bf5xx_ssm2602_startup(struct snd_pcm_substream *substream)
 {
@@ -135,14 +135,14 @@ static struct ssm2602_setup_data bf5xx_ssm2602_setup = {
 	.i2c_address = 0x1b,
 };
 
-static struct snd_soc_machine bf5xx_ssm2602 = {
+static struct snd_soc_card bf5xx_ssm2602 = {
 	.name = "bf5xx_ssm2602",
 	.dai_link = &bf5xx_ssm2602_dai,
 	.num_links = 1,
 };
 
 static struct snd_soc_device bf5xx_ssm2602_snd_devdata = {
-	.machine = &bf5xx_ssm2602,
+	.card = &bf5xx_ssm2602,
 	.platform = &bf5xx_i2s_soc_platform,
 	.codec_dev = &soc_codec_dev_ssm2602,
 	.codec_data = &bf5xx_ssm2602_setup,
diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c
index 9e6062cd6b59..2ce34d44b15c 100644
--- a/sound/soc/davinci/davinci-evm.c
+++ b/sound/soc/davinci/davinci-evm.c
@@ -128,7 +128,7 @@ static struct snd_soc_dai_link evm_dai = {
 };
 
 /* davinci-evm audio machine driver */
-static struct snd_soc_machine snd_soc_machine_evm = {
+static struct snd_soc_card snd_soc_card_evm = {
 	.name = "DaVinci EVM",
 	.dai_link = &evm_dai,
 	.num_links = 1,
@@ -142,7 +142,7 @@ static struct aic3x_setup_data evm_aic3x_setup = {
 
 /* evm audio subsystem */
 static struct snd_soc_device evm_snd_devdata = {
-	.machine = &snd_soc_machine_evm,
+	.card = &snd_soc_card_evm,
 	.platform = &davinci_soc_platform,
 	.codec_dev = &soc_codec_dev_aic3x,
 	.codec_data = &evm_aic3x_setup,
diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c
index 11c20d0b7bcc..95df51e803b4 100644
--- a/sound/soc/davinci/davinci-i2s.c
+++ b/sound/soc/davinci/davinci-i2s.c
@@ -375,8 +375,8 @@ static int davinci_i2s_probe(struct platform_device *pdev,
 			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
-	struct snd_soc_machine *machine = socdev->machine;
-	struct snd_soc_dai *cpu_dai = machine->dai_link[pdev->id].cpu_dai;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_dai *cpu_dai = card->dai_link[pdev->id].cpu_dai;
 	struct davinci_mcbsp_dev *dev;
 	struct resource *mem, *ioarea;
 	struct evm_snd_platform_data *pdata;
@@ -437,8 +437,8 @@ static void davinci_i2s_remove(struct platform_device *pdev,
 			       struct snd_soc_dai *dai)
 {
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
-	struct snd_soc_machine *machine = socdev->machine;
-	struct snd_soc_dai *cpu_dai = machine->dai_link[pdev->id].cpu_dai;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_dai *cpu_dai = card->dai_link[pdev->id].cpu_dai;
 	struct davinci_mcbsp_dev *dev = cpu_dai->private_data;
 	struct resource *mem;
 
diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c
index 69a8a769f4d8..fa38f9cd3506 100644
--- a/sound/soc/davinci/davinci-sffsdr.c
+++ b/sound/soc/davinci/davinci-sffsdr.c
@@ -73,7 +73,7 @@ static struct snd_soc_dai_link sffsdr_dai = {
 };
 
 /* davinci-sffsdr audio machine driver */
-static struct snd_soc_machine snd_soc_machine_sffsdr = {
+static struct snd_soc_card snd_soc_sffsdr = {
 	.name = "DaVinci SFFSDR",
 	.dai_link = &sffsdr_dai,
 	.num_links = 1,
@@ -89,7 +89,7 @@ static struct pcm3008_setup_data sffsdr_pcm3008_setup = {
 
 /* sffsdr audio subsystem */
 static struct snd_soc_device sffsdr_snd_devdata = {
-	.machine = &snd_soc_machine_sffsdr,
+	.card = &snd_soc_sffsdr,
 	.platform = &davinci_soc_platform,
 	.codec_dev = &soc_codec_dev_pcm3008,
 	.codec_data = &sffsdr_pcm3008_setup,
diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c
index d2d3da9729f2..bf92331b4768 100644
--- a/sound/soc/fsl/fsl_dma.c
+++ b/sound/soc/fsl/fsl_dma.c
@@ -284,7 +284,7 @@ static irqreturn_t fsl_dma_isr(int irq, void *dev_id)
  * fsl_dma_new: initialize this PCM driver.
  *
  * This function is called when the codec driver calls snd_soc_new_pcms(),
- * once for each .dai_link in the machine driver's snd_soc_machine
+ * once for each .dai_link in the machine driver's snd_soc_card
  * structure.
  */
 static int fsl_dma_new(struct snd_card *card, struct snd_soc_dai *dai,
diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c
index 94f89debde1f..1cf4d6eeb538 100644
--- a/sound/soc/fsl/mpc8610_hpcd.c
+++ b/sound/soc/fsl/mpc8610_hpcd.c
@@ -29,7 +29,7 @@
 struct mpc8610_hpcd_data {
 	struct snd_soc_device sound_devdata;
 	struct snd_soc_dai_link dai;
-	struct snd_soc_machine machine;
+	struct snd_soc_card machine;
 	unsigned int dai_format;
 	unsigned int codec_clk_direction;
 	unsigned int cpu_clk_direction;
@@ -185,7 +185,7 @@ static struct snd_soc_ops mpc8610_hpcd_ops = {
 /**
  * mpc8610_hpcd_machine: ASoC machine data
  */
-static struct snd_soc_machine mpc8610_hpcd_machine = {
+static struct snd_soc_card mpc8610_hpcd_machine = {
 	.probe = mpc8610_hpcd_machine_probe,
 	.remove = mpc8610_hpcd_machine_remove,
 	.name = "MPC8610 HPCD",
@@ -465,7 +465,7 @@ static int mpc8610_hpcd_probe(struct of_device *ofdev,
 		goto error;
 	}
 
-	machine_data->sound_devdata.machine = &mpc8610_hpcd_machine;
+	machine_data->sound_devdata.card = &mpc8610_hpcd_machine;
 	machine_data->sound_devdata.codec_dev = &soc_codec_device_cs4270;
 	machine_data->sound_devdata.platform = &fsl_soc_platform;
 
diff --git a/sound/soc/fsl/soc-of-simple.c b/sound/soc/fsl/soc-of-simple.c
index 0382fdac51cd..53be6491320a 100644
--- a/sound/soc/fsl/soc-of-simple.c
+++ b/sound/soc/fsl/soc-of-simple.c
@@ -31,7 +31,7 @@ struct of_snd_soc_device {
 	int id;
 	struct list_head list;
 	struct snd_soc_device device;
-	struct snd_soc_machine machine;
+	struct snd_soc_card card;
 	struct snd_soc_dai_link dai_link;
 	struct platform_device *pdev;
 	struct device_node *platform_node;
@@ -58,9 +58,9 @@ of_snd_soc_get_device(struct device_node *codec_node)
 	/* Initialize the structure and add it to the global list */
 	of_soc->codec_node = codec_node;
 	of_soc->id = of_snd_soc_next_index++;
-	of_soc->machine.dai_link = &of_soc->dai_link;
-	of_soc->machine.num_links = 1;
-	of_soc->device.machine = &of_soc->machine;
+	of_soc->card.dai_link = &of_soc->dai_link;
+	of_soc->card.num_links = 1;
+	of_soc->device.card = &of_soc->card;
 	of_soc->dai_link.ops = &of_snd_soc_ops;
 	list_add(&of_soc->list, &of_snd_soc_device_list);
 
@@ -159,7 +159,7 @@ int of_snd_soc_register_platform(struct snd_soc_platform *platform,
 	of_soc->platform_node = node;
 	of_soc->dai_link.cpu_dai = cpu_dai;
 	of_soc->device.platform = platform;
-	of_soc->machine.name = of_soc->dai_link.cpu_dai->name;
+	of_soc->card.name = of_soc->dai_link.cpu_dai->name;
 
 	/* Now try to register the SoC device */
 	of_snd_soc_register_device(of_soc);
diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index fae3ad36e0bf..d216b4f9e14e 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -282,7 +282,7 @@ static struct snd_soc_dai_link n810_dai = {
 };
 
 /* Audio machine driver */
-static struct snd_soc_machine snd_soc_machine_n810 = {
+static struct snd_soc_card snd_soc_n810 = {
 	.name = "N810",
 	.dai_link = &n810_dai,
 	.num_links = 1,
@@ -298,7 +298,7 @@ static struct aic3x_setup_data n810_aic33_setup = {
 
 /* Audio subsystem */
 static struct snd_soc_device n810_snd_devdata = {
-	.machine = &snd_soc_machine_n810,
+	.card = &snd_soc_n810,
 	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_aic3x,
 	.codec_data = &n810_aic33_setup,
diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c
index c37621357d00..5bea31157a94 100644
--- a/sound/soc/omap/omap2evm.c
+++ b/sound/soc/omap/omap2evm.c
@@ -90,7 +90,7 @@ static struct snd_soc_dai_link omap2evm_dai = {
 };
 
 /* Audio machine driver */
-static struct snd_soc_machine snd_soc_machine_omap2evm = {
+static struct snd_soc_card snd_soc_omap2evm = {
 	.name = "omap2evm",
 	.dai_link = &omap2evm_dai,
 	.num_links = 1,
@@ -98,7 +98,7 @@ static struct snd_soc_machine snd_soc_machine_omap2evm = {
 
 /* Audio subsystem */
 static struct snd_soc_device omap2evm_snd_devdata = {
-	.machine = &snd_soc_machine_omap2evm,
+	.card = &snd_soc_omap2evm,
 	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c
index ec84a9bbc563..3ed25464627f 100644
--- a/sound/soc/omap/omap3beagle.c
+++ b/sound/soc/omap/omap3beagle.c
@@ -88,7 +88,7 @@ static struct snd_soc_dai_link omap3beagle_dai = {
 };
 
 /* Audio machine driver */
-static struct snd_soc_machine snd_soc_machine_omap3beagle = {
+static struct snd_soc_card snd_soc_omap3beagle = {
 	.name = "omap3beagle",
 	.dai_link = &omap3beagle_dai,
 	.num_links = 1,
@@ -96,7 +96,7 @@ static struct snd_soc_machine snd_soc_machine_omap3beagle = {
 
 /* Audio subsystem */
 static struct snd_soc_device omap3beagle_snd_devdata = {
-	.machine = &snd_soc_machine_omap3beagle,
+	.card = &snd_soc_omap3beagle,
 	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c
index 0fe733796898..7a8f14d0c772 100644
--- a/sound/soc/omap/osk5912.c
+++ b/sound/soc/omap/osk5912.c
@@ -143,7 +143,7 @@ static struct snd_soc_dai_link osk_dai = {
 };
 
 /* Audio machine driver */
-static struct snd_soc_machine snd_soc_machine_osk = {
+static struct snd_soc_card snd_soc_card_osk = {
 	.name = "OSK5912",
 	.dai_link = &osk_dai,
 	.num_links = 1,
@@ -151,7 +151,7 @@ static struct snd_soc_machine snd_soc_machine_osk = {
 
 /* Audio subsystem */
 static struct snd_soc_device osk_snd_devdata = {
-	.machine = &snd_soc_machine_osk,
+	.card = &snd_soc_card_osk,
 	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_tlv320aic23,
 };
diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c
index c26d1de7da51..eea0c372bb3f 100644
--- a/sound/soc/omap/overo.c
+++ b/sound/soc/omap/overo.c
@@ -88,7 +88,7 @@ static struct snd_soc_dai_link overo_dai = {
 };
 
 /* Audio machine driver */
-static struct snd_soc_machine snd_soc_machine_overo = {
+static struct snd_soc_card snd_soc_card_overo = {
 	.name = "overo",
 	.dai_link = &overo_dai,
 	.num_links = 1,
@@ -96,7 +96,7 @@ static struct snd_soc_machine snd_soc_machine_overo = {
 
 /* Audio subsystem */
 static struct snd_soc_device overo_snd_devdata = {
-	.machine = &snd_soc_machine_overo,
+	.card = &snd_soc_card_overo,
 	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c
index 2718eaf7895f..647f056a3cb3 100644
--- a/sound/soc/pxa/corgi.c
+++ b/sound/soc/pxa/corgi.c
@@ -314,7 +314,7 @@ static struct snd_soc_dai_link corgi_dai = {
 };
 
 /* corgi audio machine driver */
-static struct snd_soc_machine snd_soc_machine_corgi = {
+static struct snd_soc_card snd_soc_corgi = {
 	.name = "Corgi",
 	.dai_link = &corgi_dai,
 	.num_links = 1,
@@ -328,7 +328,7 @@ static struct wm8731_setup_data corgi_wm8731_setup = {
 
 /* corgi audio subsystem */
 static struct snd_soc_device corgi_snd_devdata = {
-	.machine = &snd_soc_machine_corgi,
+	.card = &snd_soc_corgi,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &corgi_wm8731_setup,
diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c
index 6781c5be242f..60c64861512a 100644
--- a/sound/soc/pxa/e800_wm9712.c
+++ b/sound/soc/pxa/e800_wm9712.c
@@ -29,7 +29,7 @@
 #include "pxa2xx-pcm.h"
 #include "pxa2xx-ac97.h"
 
-static struct snd_soc_machine e800;
+static struct snd_soc_card e800;
 
 static struct snd_soc_dai_link e800_dai[] = {
 {
@@ -40,14 +40,14 @@ static struct snd_soc_dai_link e800_dai[] = {
 },
 };
 
-static struct snd_soc_machine e800 = {
+static struct snd_soc_card e800 = {
 	.name = "Toshiba e800",
 	.dai_link = e800_dai,
 	.num_links = ARRAY_SIZE(e800_dai),
 };
 
 static struct snd_soc_device e800_snd_devdata = {
-	.machine = &e800,
+	.card = &e800,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index e6ff6929ab4b..4a61925c3104 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -53,14 +53,14 @@ static struct snd_soc_dai_link em_x270_dai[] = {
 	},
 };
 
-static struct snd_soc_machine em_x270 = {
+static struct snd_soc_card em_x270 = {
 	.name = "EM-X270",
 	.dai_link = em_x270_dai,
 	.num_links = ARRAY_SIZE(em_x270_dai),
 };
 
 static struct snd_soc_device em_x270_snd_devdata = {
-	.machine = &em_x270,
+	.card = &em_x270,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index e364abc700db..3bb8879ac8a2 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -189,14 +189,14 @@ static struct snd_soc_dai_link palm27x_dai[] = {
 },
 };
 
-static struct snd_soc_machine palm27x_asoc = {
+static struct snd_soc_card palm27x_asoc = {
 	.name = "Palm/PXA27x",
 	.dai_link = palm27x_dai,
 	.num_links = ARRAY_SIZE(palm27x_dai),
 };
 
 static struct snd_soc_device palm27x_snd_devdata = {
-	.machine = &palm27x_asoc,
+	.card = &palm27x_asoc,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c
index 4d9930c52789..03b510ab2824 100644
--- a/sound/soc/pxa/poodle.c
+++ b/sound/soc/pxa/poodle.c
@@ -276,7 +276,7 @@ static struct snd_soc_dai_link poodle_dai = {
 };
 
 /* poodle audio machine driver */
-static struct snd_soc_machine snd_soc_machine_poodle = {
+static struct snd_soc_card snd_soc_poodle = {
 	.name = "Poodle",
 	.dai_link = &poodle_dai,
 	.num_links = 1,
@@ -290,7 +290,7 @@ static struct wm8731_setup_data poodle_wm8731_setup = {
 
 /* poodle audio subsystem */
 static struct snd_soc_device poodle_snd_devdata = {
-	.machine = &snd_soc_machine_poodle,
+	.card = &snd_soc_poodle,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &poodle_wm8731_setup,
diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c
index d307b6757e95..579d93368f14 100644
--- a/sound/soc/pxa/spitz.c
+++ b/sound/soc/pxa/spitz.c
@@ -319,7 +319,7 @@ static struct snd_soc_dai_link spitz_dai = {
 };
 
 /* spitz audio machine driver */
-static struct snd_soc_machine snd_soc_machine_spitz = {
+static struct snd_soc_card snd_soc_spitz = {
 	.name = "Spitz",
 	.dai_link = &spitz_dai,
 	.num_links = 1,
@@ -333,7 +333,7 @@ static struct wm8750_setup_data spitz_wm8750_setup = {
 
 /* spitz audio subsystem */
 static struct snd_soc_device spitz_snd_devdata = {
-	.machine = &snd_soc_machine_spitz,
+	.card = &snd_soc_spitz,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8750,
 	.codec_data = &spitz_wm8750_setup,
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index afefe41b8c46..9d9be5a14d14 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -38,7 +38,7 @@
 #include "pxa2xx-pcm.h"
 #include "pxa2xx-ac97.h"
 
-static struct snd_soc_machine tosa;
+static struct snd_soc_card tosa;
 
 #define TOSA_HP        0
 #define TOSA_MIC_INT   1
@@ -230,14 +230,14 @@ static struct snd_soc_dai_link tosa_dai[] = {
 },
 };
 
-static struct snd_soc_machine tosa = {
+static struct snd_soc_card tosa = {
 	.name = "Tosa",
 	.dai_link = tosa_dai,
 	.num_links = ARRAY_SIZE(tosa_dai),
 };
 
 static struct snd_soc_device tosa_snd_devdata = {
-	.machine = &tosa,
+	.card = &tosa,
 	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
diff --git a/sound/soc/s3c24xx/ln2440sbc_alc650.c b/sound/soc/s3c24xx/ln2440sbc_alc650.c
index 4eab2c19c454..a70cbc0fa070 100644
--- a/sound/soc/s3c24xx/ln2440sbc_alc650.c
+++ b/sound/soc/s3c24xx/ln2440sbc_alc650.c
@@ -27,7 +27,7 @@
 #include "s3c24xx-pcm.h"
 #include "s3c24xx-ac97.h"
 
-static struct snd_soc_machine ln2440sbc;
+static struct snd_soc_card ln2440sbc;
 
 static struct snd_soc_dai_link ln2440sbc_dai[] = {
 {
@@ -38,14 +38,14 @@ static struct snd_soc_dai_link ln2440sbc_dai[] = {
 },
 };
 
-static struct snd_soc_machine ln2440sbc = {
+static struct snd_soc_card ln2440sbc = {
 	.name = "LN2440SBC",
 	.dai_link = ln2440sbc_dai,
 	.num_links = ARRAY_SIZE(ln2440sbc_dai),
 };
 
 static struct snd_soc_device ln2440sbc_snd_ac97_devdata = {
-	.machine = &ln2440sbc,
+	.card = &ln2440sbc,
 	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_ac97,
 };
diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c
index 87ddfefcc2fb..528fc3f1b45b 100644
--- a/sound/soc/s3c24xx/neo1973_wm8753.c
+++ b/sound/soc/s3c24xx/neo1973_wm8753.c
@@ -59,7 +59,7 @@
 #define NEO_CAPTURE_HEADSET		7
 #define NEO_CAPTURE_BLUETOOTH		8
 
-static struct snd_soc_machine neo1973;
+static struct snd_soc_card neo1973;
 static struct i2c_client *i2c;
 
 static int neo1973_hifi_hw_params(struct snd_pcm_substream *substream,
@@ -579,7 +579,7 @@ static struct snd_soc_dai_link neo1973_dai[] = {
 },
 };
 
-static struct snd_soc_machine neo1973 = {
+static struct snd_soc_card neo1973 = {
 	.name = "neo1973",
 	.dai_link = neo1973_dai,
 	.num_links = ARRAY_SIZE(neo1973_dai),
@@ -591,7 +591,7 @@ static struct wm8753_setup_data neo1973_wm8753_setup = {
 };
 
 static struct snd_soc_device neo1973_snd_devdata = {
-	.machine = &neo1973,
+	.card = &neo1973,
 	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8753,
 	.codec_data = &neo1973_wm8753_setup,
diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c
index 92d90f2f6901..23325fca1f64 100644
--- a/sound/soc/s3c24xx/s3c24xx_uda134x.c
+++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c
@@ -232,7 +232,7 @@ static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = {
 	.ops = &s3c24xx_uda134x_ops,
 };
 
-static struct snd_soc_machine snd_soc_machine_s3c24xx_uda134x = {
+static struct snd_soc_card snd_soc_s3c24xx_uda134x = {
 	.name = "S3C24XX_UDA134X",
 	.dai_link = &s3c24xx_uda134x_dai_link,
 	.num_links = 1,
@@ -270,7 +270,7 @@ static struct uda134x_platform_data s3c24xx_uda134x = {
 };
 
 static struct snd_soc_device s3c24xx_uda134x_snd_devdata = {
-	.machine = &snd_soc_machine_s3c24xx_uda134x,
+	.card = &snd_soc_s3c24xx_uda134x,
 	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_uda134x,
 	.codec_data = &s3c24xx_uda134x,
diff --git a/sound/soc/s3c24xx/smdk2443_wm9710.c b/sound/soc/s3c24xx/smdk2443_wm9710.c
index 8515d6ff03f2..3d2e6a0417ec 100644
--- a/sound/soc/s3c24xx/smdk2443_wm9710.c
+++ b/sound/soc/s3c24xx/smdk2443_wm9710.c
@@ -23,7 +23,7 @@
 #include "s3c24xx-pcm.h"
 #include "s3c24xx-ac97.h"
 
-static struct snd_soc_machine smdk2443;
+static struct snd_soc_card smdk2443;
 
 static struct snd_soc_dai_link smdk2443_dai[] = {
 {
@@ -34,14 +34,14 @@ static struct snd_soc_dai_link smdk2443_dai[] = {
 },
 };
 
-static struct snd_soc_machine smdk2443 = {
+static struct snd_soc_card smdk2443 = {
 	.name = "SMDK2443",
 	.dai_link = smdk2443_dai,
 	.num_links = ARRAY_SIZE(smdk2443_dai),
 };
 
 static struct snd_soc_device smdk2443_snd_ac97_devdata = {
-	.machine = &smdk2443,
+	.card = &smdk2443,
 	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_ac97,
 };
diff --git a/sound/soc/sh/sh7760-ac97.c b/sound/soc/sh/sh7760-ac97.c
index 92bfaf4774a7..8b44f9c8a9ff 100644
--- a/sound/soc/sh/sh7760-ac97.c
+++ b/sound/soc/sh/sh7760-ac97.c
@@ -38,14 +38,14 @@ static struct snd_soc_dai_link sh7760_ac97_dai = {
 	.ops = NULL,
 };
 
-static struct snd_soc_machine sh7760_ac97_soc_machine  = {
+static struct snd_soc_card sh7760_ac97_soc_machine  = {
 	.name = "SH7760 AC97",
 	.dai_link = &sh7760_ac97_dai,
 	.num_links = 1,
 };
 
 static struct snd_soc_device sh7760_ac97_snd_devdata = {
-	.machine = &sh7760_ac97_soc_machine,
+	.card = &sh7760_ac97_soc_machine,
 	.platform = &sh7760_soc_platform,
 	.codec_dev = &soc_codec_dev_ac97,
 };
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 9feaa7b6dc34..c5cb9516fea4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -620,7 +620,7 @@ static struct snd_pcm_ops soc_pcm_ops = {
 static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 	struct snd_soc_codec *codec = socdev->codec;
@@ -644,14 +644,14 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	}
 
 	/* suspend all pcms */
-	for (i = 0; i < machine->num_links; i++)
-		snd_pcm_suspend_all(machine->dai_link[i].pcm);
+	for (i = 0; i < card->num_links; i++)
+		snd_pcm_suspend_all(card->dai_link[i].pcm);
 
-	if (machine->suspend_pre)
-		machine->suspend_pre(pdev, state);
+	if (card->suspend_pre)
+		card->suspend_pre(pdev, state);
 
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai  *cpu_dai = machine->dai_link[i].cpu_dai;
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai  *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->suspend && cpu_dai->type != SND_SOC_DAI_AC97)
 			cpu_dai->suspend(pdev, cpu_dai);
 		if (platform->suspend)
@@ -676,14 +676,14 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	if (codec_dev->suspend)
 		codec_dev->suspend(pdev, state);
 
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai;
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->suspend && cpu_dai->type == SND_SOC_DAI_AC97)
 			cpu_dai->suspend(pdev, cpu_dai);
 	}
 
-	if (machine->suspend_post)
-		machine->suspend_post(pdev, state);
+	if (card->suspend_post)
+		card->suspend_post(pdev, state);
 
 	return 0;
 }
@@ -696,7 +696,7 @@ static void soc_resume_deferred(struct work_struct *work)
 	struct snd_soc_device *socdev = container_of(work,
 						     struct snd_soc_device,
 						     deferred_resume_work);
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 	struct snd_soc_codec *codec = socdev->codec;
@@ -709,11 +709,11 @@ static void soc_resume_deferred(struct work_struct *work)
 
 	dev_info(socdev->dev, "starting resume work\n");
 
-	if (machine->resume_pre)
-		machine->resume_pre(pdev);
+	if (card->resume_pre)
+		card->resume_pre(pdev);
 
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai;
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->resume && cpu_dai->type == SND_SOC_DAI_AC97)
 			cpu_dai->resume(pdev, cpu_dai);
 	}
@@ -739,16 +739,16 @@ static void soc_resume_deferred(struct work_struct *work)
 			dai->dai_ops.digital_mute(dai, 0);
 	}
 
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai;
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->resume && cpu_dai->type != SND_SOC_DAI_AC97)
 			cpu_dai->resume(pdev, cpu_dai);
 		if (platform->resume)
 			platform->resume(pdev, cpu_dai);
 	}
 
-	if (machine->resume_post)
-		machine->resume_post(pdev);
+	if (card->resume_post)
+		card->resume_post(pdev);
 
 	dev_info(socdev->dev, "resume work completed\n");
 
@@ -779,18 +779,18 @@ static int soc_probe(struct platform_device *pdev)
 {
 	int ret = 0, i;
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 
-	if (machine->probe) {
-		ret = machine->probe(pdev);
+	if (card->probe) {
+		ret = card->probe(pdev);
 		if (ret < 0)
 			return ret;
 	}
 
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai;
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->probe) {
 			ret = cpu_dai->probe(pdev, cpu_dai);
 			if (ret < 0)
@@ -825,13 +825,13 @@ platform_err:
 
 cpu_dai_err:
 	for (i--; i >= 0; i--) {
-		struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai;
+		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->remove)
 			cpu_dai->remove(pdev, cpu_dai);
 	}
 
-	if (machine->remove)
-		machine->remove(pdev);
+	if (card->remove)
+		card->remove(pdev);
 
 	return ret;
 }
@@ -841,7 +841,7 @@ static int soc_remove(struct platform_device *pdev)
 {
 	int i;
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 
@@ -853,14 +853,14 @@ static int soc_remove(struct platform_device *pdev)
 	if (codec_dev->remove)
 		codec_dev->remove(pdev);
 
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai;
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->remove)
 			cpu_dai->remove(pdev, cpu_dai);
 	}
 
-	if (machine->remove)
-		machine->remove(pdev);
+	if (card->remove)
+		card->remove(pdev);
 
 	return 0;
 }
@@ -1212,7 +1212,7 @@ EXPORT_SYMBOL_GPL(snd_soc_test_bits);
 int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid)
 {
 	struct snd_soc_codec *codec = socdev->codec;
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	int ret = 0, i;
 
 	mutex_lock(&codec->mutex);
@@ -1231,11 +1231,11 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid)
 	strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver));
 
 	/* create the pcms */
-	for (i = 0; i < machine->num_links; i++) {
-		ret = soc_new_pcm(socdev, &machine->dai_link[i], i);
+	for (i = 0; i < card->num_links; i++) {
+		ret = soc_new_pcm(socdev, &card->dai_link[i], i);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: can't create pcm %s\n",
-				machine->dai_link[i].stream_name);
+				card->dai_link[i].stream_name);
 			mutex_unlock(&codec->mutex);
 			return ret;
 		}
@@ -1258,26 +1258,26 @@ EXPORT_SYMBOL_GPL(snd_soc_new_pcms);
 int snd_soc_register_card(struct snd_soc_device *socdev)
 {
 	struct snd_soc_codec *codec = socdev->codec;
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	int ret = 0, i, ac97 = 0, err = 0;
 
-	for (i = 0; i < machine->num_links; i++) {
-		if (socdev->machine->dai_link[i].init) {
-			err = socdev->machine->dai_link[i].init(codec);
+	for (i = 0; i < card->num_links; i++) {
+		if (card->dai_link[i].init) {
+			err = card->dai_link[i].init(codec);
 			if (err < 0) {
 				printk(KERN_ERR "asoc: failed to init %s\n",
-					socdev->machine->dai_link[i].stream_name);
+					card->dai_link[i].stream_name);
 				continue;
 			}
 		}
-		if (socdev->machine->dai_link[i].codec_dai->type ==
+		if (card->dai_link[i].codec_dai->type ==
 			SND_SOC_DAI_AC97_BUS)
 			ac97 = 1;
 	}
 	snprintf(codec->card->shortname, sizeof(codec->card->shortname),
-		 "%s", machine->name);
+		 "%s",  card->name);
 	snprintf(codec->card->longname, sizeof(codec->card->longname),
-		 "%s (%s)", machine->name, codec->name);
+		 "%s (%s)", card->name, codec->name);
 
 	ret = snd_card_register(codec->card);
 	if (ret < 0) {
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 0fecbb44726b..61d7d85aa578 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1402,11 +1402,11 @@ int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
 				enum snd_soc_bias_level level)
 {
 	struct snd_soc_codec *codec = socdev->codec;
-	struct snd_soc_machine *machine = socdev->machine;
+	struct snd_soc_card *card = socdev->card;
 	int ret = 0;
 
-	if (machine->set_bias_level)
-		ret = machine->set_bias_level(machine, level);
+	if (card->set_bias_level)
+		ret = card->set_bias_level(card, level);
 	if (ret == 0 && codec->set_bias_level)
 		ret = codec->set_bias_level(codec, level);
 
-- 
cgit v1.2.3


From a47cbe7263236691ee0bbc392f7fd4ec0da1159f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 23 Jul 2008 14:03:07 +0100
Subject: ASoC: Move DAI structure definitions into new soc-dai.h

ASoC v2 factors most of the contents of soc.h out into separate headers,
including soc-dai.h for the DAI. Factor the existing DAI API out into
this file in order to prepare for backporting of the ASoC v2 DAI API.
Also backport some of Liam's improvements to the documentation.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h | 209 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/sound/soc.h     | 148 +---------------------------------
 2 files changed, 211 insertions(+), 146 deletions(-)
 create mode 100644 include/sound/soc-dai.h

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
new file mode 100644
index 000000000000..08b8f7025c64
--- /dev/null
+++ b/include/sound/soc-dai.h
@@ -0,0 +1,209 @@
+/*
+ * linux/sound/soc-dai.h -- ALSA SoC Layer
+ *
+ * Copyright:	2005-2008 Wolfson Microelectronics. PLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Digital Audio Interface (DAI) API.
+ */
+
+#ifndef __LINUX_SND_SOC_DAI_H
+#define __LINUX_SND_SOC_DAI_H
+
+
+#include <linux/list.h>
+
+struct snd_pcm_substream;
+
+/*
+ * DAI hardware audio formats.
+ *
+ * Describes the physical PCM data formating and clocking. Add new formats
+ * to the end.
+ */
+#define SND_SOC_DAIFMT_I2S		0 /* I2S mode */
+#define SND_SOC_DAIFMT_RIGHT_J		1 /* Right Justified mode */
+#define SND_SOC_DAIFMT_LEFT_J		2 /* Left Justified mode */
+#define SND_SOC_DAIFMT_DSP_A		3 /* L data msb after FRM LRC */
+#define SND_SOC_DAIFMT_DSP_B		4 /* L data msb during FRM LRC */
+#define SND_SOC_DAIFMT_AC97		5 /* AC97 */
+
+/* left and right justified also known as MSB and LSB respectively */
+#define SND_SOC_DAIFMT_MSB		SND_SOC_DAIFMT_LEFT_J
+#define SND_SOC_DAIFMT_LSB		SND_SOC_DAIFMT_RIGHT_J
+
+/*
+ * DAI Clock gating.
+ *
+ * DAI bit clocks can be be gated (disabled) when not the DAI is not
+ * sending or receiving PCM data in a frame. This can be used to save power.
+ */
+#define SND_SOC_DAIFMT_CONT		(0 << 4) /* continuous clock */
+#define SND_SOC_DAIFMT_GATED		(1 << 4) /* clock is gated */
+
+/*
+ * DAI Left/Right Clocks.
+ *
+ * Specifies whether the DAI can support different samples for similtanious
+ * playback and capture. This usually requires a seperate physical frame
+ * clock for playback and capture.
+ */
+#define SND_SOC_DAIFMT_SYNC		(0 << 5) /* Tx FRM = Rx FRM */
+#define SND_SOC_DAIFMT_ASYNC		(1 << 5) /* Tx FRM ~ Rx FRM */
+
+/*
+ * TDM
+ *
+ * Time Division Multiplexing. Allows PCM data to be multplexed with other
+ * data on the DAI.
+ */
+#define SND_SOC_DAIFMT_TDM		(1 << 6)
+
+/*
+ * DAI hardware signal inversions.
+ *
+ * Specifies whether the DAI can also support inverted clocks for the specified
+ * format.
+ */
+#define SND_SOC_DAIFMT_NB_NF		(0 << 8) /* normal bit clock + frame */
+#define SND_SOC_DAIFMT_NB_IF		(1 << 8) /* normal bclk + inv frm */
+#define SND_SOC_DAIFMT_IB_NF		(2 << 8) /* invert bclk + nor frm */
+#define SND_SOC_DAIFMT_IB_IF		(3 << 8) /* invert bclk + frm */
+
+/*
+ * DAI hardware clock masters.
+ *
+ * This is wrt the codec, the inverse is true for the interface
+ * i.e. if the codec is clk and frm master then the interface is
+ * clk and frame slave.
+ */
+#define SND_SOC_DAIFMT_CBM_CFM		(0 << 12) /* codec clk & frm master */
+#define SND_SOC_DAIFMT_CBS_CFM		(1 << 12) /* codec clk slave & frm master */
+#define SND_SOC_DAIFMT_CBM_CFS		(2 << 12) /* codec clk master & frame slave */
+#define SND_SOC_DAIFMT_CBS_CFS		(3 << 12) /* codec clk & frm slave */
+
+#define SND_SOC_DAIFMT_FORMAT_MASK	0x000f
+#define SND_SOC_DAIFMT_CLOCK_MASK	0x00f0
+#define SND_SOC_DAIFMT_INV_MASK		0x0f00
+#define SND_SOC_DAIFMT_MASTER_MASK	0xf000
+
+/*
+ * Master Clock Directions
+ */
+#define SND_SOC_CLOCK_IN		0
+#define SND_SOC_CLOCK_OUT		1
+
+struct snd_soc_dai_ops;
+struct snd_soc_dai;
+struct snd_ac97_bus_ops;
+
+/* Digital Audio Interface clocking API.*/
+int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
+	unsigned int freq, int dir);
+
+int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai,
+	int div_id, int div);
+
+int snd_soc_dai_set_pll(struct snd_soc_dai *dai,
+	int pll_id, unsigned int freq_in, unsigned int freq_out);
+
+/* Digital Audio interface formatting */
+int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt);
+
+int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
+	unsigned int mask, int slots);
+
+int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate);
+
+/* Digital Audio Interface mute */
+int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute);
+
+/*
+ * Digital Audio Interface.
+ *
+ * Describes the Digital Audio Interface in terms of it's ALSA, DAI and AC97
+ * operations an capabilities. Codec and platfom drivers will register a this
+ * structure for every DAI they have.
+ *
+ * This structure covers the clocking, formating and ALSA operations for each
+ * interface a
+ */
+struct snd_soc_dai_ops {
+	/*
+	 * DAI clocking configuration, all optional.
+	 * Called by soc_card drivers, normally in their hw_params.
+	 */
+	int (*set_sysclk)(struct snd_soc_dai *dai,
+		int clk_id, unsigned int freq, int dir);
+	int (*set_pll)(struct snd_soc_dai *dai,
+		int pll_id, unsigned int freq_in, unsigned int freq_out);
+	int (*set_clkdiv)(struct snd_soc_dai *dai, int div_id, int div);
+
+	/*
+	 * DAI format configuration
+	 * Called by soc_card drivers, normally in their hw_params.
+	 */
+	int (*set_fmt)(struct snd_soc_dai *dai, unsigned int fmt);
+	int (*set_tdm_slot)(struct snd_soc_dai *dai,
+		unsigned int mask, int slots);
+	int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
+
+	/*
+	 * DAI digital mute - optional.
+	 * Called by soc-core to minimise any pops.
+	 */
+	int (*digital_mute)(struct snd_soc_dai *dai, int mute);
+};
+
+/*
+ * Digital Audio Interface runtime data.
+ *
+ * Holds runtime data for a DAI.
+ */
+struct snd_soc_dai {
+	/* DAI description */
+	char *name;
+	unsigned int id;
+	unsigned char type;
+
+	/* DAI callbacks */
+	int (*probe)(struct platform_device *pdev,
+		     struct snd_soc_dai *dai);
+	void (*remove)(struct platform_device *pdev,
+		       struct snd_soc_dai *dai);
+	int (*suspend)(struct platform_device *pdev,
+		struct snd_soc_dai *dai);
+	int (*resume)(struct platform_device *pdev,
+		struct snd_soc_dai *dai);
+
+	/* ops */
+	struct snd_soc_ops ops;
+	struct snd_soc_dai_ops dai_ops;
+
+	/* DAI capabilities */
+	struct snd_soc_pcm_stream capture;
+	struct snd_soc_pcm_stream playback;
+
+	/* DAI runtime info */
+	struct snd_pcm_runtime *runtime;
+	struct snd_soc_codec *codec;
+	unsigned int active;
+	unsigned char pop_wait:1;
+	void *dma_data;
+
+	/* DAI private data */
+	void *private_data;
+
+	/* parent codec/platform */
+	union {
+		struct snd_soc_codec *codec;
+		struct snd_soc_platform *platform;
+	};
+
+	struct list_head list;
+};
+
+#endif
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 3be17b3c650c..e4465f73aa46 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -151,76 +151,6 @@ enum snd_soc_bias_level {
 #define SND_SOC_DAI_PCM		0x4
 #define SND_SOC_DAI_AC97_BUS	0x8	/* for custom i.e. non ac97_codec.c */
 
-/*
- * DAI hardware audio formats
- */
-#define SND_SOC_DAIFMT_I2S		0	/* I2S mode */
-#define SND_SOC_DAIFMT_RIGHT_J	1	/* Right justified mode */
-#define SND_SOC_DAIFMT_LEFT_J	2	/* Left Justified mode */
-#define SND_SOC_DAIFMT_DSP_A	3	/* L data msb after FRM or LRC */
-#define SND_SOC_DAIFMT_DSP_B	4	/* L data msb during FRM or LRC */
-#define SND_SOC_DAIFMT_AC97		5	/* AC97 */
-
-#define SND_SOC_DAIFMT_MSB 	SND_SOC_DAIFMT_LEFT_J
-#define SND_SOC_DAIFMT_LSB	SND_SOC_DAIFMT_RIGHT_J
-
-/*
- * DAI Gating
- */
-#define SND_SOC_DAIFMT_CONT			(0 << 4)	/* continuous clock */
-#define SND_SOC_DAIFMT_GATED		(1 << 4)	/* clock is gated when not Tx/Rx */
-
-/*
- * DAI Sync
- * Synchronous LR (Left Right) clocks and Frame signals.
- */
-#define SND_SOC_DAIFMT_SYNC		(0 << 5)	/* Tx FRM = Rx FRM */
-#define SND_SOC_DAIFMT_ASYNC		(1 << 5)	/* Tx FRM ~ Rx FRM */
-
-/*
- * TDM
- */
-#define SND_SOC_DAIFMT_TDM		(1 << 6)
-
-/*
- * DAI hardware signal inversions
- */
-#define SND_SOC_DAIFMT_NB_NF		(0 << 8)	/* normal bclk + frm */
-#define SND_SOC_DAIFMT_NB_IF		(1 << 8)	/* normal bclk + inv frm */
-#define SND_SOC_DAIFMT_IB_NF		(2 << 8)	/* invert bclk + nor frm */
-#define SND_SOC_DAIFMT_IB_IF		(3 << 8)	/* invert bclk + frm */
-
-/*
- * DAI hardware clock masters
- * This is wrt the codec, the inverse is true for the interface
- * i.e. if the codec is clk and frm master then the interface is
- * clk and frame slave.
- */
-#define SND_SOC_DAIFMT_CBM_CFM	(0 << 12) /* codec clk & frm master */
-#define SND_SOC_DAIFMT_CBS_CFM	(1 << 12) /* codec clk slave & frm master */
-#define SND_SOC_DAIFMT_CBM_CFS	(2 << 12) /* codec clk master & frame slave */
-#define SND_SOC_DAIFMT_CBS_CFS	(3 << 12) /* codec clk & frm slave */
-
-#define SND_SOC_DAIFMT_FORMAT_MASK		0x000f
-#define SND_SOC_DAIFMT_CLOCK_MASK		0x00f0
-#define SND_SOC_DAIFMT_INV_MASK			0x0f00
-#define SND_SOC_DAIFMT_MASTER_MASK		0xf000
-
-
-/*
- * Master Clock Directions
- */
-#define SND_SOC_CLOCK_IN	0
-#define SND_SOC_CLOCK_OUT	1
-
-/*
- * AC97 codec ID's bitmask
- */
-#define SND_SOC_DAI_AC97_ID0	(1 << 0)
-#define SND_SOC_DAI_AC97_ID1	(1 << 1)
-#define SND_SOC_DAI_AC97_ID2	(1 << 2)
-#define SND_SOC_DAI_AC97_ID3	(1 << 3)
-
 struct snd_soc_device;
 struct snd_soc_pcm_stream;
 struct snd_soc_ops;
@@ -260,27 +190,6 @@ int snd_soc_new_ac97_codec(struct snd_soc_codec *codec,
 	struct snd_ac97_bus_ops *ops, int num);
 void snd_soc_free_ac97_codec(struct snd_soc_codec *codec);
 
-/* Digital Audio Interface clocking API.*/
-int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
-	unsigned int freq, int dir);
-
-int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai,
-	int div_id, int div);
-
-int snd_soc_dai_set_pll(struct snd_soc_dai *dai,
-	int pll_id, unsigned int freq_in, unsigned int freq_out);
-
-/* Digital Audio interface formatting */
-int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt);
-
-int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
-	unsigned int mask, int slots);
-
-int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate);
-
-/* Digital Audio Interface mute */
-int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute);
-
 /*
  *Controls
  */
@@ -338,61 +247,6 @@ struct snd_soc_ops {
 	int (*trigger)(struct snd_pcm_substream *, int);
 };
 
-/* ASoC DAI ops */
-struct snd_soc_dai_ops {
-	/* DAI clocking configuration */
-	int (*set_sysclk)(struct snd_soc_dai *dai,
-		int clk_id, unsigned int freq, int dir);
-	int (*set_pll)(struct snd_soc_dai *dai,
-		int pll_id, unsigned int freq_in, unsigned int freq_out);
-	int (*set_clkdiv)(struct snd_soc_dai *dai, int div_id, int div);
-
-	/* DAI format configuration */
-	int (*set_fmt)(struct snd_soc_dai *dai, unsigned int fmt);
-	int (*set_tdm_slot)(struct snd_soc_dai *dai,
-		unsigned int mask, int slots);
-	int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
-
-	/* digital mute */
-	int (*digital_mute)(struct snd_soc_dai *dai, int mute);
-};
-
-/* SoC  DAI (Digital Audio Interface) */
-struct snd_soc_dai {
-	/* DAI description */
-	char *name;
-	unsigned int id;
-	unsigned char type;
-
-	/* DAI callbacks */
-	int (*probe)(struct platform_device *pdev,
-		     struct snd_soc_dai *dai);
-	void (*remove)(struct platform_device *pdev,
-		       struct snd_soc_dai *dai);
-	int (*suspend)(struct platform_device *pdev,
-		struct snd_soc_dai *dai);
-	int (*resume)(struct platform_device *pdev,
-		struct snd_soc_dai *dai);
-
-	/* ops */
-	struct snd_soc_ops ops;
-	struct snd_soc_dai_ops dai_ops;
-
-	/* DAI capabilities */
-	struct snd_soc_pcm_stream capture;
-	struct snd_soc_pcm_stream playback;
-
-	/* DAI runtime info */
-	struct snd_pcm_runtime *runtime;
-	struct snd_soc_codec *codec;
-	unsigned int active;
-	unsigned char pop_wait:1;
-	void *dma_data;
-
-	/* DAI private data */
-	void *private_data;
-};
-
 /* SoC Audio Codec */
 struct snd_soc_codec {
 	char *name;
@@ -543,4 +397,6 @@ struct soc_enum {
 	void *dapm;
 };
 
+#include <sound/soc-dai.h>
+
 #endif
-- 
cgit v1.2.3


From dee89c4d94433520e4e3977ae203d4cfbfe385fb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 18 Nov 2008 22:11:38 +0000
Subject: ASoC: Merge snd_soc_ops into snd_soc_dai_ops

Liam Girdwood's ASoC v2 work avoids having two different ops structures
for DAIs by merging the members of struct snd_soc_ops into struct
snd_soc_dai_ops, allowing per DAI configuration for everything.
Backport this change.

This paves the way for future work allowing any combination of DAIs to
be connected rather than having fixed purpose CODEC and CPU DAIs and
only allowing CODEC<->CPU interconnections.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h            | 20 +++++++++--
 sound/soc/atmel/atmel_ssc_dai.c    | 21 +++++------
 sound/soc/au1x/psc-ac97.c          |  5 +--
 sound/soc/au1x/psc-i2s.c           |  8 ++---
 sound/soc/blackfin/bf5xx-i2s.c     | 12 ++++---
 sound/soc/codecs/ac97.c            |  3 +-
 sound/soc/codecs/ak4535.c          |  5 ++-
 sound/soc/codecs/cs4270.c          | 11 +++---
 sound/soc/codecs/ssm2602.c         | 14 ++++----
 sound/soc/codecs/tlv320aic23.c     | 19 +++++-----
 sound/soc/codecs/tlv320aic26.c     |  5 ++-
 sound/soc/codecs/tlv320aic3x.c     |  5 ++-
 sound/soc/codecs/twl4030.c         |  5 ++-
 sound/soc/codecs/uda134x.c         | 12 +++----
 sound/soc/codecs/uda1380.c         | 15 ++++----
 sound/soc/codecs/wm8510.c          |  5 ++-
 sound/soc/codecs/wm8580.c          | 12 +++----
 sound/soc/codecs/wm8728.c          |  5 ++-
 sound/soc/codecs/wm8731.c          | 11 +++---
 sound/soc/codecs/wm8750.c          |  5 ++-
 sound/soc/codecs/wm8753.c          | 25 ++++++-------
 sound/soc/codecs/wm8900.c          |  5 ++-
 sound/soc/codecs/wm8903.c          | 11 +++---
 sound/soc/codecs/wm8971.c          |  5 ++-
 sound/soc/codecs/wm8990.c          |  6 ++--
 sound/soc/codecs/wm9712.c          |  6 ++--
 sound/soc/codecs/wm9713.c          | 21 +++++------
 sound/soc/davinci/davinci-i2s.c    | 12 ++++---
 sound/soc/davinci/davinci-sffsdr.c |  3 +-
 sound/soc/fsl/fsl_ssi.c            | 14 ++++----
 sound/soc/fsl/mpc5200_psc_i2s.c    | 17 +++++----
 sound/soc/omap/omap-mcbsp.c        | 14 ++++----
 sound/soc/omap/omap2evm.c          |  3 +-
 sound/soc/pxa/pxa-ssp.c            | 20 +++++------
 sound/soc/pxa/pxa2xx-ac97.c        |  9 +++--
 sound/soc/pxa/pxa2xx-i2s.c         | 15 ++++----
 sound/soc/s3c24xx/s3c2412-i2s.c    |  8 ++---
 sound/soc/s3c24xx/s3c2443-ac97.c   |  8 +++--
 sound/soc/s3c24xx/s3c24xx-i2s.c    |  9 ++---
 sound/soc/sh/hac.c                 |  3 +-
 sound/soc/sh/ssi.c                 | 16 ++++-----
 sound/soc/soc-core.c               | 74 +++++++++++++++++++-------------------
 42 files changed, 265 insertions(+), 237 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 08b8f7025c64..f51cb55902f7 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -156,6 +156,23 @@ struct snd_soc_dai_ops {
 	 * Called by soc-core to minimise any pops.
 	 */
 	int (*digital_mute)(struct snd_soc_dai *dai, int mute);
+
+	/*
+	 * ALSA PCM audio operations - all optional.
+	 * Called by soc-core during audio PCM operations.
+	 */
+	int (*startup)(struct snd_pcm_substream *,
+		struct snd_soc_dai *);
+	void (*shutdown)(struct snd_pcm_substream *,
+		struct snd_soc_dai *);
+	int (*hw_params)(struct snd_pcm_substream *,
+		struct snd_pcm_hw_params *, struct snd_soc_dai *);
+	int (*hw_free)(struct snd_pcm_substream *,
+		struct snd_soc_dai *);
+	int (*prepare)(struct snd_pcm_substream *,
+		struct snd_soc_dai *);
+	int (*trigger)(struct snd_pcm_substream *, int,
+		struct snd_soc_dai *);
 };
 
 /*
@@ -180,8 +197,7 @@ struct snd_soc_dai {
 		struct snd_soc_dai *dai);
 
 	/* ops */
-	struct snd_soc_ops ops;
-	struct snd_soc_dai_ops dai_ops;
+	struct snd_soc_dai_ops ops;
 
 	/* DAI capabilities */
 	struct snd_soc_pcm_stream capture;
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index d290b7894917..916f73b9a18f 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -202,7 +202,8 @@ static irqreturn_t atmel_ssc_interrupt(int irq, void *dev_id)
 /*
  * Startup.  Only that one substream allowed in each direction.
  */
-static int atmel_ssc_startup(struct snd_pcm_substream *substream)
+static int atmel_ssc_startup(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
 	struct atmel_ssc_info *ssc_p = &ssc_info[rtd->dai->cpu_dai->id];
@@ -231,7 +232,8 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream)
  * Shutdown.  Clear DMA parameters and shutdown the SSC if there
  * are no other substreams open.
  */
-static void atmel_ssc_shutdown(struct snd_pcm_substream *substream)
+static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
 	struct atmel_ssc_info *ssc_p = &ssc_info[rtd->dai->cpu_dai->id];
@@ -332,7 +334,8 @@ static int atmel_ssc_set_dai_clkdiv(struct snd_soc_dai *cpu_dai,
  * Configure the SSC.
  */
 static int atmel_ssc_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
 	int id = rtd->dai->cpu_dai->id;
@@ -600,7 +603,8 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream,
 }
 
 
-static int atmel_ssc_prepare(struct snd_pcm_substream *substream)
+static int atmel_ssc_prepare(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
 	struct atmel_ssc_info *ssc_p = &ssc_info[rtd->dai->cpu_dai->id];
@@ -715,8 +719,7 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = {
 			.startup = atmel_ssc_startup,
 			.shutdown = atmel_ssc_shutdown,
 			.prepare = atmel_ssc_prepare,
-			.hw_params = atmel_ssc_hw_params,},
-		.dai_ops = {
+			.hw_params = atmel_ssc_hw_params,
 			.set_fmt = atmel_ssc_set_dai_fmt,
 			.set_clkdiv = atmel_ssc_set_dai_clkdiv,},
 		.private_data = &ssc_info[0],
@@ -741,8 +744,7 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = {
 			.startup = atmel_ssc_startup,
 			.shutdown = atmel_ssc_shutdown,
 			.prepare = atmel_ssc_prepare,
-			.hw_params = atmel_ssc_hw_params,},
-		.dai_ops = {
+			.hw_params = atmel_ssc_hw_params,
 			.set_fmt = atmel_ssc_set_dai_fmt,
 			.set_clkdiv = atmel_ssc_set_dai_clkdiv,},
 		.private_data = &ssc_info[1],
@@ -766,8 +768,7 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = {
 			.startup = atmel_ssc_startup,
 			.shutdown = atmel_ssc_shutdown,
 			.prepare = atmel_ssc_prepare,
-			.hw_params = atmel_ssc_hw_params,},
-		.dai_ops = {
+			.hw_params = atmel_ssc_hw_params,
 			.set_fmt = atmel_ssc_set_dai_fmt,
 			.set_clkdiv = atmel_ssc_set_dai_clkdiv,},
 		.private_data = &ssc_info[2],
diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c
index 57facbad6825..ad60a6042cad 100644
--- a/sound/soc/au1x/psc-ac97.c
+++ b/sound/soc/au1x/psc-ac97.c
@@ -160,7 +160,8 @@ struct snd_ac97_bus_ops soc_ac97_ops = {
 EXPORT_SYMBOL_GPL(soc_ac97_ops);
 
 static int au1xpsc_ac97_hw_params(struct snd_pcm_substream *substream,
-				  struct snd_pcm_hw_params *params)
+				  struct snd_pcm_hw_params *params,
+				  struct snd_soc_dai *dai)
 {
 	/* FIXME */
 	struct au1xpsc_audio_data *pscdata = au1xpsc_ac97_workdata;
@@ -210,7 +211,7 @@ static int au1xpsc_ac97_hw_params(struct snd_pcm_substream *substream,
 }
 
 static int au1xpsc_ac97_trigger(struct snd_pcm_substream *substream,
-				int cmd)
+				int cmd, struct snd_soc_dai *dai)
 {
 	/* FIXME */
 	struct au1xpsc_audio_data *pscdata = au1xpsc_ac97_workdata;
diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c
index 9384702c7ebd..05a5acbb16ae 100644
--- a/sound/soc/au1x/psc-i2s.c
+++ b/sound/soc/au1x/psc-i2s.c
@@ -116,7 +116,8 @@ out:
 }
 
 static int au1xpsc_i2s_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct au1xpsc_audio_data *pscdata = au1xpsc_i2s_workdata;
 
@@ -240,7 +241,8 @@ static int au1xpsc_i2s_stop(struct au1xpsc_audio_data *pscdata, int stype)
 	return 0;
 }
 
-static int au1xpsc_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
+static int au1xpsc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			       struct snd_soc_dai *dai)
 {
 	struct au1xpsc_audio_data *pscdata = au1xpsc_i2s_workdata;
 	int ret, stype = SUBSTREAM_TYPE(substream);
@@ -389,8 +391,6 @@ struct snd_soc_dai au1xpsc_i2s_dai = {
 	.ops = {
 		.trigger	= au1xpsc_i2s_trigger,
 		.hw_params	= au1xpsc_i2s_hw_params,
-	},
-	.dai_ops = {
 		.set_fmt	= au1xpsc_i2s_set_fmt,
 	},
 };
diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c
index e020c160ee44..4e675b55b182 100644
--- a/sound/soc/blackfin/bf5xx-i2s.c
+++ b/sound/soc/blackfin/bf5xx-i2s.c
@@ -132,7 +132,8 @@ static int bf5xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 	return ret;
 }
 
-static int bf5xx_i2s_startup(struct snd_pcm_substream *substream)
+static int bf5xx_i2s_startup(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	pr_debug("%s enter\n", __func__);
 
@@ -142,7 +143,8 @@ static int bf5xx_i2s_startup(struct snd_pcm_substream *substream)
 }
 
 static int bf5xx_i2s_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	int ret = 0;
 
@@ -193,7 +195,8 @@ static int bf5xx_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static void bf5xx_i2s_shutdown(struct snd_pcm_substream *substream)
+static void bf5xx_i2s_shutdown(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
 {
 	pr_debug("%s enter\n", __func__);
 	bf5xx_i2s.counter--;
@@ -307,8 +310,7 @@ struct snd_soc_dai bf5xx_i2s_dai = {
 	.ops = {
 		.startup   = bf5xx_i2s_startup,
 		.shutdown  = bf5xx_i2s_shutdown,
-		.hw_params = bf5xx_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = bf5xx_i2s_hw_params,
 		.set_fmt = bf5xx_i2s_set_dai_fmt,
 	},
 };
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index bd1ebdc6c86c..8a93aff359d0 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -24,7 +24,8 @@
 
 #define AC97_VERSION "0.6"
 
-static int ac97_prepare(struct snd_pcm_substream *substream)
+static int ac97_prepare(struct snd_pcm_substream *substream,
+			struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
diff --git a/sound/soc/codecs/ak4535.c b/sound/soc/codecs/ak4535.c
index 2a89b5888e11..c742290e5533 100644
--- a/sound/soc/codecs/ak4535.c
+++ b/sound/soc/codecs/ak4535.c
@@ -339,7 +339,8 @@ static int ak4535_set_dai_sysclk(struct snd_soc_dai *codec_dai,
 }
 
 static int ak4535_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -451,8 +452,6 @@ struct snd_soc_dai ak4535_dai = {
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
 	.ops = {
 		.hw_params = ak4535_hw_params,
-	},
-	.dai_ops = {
 		.set_fmt = ak4535_set_dai_fmt,
 		.digital_mute = ak4535_mute,
 		.set_sysclk = ak4535_set_dai_sysclk,
diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index 0ff476d7057c..7507d468b200 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -360,13 +360,14 @@ static int cs4270_i2c_write(struct snd_soc_codec *codec, unsigned int reg,
 /*
  * Program the CS4270 with the given hardware parameters.
  *
- * The .dai_ops functions are used to provide board-specific data, like
+ * The .ops functions are used to provide board-specific data, like
  * input frequencies, to this driver.  This function takes that information,
  * combines it with the hardware parameters provided, and programs the
  * hardware accordingly.
  */
 static int cs4270_hw_params(struct snd_pcm_substream *substream,
-			    struct snd_pcm_hw_params *params)
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -710,10 +711,10 @@ static int cs4270_probe(struct platform_device *pdev)
 	if (codec->control_data) {
 		/* Initialize codec ops */
 		cs4270_dai.ops.hw_params = cs4270_hw_params;
-		cs4270_dai.dai_ops.set_sysclk = cs4270_set_dai_sysclk;
-		cs4270_dai.dai_ops.set_fmt = cs4270_set_dai_fmt;
+		cs4270_dai.ops.set_sysclk = cs4270_set_dai_sysclk;
+		cs4270_dai.ops.set_fmt = cs4270_set_dai_fmt;
 #ifdef CONFIG_SND_SOC_CS4270_HWMUTE
-		cs4270_dai.dai_ops.digital_mute = cs4270_mute;
+		cs4270_dai.ops.digital_mute = cs4270_mute;
 #endif
 	} else
 		printk(KERN_INFO "cs4270: no I2C device found, "
diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index 56dc1c9c7c52..0c5884ea1b00 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -285,7 +285,8 @@ static inline int get_coeff(int mclk, int rate)
 }
 
 static int ssm2602_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	u16 srate;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -330,7 +331,8 @@ static int ssm2602_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int ssm2602_startup(struct snd_pcm_substream *substream)
+static int ssm2602_startup(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -366,7 +368,8 @@ static int ssm2602_startup(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static int ssm2602_pcm_prepare(struct snd_pcm_substream *substream)
+static int ssm2602_pcm_prepare(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -377,7 +380,8 @@ static int ssm2602_pcm_prepare(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void ssm2602_shutdown(struct snd_pcm_substream *substream)
+static void ssm2602_shutdown(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -536,8 +540,6 @@ struct snd_soc_dai ssm2602_dai = {
 		.prepare = ssm2602_pcm_prepare,
 		.hw_params = ssm2602_hw_params,
 		.shutdown = ssm2602_shutdown,
-	},
-	.dai_ops = {
 		.digital_mute = ssm2602_mute,
 		.set_sysclk = ssm2602_set_dai_sysclk,
 		.set_fmt = ssm2602_set_dai_fmt,
diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c
index c903e4f48dc4..a4e13d0688c9 100644
--- a/sound/soc/codecs/tlv320aic23.c
+++ b/sound/soc/codecs/tlv320aic23.c
@@ -418,7 +418,8 @@ static int tlv320aic23_add_widgets(struct snd_soc_codec *codec)
 }
 
 static int tlv320aic23_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -465,7 +466,8 @@ static int tlv320aic23_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int tlv320aic23_pcm_prepare(struct snd_pcm_substream *substream)
+static int tlv320aic23_pcm_prepare(struct snd_pcm_substream *substream,
+				   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -477,7 +479,8 @@ static int tlv320aic23_pcm_prepare(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void tlv320aic23_shutdown(struct snd_pcm_substream *substream)
+static void tlv320aic23_shutdown(struct snd_pcm_substream *substream,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -613,12 +616,10 @@ struct snd_soc_dai tlv320aic23_dai = {
 		.prepare = tlv320aic23_pcm_prepare,
 		.hw_params = tlv320aic23_hw_params,
 		.shutdown = tlv320aic23_shutdown,
-		},
-	.dai_ops = {
-		    .digital_mute = tlv320aic23_mute,
-		    .set_fmt = tlv320aic23_set_dai_fmt,
-		    .set_sysclk = tlv320aic23_set_dai_sysclk,
-		    }
+		.digital_mute = tlv320aic23_mute,
+		.set_fmt = tlv320aic23_set_dai_fmt,
+		.set_sysclk = tlv320aic23_set_dai_sysclk,
+	}
 };
 EXPORT_SYMBOL_GPL(tlv320aic23_dai);
 
diff --git a/sound/soc/codecs/tlv320aic26.c b/sound/soc/codecs/tlv320aic26.c
index bed8a9e63ddc..6b7ddfc92573 100644
--- a/sound/soc/codecs/tlv320aic26.c
+++ b/sound/soc/codecs/tlv320aic26.c
@@ -125,7 +125,8 @@ static int aic26_reg_write(struct snd_soc_codec *codec, unsigned int reg,
  * Digital Audio Interface Operations
  */
 static int aic26_hw_params(struct snd_pcm_substream *substream,
-			   struct snd_pcm_hw_params *params)
+			   struct snd_pcm_hw_params *params,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -287,8 +288,6 @@ struct snd_soc_dai aic26_dai = {
 	},
 	.ops = {
 		.hw_params = aic26_hw_params,
-	},
-	.dai_ops = {
 		.digital_mute = aic26_mute,
 		.set_sysclk = aic26_set_sysclk,
 		.set_fmt = aic26_set_fmt,
diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index cff276ee261e..b76bcc3c4110 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -694,7 +694,8 @@ static int aic3x_add_widgets(struct snd_soc_codec *codec)
 }
 
 static int aic3x_hw_params(struct snd_pcm_substream *substream,
-			   struct snd_pcm_hw_params *params)
+			   struct snd_pcm_hw_params *params,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1009,8 +1010,6 @@ struct snd_soc_dai aic3x_dai = {
 		.formats = AIC3X_FORMATS,},
 	.ops = {
 		.hw_params = aic3x_hw_params,
-	},
-	.dai_ops = {
 		.digital_mute = aic3x_mute,
 		.set_sysclk = aic3x_set_dai_sysclk,
 		.set_fmt = aic3x_set_dai_fmt,
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index c778eb446a5b..33489515b928 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -343,7 +343,8 @@ static int twl4030_set_bias_level(struct snd_soc_codec *codec,
 }
 
 static int twl4030_hw_params(struct snd_pcm_substream *substream,
-			   struct snd_pcm_hw_params *params)
+			   struct snd_pcm_hw_params *params,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -523,8 +524,6 @@ struct snd_soc_dai twl4030_dai = {
 		.formats = TWL4030_FORMATS,},
 	.ops = {
 		.hw_params = twl4030_hw_params,
-	},
-	.dai_ops = {
 		.set_sysclk = twl4030_set_dai_sysclk,
 		.set_fmt = twl4030_set_dai_fmt,
 	}
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index 69ef521a2ed1..91f333cdc7cf 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -168,7 +168,8 @@ static int uda134x_mute(struct snd_soc_dai *dai, int mute)
 	return 0;
 }
 
-static int uda134x_startup(struct snd_pcm_substream *substream)
+static int uda134x_startup(struct snd_pcm_substream *substream,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -200,7 +201,8 @@ static int uda134x_startup(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void uda134x_shutdown(struct snd_pcm_substream *substream)
+static void uda134x_shutdown(struct snd_pcm_substream *substream,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -214,7 +216,8 @@ static void uda134x_shutdown(struct snd_pcm_substream *substream)
 }
 
 static int uda134x_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -484,9 +487,6 @@ struct snd_soc_dai uda134x_dai = {
 		.startup = uda134x_startup,
 		.shutdown = uda134x_shutdown,
 		.hw_params = uda134x_hw_params,
-	},
-	/* DAI operations */
-	.dai_ops = {
 		.digital_mute = uda134x_mute,
 		.set_sysclk = uda134x_set_dai_sysclk,
 		.set_fmt = uda134x_set_dai_fmt,
diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c
index a69ee72a7af5..330877c70699 100644
--- a/sound/soc/codecs/uda1380.c
+++ b/sound/soc/codecs/uda1380.c
@@ -407,7 +407,8 @@ static int uda1380_set_dai_fmt(struct snd_soc_dai *codec_dai,
  * when the DAI is being clocked by the CPU DAI. It's up to the
  * machine and cpu DAI driver to do this before we are called.
  */
-static int uda1380_pcm_prepare(struct snd_pcm_substream *substream)
+static int uda1380_pcm_prepare(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -439,7 +440,8 @@ static int uda1380_pcm_prepare(struct snd_pcm_substream *substream)
 }
 
 static int uda1380_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -477,7 +479,8 @@ static int uda1380_pcm_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static void uda1380_pcm_shutdown(struct snd_pcm_substream *substream)
+static void uda1380_pcm_shutdown(struct snd_pcm_substream *substream,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -560,8 +563,6 @@ struct snd_soc_dai uda1380_dai[] = {
 		.hw_params = uda1380_pcm_hw_params,
 		.shutdown = uda1380_pcm_shutdown,
 		.prepare = uda1380_pcm_prepare,
-	},
-	.dai_ops = {
 		.digital_mute = uda1380_mute,
 		.set_fmt = uda1380_set_dai_fmt,
 	},
@@ -579,8 +580,6 @@ struct snd_soc_dai uda1380_dai[] = {
 		.hw_params = uda1380_pcm_hw_params,
 		.shutdown = uda1380_pcm_shutdown,
 		.prepare = uda1380_pcm_prepare,
-	},
-	.dai_ops = {
 		.digital_mute = uda1380_mute,
 		.set_fmt = uda1380_set_dai_fmt,
 	},
@@ -598,8 +597,6 @@ struct snd_soc_dai uda1380_dai[] = {
 		.hw_params = uda1380_pcm_hw_params,
 		.shutdown = uda1380_pcm_shutdown,
 		.prepare = uda1380_pcm_prepare,
-	},
-	.dai_ops = {
 		.set_fmt = uda1380_set_dai_fmt,
 	},
 },
diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c
index d8ca2da8d634..173b66c0c766 100644
--- a/sound/soc/codecs/wm8510.c
+++ b/sound/soc/codecs/wm8510.c
@@ -463,7 +463,8 @@ static int wm8510_set_dai_fmt(struct snd_soc_dai *codec_dai,
 }
 
 static int wm8510_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -585,8 +586,6 @@ struct snd_soc_dai wm8510_dai = {
 		.formats = WM8510_FORMATS,},
 	.ops = {
 		.hw_params = wm8510_pcm_hw_params,
-	},
-	.dai_ops = {
 		.digital_mute = wm8510_mute,
 		.set_fmt = wm8510_set_dai_fmt,
 		.set_clkdiv = wm8510_set_dai_clkdiv,
diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c
index cbcd7c324ab9..220d4b68904a 100644
--- a/sound/soc/codecs/wm8580.c
+++ b/sound/soc/codecs/wm8580.c
@@ -548,13 +548,13 @@ static int wm8580_set_dai_pll(struct snd_soc_dai *codec_dai,
  * Set PCM DAI bit size and sample rate.
  */
 static int wm8580_paif_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_dai_link *dai = rtd->dai;
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_codec *codec = socdev->codec;
-	u16 paifb = wm8580_read(codec, WM8580_PAIF3 + dai->codec_dai->id);
+	u16 paifb = wm8580_read(codec, WM8580_PAIF3 + dai->id);
 
 	paifb &= ~WM8580_AIF_LENGTH_MASK;
 	/* bit size */
@@ -574,7 +574,7 @@ static int wm8580_paif_hw_params(struct snd_pcm_substream *substream,
 		return -EINVAL;
 	}
 
-	wm8580_write(codec, WM8580_PAIF3 + dai->codec_dai->id, paifb);
+	wm8580_write(codec, WM8580_PAIF3 + dai->id, paifb);
 	return 0;
 }
 
@@ -798,8 +798,6 @@ struct snd_soc_dai wm8580_dai[] = {
 		},
 		.ops = {
 			 .hw_params = wm8580_paif_hw_params,
-		 },
-		.dai_ops = {
 			 .set_fmt = wm8580_set_paif_dai_fmt,
 			 .set_clkdiv = wm8580_set_dai_clkdiv,
 			 .set_pll = wm8580_set_dai_pll,
@@ -818,8 +816,6 @@ struct snd_soc_dai wm8580_dai[] = {
 		},
 		.ops = {
 			 .hw_params = wm8580_paif_hw_params,
-		 },
-		.dai_ops = {
 			 .set_fmt = wm8580_set_paif_dai_fmt,
 			 .set_clkdiv = wm8580_set_dai_clkdiv,
 			 .set_pll = wm8580_set_dai_pll,
diff --git a/sound/soc/codecs/wm8728.c b/sound/soc/codecs/wm8728.c
index 3e39dea61241..71949bd320d3 100644
--- a/sound/soc/codecs/wm8728.c
+++ b/sound/soc/codecs/wm8728.c
@@ -147,7 +147,8 @@ static int wm8728_mute(struct snd_soc_dai *dai, int mute)
 }
 
 static int wm8728_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -269,8 +270,6 @@ struct snd_soc_dai wm8728_dai = {
 	},
 	.ops = {
 		 .hw_params = wm8728_hw_params,
-	},
-	.dai_ops = {
 		 .digital_mute = wm8728_mute,
 		 .set_fmt = wm8728_set_dai_fmt,
 	}
diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
index 7f8a7e36b33e..c0f277053bb2 100644
--- a/sound/soc/codecs/wm8731.c
+++ b/sound/soc/codecs/wm8731.c
@@ -264,7 +264,8 @@ static inline int get_coeff(int mclk, int rate)
 }
 
 static int wm8731_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -293,7 +294,8 @@ static int wm8731_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int wm8731_pcm_prepare(struct snd_pcm_substream *substream)
+static int wm8731_pcm_prepare(struct snd_pcm_substream *substream,
+			      struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -305,7 +307,8 @@ static int wm8731_pcm_prepare(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void wm8731_shutdown(struct snd_pcm_substream *substream)
+static void wm8731_shutdown(struct snd_pcm_substream *substream,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -461,8 +464,6 @@ struct snd_soc_dai wm8731_dai = {
 		.prepare = wm8731_pcm_prepare,
 		.hw_params = wm8731_hw_params,
 		.shutdown = wm8731_shutdown,
-	},
-	.dai_ops = {
 		.digital_mute = wm8731_mute,
 		.set_sysclk = wm8731_set_dai_sysclk,
 		.set_fmt = wm8731_set_dai_fmt,
diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c
index 9b7296ee5b08..860a1d56830a 100644
--- a/sound/soc/codecs/wm8750.c
+++ b/sound/soc/codecs/wm8750.c
@@ -614,7 +614,8 @@ static int wm8750_set_dai_fmt(struct snd_soc_dai *codec_dai,
 }
 
 static int wm8750_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -709,8 +710,6 @@ struct snd_soc_dai wm8750_dai = {
 		.formats = WM8750_FORMATS,},
 	.ops = {
 		.hw_params = wm8750_pcm_hw_params,
-	},
-	.dai_ops = {
 		.digital_mute = wm8750_mute,
 		.set_fmt = wm8750_set_dai_fmt,
 		.set_sysclk = wm8750_set_dai_sysclk,
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index d426eaa22185..5e4cd3bb824a 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -922,7 +922,8 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
  * Set PCM DAI bit size and sample rate.
  */
 static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1155,7 +1156,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
  * Set PCM DAI bit size and sample rate.
  */
 static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1323,16 +1325,15 @@ static const struct snd_soc_dai wm8753_all_dai[] = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM8753_RATES,
-		.formats = WM8753_FORMATS,},
+		.formats = WM8753_FORMATS},
 	.capture = { /* dummy for fast DAI switching */
 		.stream_name = "Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM8753_RATES,
-		.formats = WM8753_FORMATS,},
+		.formats = WM8753_FORMATS},
 	.ops = {
-		.hw_params = wm8753_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = wm8753_i2s_hw_params,
 		.digital_mute = wm8753_mute,
 		.set_fmt = wm8753_mode1h_set_dai_fmt,
 		.set_clkdiv = wm8753_set_dai_clkdiv,
@@ -1356,8 +1357,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = {
 		.rates = WM8753_RATES,
 		.formats = WM8753_FORMATS,},
 	.ops = {
-		.hw_params = wm8753_pcm_hw_params,},
-	.dai_ops = {
+		.hw_params = wm8753_pcm_hw_params,
 		.digital_mute = wm8753_mute,
 		.set_fmt = wm8753_mode1v_set_dai_fmt,
 		.set_clkdiv = wm8753_set_dai_clkdiv,
@@ -1385,8 +1385,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = {
 		.rates = WM8753_RATES,
 		.formats = WM8753_FORMATS,},
 	.ops = {
-		.hw_params = wm8753_pcm_hw_params,},
-	.dai_ops = {
+		.hw_params = wm8753_pcm_hw_params,
 		.digital_mute = wm8753_mute,
 		.set_fmt = wm8753_mode2_set_dai_fmt,
 		.set_clkdiv = wm8753_set_dai_clkdiv,
@@ -1410,8 +1409,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = {
 		.rates = WM8753_RATES,
 		.formats = WM8753_FORMATS,},
 	.ops = {
-		.hw_params = wm8753_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = wm8753_i2s_hw_params,
 		.digital_mute = wm8753_mute,
 		.set_fmt = wm8753_mode3_4_set_dai_fmt,
 		.set_clkdiv = wm8753_set_dai_clkdiv,
@@ -1439,8 +1437,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = {
 		.rates = WM8753_RATES,
 		.formats = WM8753_FORMATS,},
 	.ops = {
-		.hw_params = wm8753_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = wm8753_i2s_hw_params,
 		.digital_mute = wm8753_mute,
 		.set_fmt = wm8753_mode3_4_set_dai_fmt,
 		.set_clkdiv = wm8753_set_dai_clkdiv,
diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c
index de016f41e04c..d1326be91c8b 100644
--- a/sound/soc/codecs/wm8900.c
+++ b/sound/soc/codecs/wm8900.c
@@ -727,7 +727,8 @@ static int wm8900_add_widgets(struct snd_soc_codec *codec)
 }
 
 static int wm8900_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1117,8 +1118,6 @@ struct snd_soc_dai wm8900_dai = {
 	 },
 	.ops = {
 		.hw_params = wm8900_hw_params,
-	 },
-	.dai_ops = {
 		 .set_clkdiv = wm8900_set_dai_clkdiv,
 		 .set_pll = wm8900_set_dai_pll,
 		 .set_fmt = wm8900_set_dai_fmt,
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index ce40d7877605..efbe8927b7d2 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1257,7 +1257,8 @@ static struct {
 	{ 0,      0 },
 };
 
-static int wm8903_startup(struct snd_pcm_substream *substream)
+static int wm8903_startup(struct snd_pcm_substream *substream,
+			  struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1298,7 +1299,8 @@ static int wm8903_startup(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void wm8903_shutdown(struct snd_pcm_substream *substream)
+static void wm8903_shutdown(struct snd_pcm_substream *substream,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1317,7 +1319,8 @@ static void wm8903_shutdown(struct snd_pcm_substream *substream)
 }
 
 static int wm8903_hw_params(struct snd_pcm_substream *substream,
-			    struct snd_pcm_hw_params *params)
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1515,8 +1518,6 @@ struct snd_soc_dai wm8903_dai = {
 		 .startup = wm8903_startup,
 		 .shutdown = wm8903_shutdown,
 		 .hw_params = wm8903_hw_params,
-	},
-	.dai_ops = {
 		 .digital_mute = wm8903_digital_mute,
 		 .set_fmt = wm8903_set_dai_fmt,
 		 .set_sysclk = wm8903_set_dai_sysclk
diff --git a/sound/soc/codecs/wm8971.c b/sound/soc/codecs/wm8971.c
index f41a578ddd4f..26edcc9d6e87 100644
--- a/sound/soc/codecs/wm8971.c
+++ b/sound/soc/codecs/wm8971.c
@@ -541,7 +541,8 @@ static int wm8971_set_dai_fmt(struct snd_soc_dai *codec_dai,
 }
 
 static int wm8971_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -634,8 +635,6 @@ struct snd_soc_dai wm8971_dai = {
 		.formats = WM8971_FORMATS,},
 	.ops = {
 		.hw_params = wm8971_pcm_hw_params,
-	},
-	.dai_ops = {
 		.digital_mute = wm8971_mute,
 		.set_fmt = wm8971_set_dai_fmt,
 		.set_sysclk = wm8971_set_dai_sysclk,
diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c
index 2d7b0096d929..13926516d16e 100644
--- a/sound/soc/codecs/wm8990.c
+++ b/sound/soc/codecs/wm8990.c
@@ -1172,7 +1172,8 @@ static int wm8990_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
  * Set PCM DAI bit size and sample rate.
  */
 static int wm8990_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -1362,8 +1363,7 @@ struct snd_soc_dai wm8990_dai = {
 		.rates = WM8990_RATES,
 		.formats = WM8990_FORMATS,},
 	.ops = {
-		.hw_params = wm8990_hw_params,},
-	.dai_ops = {
+		.hw_params = wm8990_hw_params,
 		.digital_mute = wm8990_mute,
 		.set_fmt = wm8990_set_dai_fmt,
 		.set_clkdiv = wm8990_set_dai_clkdiv,
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index ffb471e420e2..81c38e7ad344 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -487,7 +487,8 @@ static int ac97_write(struct snd_soc_codec *codec, unsigned int reg,
 	return 0;
 }
 
-static int ac97_prepare(struct snd_pcm_substream *substream)
+static int ac97_prepare(struct snd_pcm_substream *substream,
+			struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -507,7 +508,8 @@ static int ac97_prepare(struct snd_pcm_substream *substream)
 	return ac97_write(codec, reg, runtime->rate);
 }
 
-static int ac97_aux_prepare(struct snd_pcm_substream *substream)
+static int ac97_aux_prepare(struct snd_pcm_substream *substream,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index 740bf3cde18d..a0cc5ac969a1 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -928,7 +928,8 @@ static int wm9713_set_dai_fmt(struct snd_soc_dai *codec_dai,
 }
 
 static int wm9713_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -954,7 +955,8 @@ static int wm9713_pcm_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static void wm9713_voiceshutdown(struct snd_pcm_substream *substream)
+static void wm9713_voiceshutdown(struct snd_pcm_substream *substream,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
@@ -969,7 +971,8 @@ static void wm9713_voiceshutdown(struct snd_pcm_substream *substream)
 	ac97_write(codec, AC97_EXTENDED_MID, status);
 }
 
-static int ac97_hifi_prepare(struct snd_pcm_substream *substream)
+static int ac97_hifi_prepare(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -989,7 +992,8 @@ static int ac97_hifi_prepare(struct snd_pcm_substream *substream)
 	return ac97_write(codec, reg, runtime->rate);
 }
 
-static int ac97_aux_prepare(struct snd_pcm_substream *substream)
+static int ac97_aux_prepare(struct snd_pcm_substream *substream,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -1042,8 +1046,7 @@ struct snd_soc_dai wm9713_dai[] = {
 		.rates = WM9713_RATES,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
 	.ops = {
-		.prepare = ac97_hifi_prepare,},
-	.dai_ops = {
+		.prepare = ac97_hifi_prepare,
 		.set_clkdiv = wm9713_set_dai_clkdiv,
 		.set_pll = wm9713_set_dai_pll,},
 	},
@@ -1056,8 +1059,7 @@ struct snd_soc_dai wm9713_dai[] = {
 		.rates = WM9713_RATES,
 		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
 	.ops = {
-		.prepare = ac97_aux_prepare,},
-	.dai_ops = {
+		.prepare = ac97_aux_prepare,
 		.set_clkdiv = wm9713_set_dai_clkdiv,
 		.set_pll = wm9713_set_dai_pll,},
 	},
@@ -1077,8 +1079,7 @@ struct snd_soc_dai wm9713_dai[] = {
 		.formats = WM9713_PCM_FORMATS,},
 	.ops = {
 		.hw_params = wm9713_pcm_hw_params,
-		.shutdown = wm9713_voiceshutdown,},
-	.dai_ops = {
+		.shutdown = wm9713_voiceshutdown,
 		.set_clkdiv = wm9713_set_dai_clkdiv,
 		.set_pll = wm9713_set_dai_pll,
 		.set_fmt = wm9713_set_dai_fmt,
diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c
index 95df51e803b4..7a17cd0ecf64 100644
--- a/sound/soc/davinci/davinci-i2s.c
+++ b/sound/soc/davinci/davinci-i2s.c
@@ -188,7 +188,8 @@ static void davinci_mcbsp_stop(struct snd_pcm_substream *substream)
 	davinci_mcbsp_write_reg(dev, DAVINCI_MCBSP_SPCR_REG, w);
 }
 
-static int davinci_i2s_startup(struct snd_pcm_substream *substream)
+static int davinci_i2s_startup(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -285,7 +286,8 @@ static int davinci_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai,
 }
 
 static int davinci_i2s_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct davinci_pcm_dma_params *dma_params = rtd->dai->cpu_dai->dma_data;
@@ -349,7 +351,8 @@ static int davinci_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int davinci_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
+static int davinci_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			       struct snd_soc_dai *dai)
 {
 	int ret = 0;
 
@@ -473,8 +476,7 @@ struct snd_soc_dai davinci_i2s_dai = {
 	.ops = {
 		.startup = davinci_i2s_startup,
 		.trigger = davinci_i2s_trigger,
-		.hw_params = davinci_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = davinci_i2s_hw_params,
 		.set_fmt = davinci_i2s_set_dai_fmt,
 	},
 };
diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c
index fa38f9cd3506..e95fde1766b5 100644
--- a/sound/soc/davinci/davinci-sffsdr.c
+++ b/sound/soc/davinci/davinci-sffsdr.c
@@ -34,7 +34,8 @@
 #include "davinci-i2s.h"
 
 static int sffsdr_hw_params(struct snd_pcm_substream *substream,
-			    struct snd_pcm_hw_params *params)
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 157a7895ffa1..52c290bb47bf 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -266,7 +266,8 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id)
  * If this is the first stream open, then grab the IRQ and program most of
  * the SSI registers.
  */
-static int fsl_ssi_startup(struct snd_pcm_substream *substream)
+static int fsl_ssi_startup(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct fsl_ssi_private *ssi_private = rtd->dai->cpu_dai->private_data;
@@ -411,7 +412,8 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream)
  * Note: The SxCCR.DC and SxCCR.PM bits are only used if the SSI is the
  * clock master.
  */
-static int fsl_ssi_prepare(struct snd_pcm_substream *substream)
+static int fsl_ssi_prepare(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -441,7 +443,8 @@ static int fsl_ssi_prepare(struct snd_pcm_substream *substream)
  * The DMA channel is in external master start and pause mode, which
  * means the SSI completely controls the flow of data.
  */
-static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd)
+static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct fsl_ssi_private *ssi_private = rtd->dai->cpu_dai->private_data;
@@ -490,7 +493,8 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd)
  *
  * Shutdown the SSI if there are no other substreams open.
  */
-static void fsl_ssi_shutdown(struct snd_pcm_substream *substream)
+static void fsl_ssi_shutdown(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct fsl_ssi_private *ssi_private = rtd->dai->cpu_dai->private_data;
@@ -578,8 +582,6 @@ static struct snd_soc_dai fsl_ssi_dai_template = {
 		.prepare = fsl_ssi_prepare,
 		.shutdown = fsl_ssi_shutdown,
 		.trigger = fsl_ssi_trigger,
-	},
-	.dai_ops = {
 		.set_sysclk = fsl_ssi_set_sysclk,
 		.set_fmt = fsl_ssi_set_fmt,
 	},
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index 94a02eaa4825..e2c172f38979 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -187,7 +187,8 @@ static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream)
  * If this is the first stream open, then grab the IRQ and program most of
  * the PSC registers.
  */
-static int psc_i2s_startup(struct snd_pcm_substream *substream)
+static int psc_i2s_startup(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
@@ -220,7 +221,8 @@ static int psc_i2s_startup(struct snd_pcm_substream *substream)
 }
 
 static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
@@ -256,7 +258,8 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int psc_i2s_hw_free(struct snd_pcm_substream *substream)
+static int psc_i2s_hw_free(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
 {
 	snd_pcm_set_runtime_buffer(substream, NULL);
 	return 0;
@@ -268,7 +271,8 @@ static int psc_i2s_hw_free(struct snd_pcm_substream *substream)
  * This function is called by ALSA to start, stop, pause, and resume the DMA
  * transfer of data.
  */
-static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
+static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
@@ -383,7 +387,8 @@ static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
  *
  * Shutdown the PSC if there are no other substreams open.
  */
-static void psc_i2s_shutdown(struct snd_pcm_substream *substream)
+static void psc_i2s_shutdown(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data;
@@ -483,8 +488,6 @@ static struct snd_soc_dai psc_i2s_dai_template = {
 		.hw_free = psc_i2s_hw_free,
 		.shutdown = psc_i2s_shutdown,
 		.trigger = psc_i2s_trigger,
-	},
-	.dai_ops = {
 		.set_sysclk = psc_i2s_set_sysclk,
 		.set_fmt = psc_i2s_set_fmt,
 	},
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 3d4060b00eb3..6013898f49b4 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -138,7 +138,8 @@ static const unsigned long omap34xx_mcbsp_port[][2] = {
 static const unsigned long omap34xx_mcbsp_port[][2] = {};
 #endif
 
-static int omap_mcbsp_dai_startup(struct snd_pcm_substream *substream)
+static int omap_mcbsp_dai_startup(struct snd_pcm_substream *substream,
+				  struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -151,7 +152,8 @@ static int omap_mcbsp_dai_startup(struct snd_pcm_substream *substream)
 	return err;
 }
 
-static void omap_mcbsp_dai_shutdown(struct snd_pcm_substream *substream)
+static void omap_mcbsp_dai_shutdown(struct snd_pcm_substream *substream,
+				    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -163,7 +165,8 @@ static void omap_mcbsp_dai_shutdown(struct snd_pcm_substream *substream)
 	}
 }
 
-static int omap_mcbsp_dai_trigger(struct snd_pcm_substream *substream, int cmd)
+static int omap_mcbsp_dai_trigger(struct snd_pcm_substream *substream, int cmd,
+				  struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -192,7 +195,8 @@ static int omap_mcbsp_dai_trigger(struct snd_pcm_substream *substream, int cmd)
 }
 
 static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
-				    struct snd_pcm_hw_params *params)
+				    struct snd_pcm_hw_params *params,
+				    struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -470,8 +474,6 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 		.shutdown = omap_mcbsp_dai_shutdown,		\
 		.trigger = omap_mcbsp_dai_trigger,		\
 		.hw_params = omap_mcbsp_dai_hw_params,		\
-	},							\
-	.dai_ops = {						\
 		.set_fmt = omap_mcbsp_dai_set_dai_fmt,		\
 		.set_clkdiv = omap_mcbsp_dai_set_clkdiv,	\
 		.set_sysclk = omap_mcbsp_dai_set_dai_sysclk,	\
diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c
index 5bea31157a94..7b160f9d83f9 100644
--- a/sound/soc/omap/omap2evm.c
+++ b/sound/soc/omap/omap2evm.c
@@ -38,7 +38,8 @@
 #include "../codecs/twl4030.h"
 
 static int omap2evm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+	struct snd_pcm_hw_params *params,
+	struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *codec_dai = rtd->dai->codec_dai;
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index e2b54b88c380..d0dd6245a20a 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -212,7 +212,8 @@ static struct pxa2xx_pcm_dma_params *ssp_dma_params[4][4] = {
 	},
 };
 
-static int pxa_ssp_startup(struct snd_pcm_substream *substream)
+static int pxa_ssp_startup(struct snd_pcm_substream *substream,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -228,7 +229,8 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream)
 	return ret;
 }
 
-static void pxa_ssp_shutdown(struct snd_pcm_substream *substream)
+static void pxa_ssp_shutdown(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -604,7 +606,8 @@ static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
  * Can be called multiple times by oss emulation.
  */
 static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -678,7 +681,8 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int pxa_ssp_trigger(struct snd_pcm_substream *substream, int cmd)
+static int pxa_ssp_trigger(struct snd_pcm_substream *substream, int cmd,
+			   struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -806,8 +810,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 			.shutdown = pxa_ssp_shutdown,
 			.trigger = pxa_ssp_trigger,
 			.hw_params = pxa_ssp_hw_params,
-		 },
-		.dai_ops = {
 			.set_sysclk = pxa_ssp_set_dai_sysclk,
 			.set_clkdiv = pxa_ssp_set_dai_clkdiv,
 			.set_pll = pxa_ssp_set_dai_pll,
@@ -840,8 +842,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 			.shutdown = pxa_ssp_shutdown,
 			.trigger = pxa_ssp_trigger,
 			.hw_params = pxa_ssp_hw_params,
-		 },
-		.dai_ops = {
 			.set_sysclk = pxa_ssp_set_dai_sysclk,
 			.set_clkdiv = pxa_ssp_set_dai_clkdiv,
 			.set_pll = pxa_ssp_set_dai_pll,
@@ -875,8 +875,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 			.shutdown = pxa_ssp_shutdown,
 			.trigger = pxa_ssp_trigger,
 			.hw_params = pxa_ssp_hw_params,
-		 },
-		.dai_ops = {
 			.set_sysclk = pxa_ssp_set_dai_sysclk,
 			.set_clkdiv = pxa_ssp_set_dai_clkdiv,
 			.set_pll = pxa_ssp_set_dai_pll,
@@ -910,8 +908,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 			.shutdown = pxa_ssp_shutdown,
 			.trigger = pxa_ssp_trigger,
 			.hw_params = pxa_ssp_hw_params,
-		 },
-		.dai_ops = {
 			.set_sysclk = pxa_ssp_set_dai_sysclk,
 			.set_clkdiv = pxa_ssp_set_dai_clkdiv,
 			.set_pll = pxa_ssp_set_dai_pll,
diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c
index a7a3a9c5c6ff..86667d2f1b75 100644
--- a/sound/soc/pxa/pxa2xx-ac97.c
+++ b/sound/soc/pxa/pxa2xx-ac97.c
@@ -117,7 +117,8 @@ static void pxa2xx_ac97_remove(struct platform_device *pdev,
 }
 
 static int pxa2xx_ac97_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -131,7 +132,8 @@ static int pxa2xx_ac97_hw_params(struct snd_pcm_substream *substream,
 }
 
 static int pxa2xx_ac97_hw_aux_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				     struct snd_pcm_hw_params *params,
+				     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -145,7 +147,8 @@ static int pxa2xx_ac97_hw_aux_params(struct snd_pcm_substream *substream,
 }
 
 static int pxa2xx_ac97_hw_mic_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				     struct snd_pcm_hw_params *params,
+				     struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index e758034db5c3..9a3e55b48129 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -121,7 +121,8 @@ static struct pxa2xx_gpio gpio_bus[] = {
 	},
 };
 
-static int pxa2xx_i2s_startup(struct snd_pcm_substream *substream)
+static int pxa2xx_i2s_startup(struct snd_pcm_substream *substream,
+			      struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -187,7 +188,8 @@ static int pxa2xx_i2s_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 }
 
 static int pxa2xx_i2s_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -248,7 +250,8 @@ static int pxa2xx_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
+static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			      struct snd_soc_dai *dai)
 {
 	int ret = 0;
 
@@ -269,7 +272,8 @@ static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
 	return ret;
 }
 
-static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream)
+static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
 {
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		SACR1 |= SACR1_DRPL;
@@ -353,8 +357,7 @@ struct snd_soc_dai pxa_i2s_dai = {
 		.startup = pxa2xx_i2s_startup,
 		.shutdown = pxa2xx_i2s_shutdown,
 		.trigger = pxa2xx_i2s_trigger,
-		.hw_params = pxa2xx_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = pxa2xx_i2s_hw_params,
 		.set_fmt = pxa2xx_i2s_set_dai_fmt,
 		.set_sysclk = pxa2xx_i2s_set_dai_sysclk,
 	},
diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c
index ded7d995a922..360cc2a49d9d 100644
--- a/sound/soc/s3c24xx/s3c2412-i2s.c
+++ b/sound/soc/s3c24xx/s3c2412-i2s.c
@@ -343,7 +343,8 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 }
 
 static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream,
-				 struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	u32 iismod;
@@ -373,7 +374,8 @@ static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int s3c2412_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
+static int s3c2412_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			       struct snd_soc_dai *dai)
 {
 	int capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
 	unsigned long irqs;
@@ -730,8 +732,6 @@ struct snd_soc_dai s3c2412_i2s_dai = {
 	.ops = {
 		.trigger	= s3c2412_i2s_trigger,
 		.hw_params	= s3c2412_i2s_hw_params,
-	},
-	.dai_ops = {
 		.set_fmt	= s3c2412_i2s_set_fmt,
 		.set_clkdiv	= s3c2412_i2s_set_clkdiv,
 		.set_sysclk	= s3c2412_i2s_set_sysclk,
diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c
index 19c5c3cf5d8c..31377821b2c5 100644
--- a/sound/soc/s3c24xx/s3c2443-ac97.c
+++ b/sound/soc/s3c24xx/s3c2443-ac97.c
@@ -271,7 +271,8 @@ static void s3c2443_ac97_remove(struct platform_device *pdev,
 }
 
 static int s3c2443_ac97_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+				  struct snd_pcm_hw_params *params,
+				  struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -313,7 +314,8 @@ static int s3c2443_ac97_trigger(struct snd_pcm_substream *substream, int cmd)
 }
 
 static int s3c2443_ac97_hw_mic_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+				      struct snd_pcm_hw_params *params,
+				      struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai;
@@ -327,7 +329,7 @@ static int s3c2443_ac97_hw_mic_params(struct snd_pcm_substream *substream,
 }
 
 static int s3c2443_ac97_mic_trigger(struct snd_pcm_substream *substream,
-	int cmd)
+				    int cmd, struct snd_soc_dai *dai)
 {
 	u32 ac_glbctrl;
 
diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c
index c18977bceaf2..1bac9dd3dbd4 100644
--- a/sound/soc/s3c24xx/s3c24xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c24xx-i2s.c
@@ -243,7 +243,8 @@ static int s3c24xx_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
 }
 
 static int s3c24xx_i2s_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	u32 iismod;
@@ -279,7 +280,8 @@ static int s3c24xx_i2s_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int s3c24xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd)
+static int s3c24xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			       struct snd_soc_dai *dai)
 {
 	int ret = 0;
 
@@ -475,8 +477,7 @@ struct snd_soc_dai s3c24xx_i2s_dai = {
 		.formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE,},
 	.ops = {
 		.trigger = s3c24xx_i2s_trigger,
-		.hw_params = s3c24xx_i2s_hw_params,},
-	.dai_ops = {
+		.hw_params = s3c24xx_i2s_hw_params,
 		.set_fmt = s3c24xx_i2s_set_fmt,
 		.set_clkdiv = s3c24xx_i2s_set_clkdiv,
 		.set_sysclk = s3c24xx_i2s_set_sysclk,
diff --git a/sound/soc/sh/hac.c b/sound/soc/sh/hac.c
index df7bc345c320..3318071dc80f 100644
--- a/sound/soc/sh/hac.c
+++ b/sound/soc/sh/hac.c
@@ -236,7 +236,8 @@ struct snd_ac97_bus_ops soc_ac97_ops = {
 EXPORT_SYMBOL_GPL(soc_ac97_ops);
 
 static int hac_hw_params(struct snd_pcm_substream *substream,
-			 struct snd_pcm_hw_params *params)
+			 struct snd_pcm_hw_params *params,
+			 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct hac_priv *hac = &hac_cpu_data[rtd->dai->cpu_dai->id];
diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c
index 55c3464163ab..52a233840d27 100644
--- a/sound/soc/sh/ssi.c
+++ b/sound/soc/sh/ssi.c
@@ -89,7 +89,8 @@ struct ssi_priv {
  * track usage of the SSI; it is simplex-only so prevent attempts of
  * concurrent playback + capture. FIXME: any locking required?
  */
-static int ssi_startup(struct snd_pcm_substream *substream)
+static int ssi_startup(struct snd_pcm_substream *substream,
+		       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id];
@@ -101,7 +102,8 @@ static int ssi_startup(struct snd_pcm_substream *substream)
 	return 0;
 }
 
-static void ssi_shutdown(struct snd_pcm_substream *substream)
+static void ssi_shutdown(struct snd_pcm_substream *substream,
+			 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id];
@@ -109,7 +111,8 @@ static void ssi_shutdown(struct snd_pcm_substream *substream)
 	ssi->inuse = 0;
 }
 
-static int ssi_trigger(struct snd_pcm_substream *substream, int cmd)
+static int ssi_trigger(struct snd_pcm_substream *substream, int cmd,
+		       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id];
@@ -129,7 +132,8 @@ static int ssi_trigger(struct snd_pcm_substream *substream, int cmd)
 }
 
 static int ssi_hw_params(struct snd_pcm_substream *substream,
-			 struct snd_pcm_hw_params *params)
+			 struct snd_pcm_hw_params *params,
+			 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id];
@@ -354,8 +358,6 @@ struct snd_soc_dai sh4_ssi_dai[] = {
 		.shutdown	= ssi_shutdown,
 		.trigger	= ssi_trigger,
 		.hw_params	= ssi_hw_params,
-	},
-	.dai_ops = {
 		.set_sysclk	= ssi_set_sysclk,
 		.set_clkdiv	= ssi_set_clkdiv,
 		.set_fmt	= ssi_set_fmt,
@@ -383,8 +385,6 @@ struct snd_soc_dai sh4_ssi_dai[] = {
 		.shutdown	= ssi_shutdown,
 		.trigger	= ssi_trigger,
 		.hw_params	= ssi_hw_params,
-	},
-	.dai_ops = {
 		.set_sysclk	= ssi_set_sysclk,
 		.set_clkdiv	= ssi_set_clkdiv,
 		.set_fmt	= ssi_set_fmt,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index c5cb9516fea4..43f4060dbe75 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -134,7 +134,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 
 	/* startup the audio subsystem */
 	if (cpu_dai->ops.startup) {
-		ret = cpu_dai->ops.startup(substream);
+		ret = cpu_dai->ops.startup(substream, cpu_dai);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: can't open interface %s\n",
 				cpu_dai->name);
@@ -151,7 +151,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 	}
 
 	if (codec_dai->ops.startup) {
-		ret = codec_dai->ops.startup(substream);
+		ret = codec_dai->ops.startup(substream, codec_dai);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: can't open codec %s\n",
 				codec_dai->name);
@@ -248,7 +248,7 @@ codec_dai_err:
 
 platform_err:
 	if (cpu_dai->ops.shutdown)
-		cpu_dai->ops.shutdown(substream);
+		cpu_dai->ops.shutdown(substream, cpu_dai);
 out:
 	mutex_unlock(&pcm_mutex);
 	return ret;
@@ -339,10 +339,10 @@ static int soc_codec_close(struct snd_pcm_substream *substream)
 		snd_soc_dai_digital_mute(codec_dai, 1);
 
 	if (cpu_dai->ops.shutdown)
-		cpu_dai->ops.shutdown(substream);
+		cpu_dai->ops.shutdown(substream, cpu_dai);
 
 	if (codec_dai->ops.shutdown)
-		codec_dai->ops.shutdown(substream);
+		codec_dai->ops.shutdown(substream, codec_dai);
 
 	if (machine->ops && machine->ops->shutdown)
 		machine->ops->shutdown(substream);
@@ -406,7 +406,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	}
 
 	if (codec_dai->ops.prepare) {
-		ret = codec_dai->ops.prepare(substream);
+		ret = codec_dai->ops.prepare(substream, codec_dai);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: codec DAI prepare error\n");
 			goto out;
@@ -414,7 +414,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	}
 
 	if (cpu_dai->ops.prepare) {
-		ret = cpu_dai->ops.prepare(substream);
+		ret = cpu_dai->ops.prepare(substream, cpu_dai);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: cpu DAI prepare error\n");
 			goto out;
@@ -491,7 +491,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 	}
 
 	if (codec_dai->ops.hw_params) {
-		ret = codec_dai->ops.hw_params(substream, params);
+		ret = codec_dai->ops.hw_params(substream, params, codec_dai);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: can't set codec %s hw params\n",
 				codec_dai->name);
@@ -500,7 +500,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 	}
 
 	if (cpu_dai->ops.hw_params) {
-		ret = cpu_dai->ops.hw_params(substream, params);
+		ret = cpu_dai->ops.hw_params(substream, params, cpu_dai);
 		if (ret < 0) {
 			printk(KERN_ERR "asoc: interface %s hw params failed\n",
 				cpu_dai->name);
@@ -523,11 +523,11 @@ out:
 
 platform_err:
 	if (cpu_dai->ops.hw_free)
-		cpu_dai->ops.hw_free(substream);
+		cpu_dai->ops.hw_free(substream, cpu_dai);
 
 interface_err:
 	if (codec_dai->ops.hw_free)
-		codec_dai->ops.hw_free(substream);
+		codec_dai->ops.hw_free(substream, codec_dai);
 
 codec_err:
 	if (machine->ops && machine->ops->hw_free)
@@ -566,10 +566,10 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
 
 	/* now free hw params for the DAI's  */
 	if (codec_dai->ops.hw_free)
-		codec_dai->ops.hw_free(substream);
+		codec_dai->ops.hw_free(substream, codec_dai);
 
 	if (cpu_dai->ops.hw_free)
-		cpu_dai->ops.hw_free(substream);
+		cpu_dai->ops.hw_free(substream, cpu_dai);
 
 	mutex_unlock(&pcm_mutex);
 	return 0;
@@ -586,7 +586,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 	int ret;
 
 	if (codec_dai->ops.trigger) {
-		ret = codec_dai->ops.trigger(substream, cmd);
+		ret = codec_dai->ops.trigger(substream, cmd, codec_dai);
 		if (ret < 0)
 			return ret;
 	}
@@ -598,7 +598,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 	}
 
 	if (cpu_dai->ops.trigger) {
-		ret = cpu_dai->ops.trigger(substream, cmd);
+		ret = cpu_dai->ops.trigger(substream, cmd, cpu_dai);
 		if (ret < 0)
 			return ret;
 	}
@@ -637,10 +637,10 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	snd_power_change_state(codec->card, SNDRV_CTL_POWER_D3hot);
 
 	/* mute any active DAC's */
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *dai = machine->dai_link[i].codec_dai;
-		if (dai->dai_ops.digital_mute && dai->playback.active)
-			dai->dai_ops.digital_mute(dai, 1);
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *dai = card->dai_link[i].codec_dai;
+		if (dai->ops.digital_mute && dai->playback.active)
+			dai->ops.digital_mute(dai, 1);
 	}
 
 	/* suspend all pcms */
@@ -733,10 +733,10 @@ static void soc_resume_deferred(struct work_struct *work)
 	}
 
 	/* unmute any active DACs */
-	for (i = 0; i < machine->num_links; i++) {
-		struct snd_soc_dai *dai = machine->dai_link[i].codec_dai;
-		if (dai->dai_ops.digital_mute && dai->playback.active)
-			dai->dai_ops.digital_mute(dai, 0);
+	for (i = 0; i < card->num_links; i++) {
+		struct snd_soc_dai *dai = card->dai_link[i].codec_dai;
+		if (dai->ops.digital_mute && dai->playback.active)
+			dai->ops.digital_mute(dai, 0);
 	}
 
 	for (i = 0; i < card->num_links; i++) {
@@ -1849,8 +1849,8 @@ EXPORT_SYMBOL_GPL(snd_soc_put_volsw_s8);
 int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
 	unsigned int freq, int dir)
 {
-	if (dai->dai_ops.set_sysclk)
-		return dai->dai_ops.set_sysclk(dai, clk_id, freq, dir);
+	if (dai->ops.set_sysclk)
+		return dai->ops.set_sysclk(dai, clk_id, freq, dir);
 	else
 		return -EINVAL;
 }
@@ -1869,8 +1869,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_sysclk);
 int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai,
 	int div_id, int div)
 {
-	if (dai->dai_ops.set_clkdiv)
-		return dai->dai_ops.set_clkdiv(dai, div_id, div);
+	if (dai->ops.set_clkdiv)
+		return dai->ops.set_clkdiv(dai, div_id, div);
 	else
 		return -EINVAL;
 }
@@ -1888,8 +1888,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_clkdiv);
 int snd_soc_dai_set_pll(struct snd_soc_dai *dai,
 	int pll_id, unsigned int freq_in, unsigned int freq_out)
 {
-	if (dai->dai_ops.set_pll)
-		return dai->dai_ops.set_pll(dai, pll_id, freq_in, freq_out);
+	if (dai->ops.set_pll)
+		return dai->ops.set_pll(dai, pll_id, freq_in, freq_out);
 	else
 		return -EINVAL;
 }
@@ -1905,8 +1905,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_pll);
  */
 int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
 {
-	if (dai->dai_ops.set_fmt)
-		return dai->dai_ops.set_fmt(dai, fmt);
+	if (dai->ops.set_fmt)
+		return dai->ops.set_fmt(dai, fmt);
 	else
 		return -EINVAL;
 }
@@ -1924,8 +1924,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_fmt);
 int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
 	unsigned int mask, int slots)
 {
-	if (dai->dai_ops.set_sysclk)
-		return dai->dai_ops.set_tdm_slot(dai, mask, slots);
+	if (dai->ops.set_sysclk)
+		return dai->ops.set_tdm_slot(dai, mask, slots);
 	else
 		return -EINVAL;
 }
@@ -1940,8 +1940,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tdm_slot);
  */
 int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate)
 {
-	if (dai->dai_ops.set_sysclk)
-		return dai->dai_ops.set_tristate(dai, tristate);
+	if (dai->ops.set_sysclk)
+		return dai->ops.set_tristate(dai, tristate);
 	else
 		return -EINVAL;
 }
@@ -1956,8 +1956,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tristate);
  */
 int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute)
 {
-	if (dai->dai_ops.digital_mute)
-		return dai->dai_ops.digital_mute(dai, mute);
+	if (dai->ops.digital_mute)
+		return dai->ops.digital_mute(dai, mute);
 	else
 		return -EINVAL;
 }
-- 
cgit v1.2.3


From 4821277f36e008b531728e359fbbedb229117f4b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 3 Nov 2008 21:05:01 +0100
Subject: mac80211: fix BUILD_BUG_ON() caused by misalignment on arm

On ARM alignment is done slightly different from other architectures.
struct ieee80211_tx_rate is aligned to word size, even though it only has 3
single-byte members, which triggers the BUILD_BUG_ON in
ieee80211_tx_info_clear_status

This patch marks the struct ieee80211_tx_rate as packed, so that ARM
behaves like the other architectures.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1b8ed421feaa..c813fbf7d364 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -307,7 +307,7 @@ struct ieee80211_tx_rate {
 	s8 idx;
 	u8 count;
 	u8 flags;
-};
+} __attribute__((packed));
 
 /**
  * struct ieee80211_tx_info - skb transmit information
-- 
cgit v1.2.3


From 0ed94eaaed618634f68197161203aac9f849471e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 7 Nov 2008 19:50:42 -0800
Subject: mac80211: remove more excess kernel-doc

Delete kernel-doc struct descriptions for fields that don't exist:

Warning(include/net/mac80211.h:1263): Excess struct/union/enum/typedef member 'conf_ht' description in 'ieee80211_ops'
Warning(net/mac80211/sta_info.h:309): Excess struct/union/enum/typedef member 'addr' description in 'sta_info'
Warning(net/mac80211/sta_info.h:309): Excess struct/union/enum/typedef member 'aid' description in 'sta_info'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
cc: Johannes Berg <johannes@sipsolutions.net>
cc: John W. Linville <linville@tuxdriver.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h  | 2 --
 net/mac80211/sta_info.h | 3 ---
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c813fbf7d364..0dbbfc7b0509 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1269,8 +1269,6 @@ enum ieee80211_ampdu_mlme_action {
  *	This is needed only for IBSS mode and the result of this function is
  *	used to determine whether to reply to Probe Requests.
  *
- * @conf_ht: Configures low level driver with 802.11n HT data. Must be atomic.
- *
  * @ampdu_action: Perform a certain A-MPDU action
  * 	The RA/TID combination determines the destination and TID we want
  * 	the ampdu action to be performed for. The action is defined through
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5ad9250b63ab..dc2606d0ae77 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -169,9 +169,6 @@ struct sta_ampdu_mlme {
  * @lock: used for locking all fields that require locking, see comments
  *	in the header file.
  * @flaglock: spinlock for flags accesses
- * @addr: MAC address of this STA
- * @aid: STA's unique AID (1..2007, 0 = not assigned yet),
- *	only used in AP (and IBSS?) mode
  * @listen_interval: listen interval of this station, when we're acting as AP
  * @pin_status: used internally for pinning a STA struct into memory
  * @flags: STA flags, see &enum ieee80211_sta_info_flags
-- 
cgit v1.2.3


From 274bfb8dc5ffa16cb073801bebe76ab7f4e2e73d Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 29 Oct 2008 11:35:05 -0400
Subject: lib80211: absorb crypto bits from net/ieee80211

These bits are shared already between ipw2x00 and hostap, and could
probably be shared both more cleanly and with other drivers.  This
commit simply relocates the code to lib80211 and adjusts the drivers
appropriately.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/hostap/Kconfig           |   6 +-
 drivers/net/wireless/hostap/hostap.h          |   2 +-
 drivers/net/wireless/hostap/hostap_80211.h    |   2 +-
 drivers/net/wireless/hostap/hostap_80211_rx.c |  10 +-
 drivers/net/wireless/hostap/hostap_80211_tx.c |   8 +-
 drivers/net/wireless/hostap/hostap_ap.c       |  12 +-
 drivers/net/wireless/hostap/hostap_ap.h       |   8 +-
 drivers/net/wireless/hostap/hostap_hw.c       |  36 +-
 drivers/net/wireless/hostap/hostap_ioctl.c    | 110 ++--
 drivers/net/wireless/hostap/hostap_main.c     |  19 +-
 drivers/net/wireless/hostap/hostap_proc.c     |  20 +-
 drivers/net/wireless/hostap/hostap_wlan.h     |   6 +-
 drivers/net/wireless/ipw2100.c                |  12 +-
 drivers/net/wireless/ipw2200.c                |  12 +-
 include/net/ieee80211.h                       |  10 +-
 include/net/ieee80211_crypt.h                 | 108 ----
 include/net/lib80211.h                        | 108 ++++
 net/ieee80211/Kconfig                         |  15 +-
 net/ieee80211/Makefile                        |   4 -
 net/ieee80211/ieee80211_crypt.c               | 206 -------
 net/ieee80211/ieee80211_crypt_ccmp.c          | 492 ----------------
 net/ieee80211/ieee80211_crypt_tkip.c          | 782 -------------------------
 net/ieee80211/ieee80211_crypt_wep.c           | 295 ----------
 net/ieee80211/ieee80211_module.c              |  23 +-
 net/ieee80211/ieee80211_rx.c                  |   8 +-
 net/ieee80211/ieee80211_tx.c                  |   7 +-
 net/ieee80211/ieee80211_wx.c                  |  68 +--
 net/wireless/Kconfig                          |   9 +
 net/wireless/Makefile                         |   3 +
 net/wireless/lib80211.c                       | 191 ++++++-
 net/wireless/lib80211_crypt_ccmp.c            | 492 ++++++++++++++++
 net/wireless/lib80211_crypt_tkip.c            | 784 ++++++++++++++++++++++++++
 net/wireless/lib80211_crypt_wep.c             | 296 ++++++++++
 33 files changed, 2075 insertions(+), 2089 deletions(-)
 delete mode 100644 include/net/ieee80211_crypt.h
 delete mode 100644 net/ieee80211/ieee80211_crypt.c
 delete mode 100644 net/ieee80211/ieee80211_crypt_ccmp.c
 delete mode 100644 net/ieee80211/ieee80211_crypt_tkip.c
 delete mode 100644 net/ieee80211/ieee80211_crypt_wep.c
 create mode 100644 net/wireless/lib80211_crypt_ccmp.c
 create mode 100644 net/wireless/lib80211_crypt_tkip.c
 create mode 100644 net/wireless/lib80211_crypt_wep.c

(limited to 'include')

diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig
index 1fef33169fdd..87bbd4db4bad 100644
--- a/drivers/net/wireless/hostap/Kconfig
+++ b/drivers/net/wireless/hostap/Kconfig
@@ -2,8 +2,10 @@ config HOSTAP
 	tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)"
 	depends on WLAN_80211
 	select WIRELESS_EXT
-	select IEEE80211
-	select IEEE80211_CRYPT_WEP
+	select LIB80211
+	select LIB80211_CRYPT_WEP
+	select LIB80211_CRYPT_TKIP
+	select LIB80211_CRYPT_CCMP
 	---help---
 	Shared driver code for IEEE 802.11b wireless cards based on
 	Intersil Prism2/2.5/3 chipset. This driver supports so called
diff --git a/drivers/net/wireless/hostap/hostap.h b/drivers/net/wireless/hostap/hostap.h
index 3a386a636cca..2453deaa3e00 100644
--- a/drivers/net/wireless/hostap/hostap.h
+++ b/drivers/net/wireless/hostap/hostap.h
@@ -63,7 +63,7 @@ void ap_control_flush_macs(struct mac_restrictions *mac_restrictions);
 int ap_control_kick_mac(struct ap_data *ap, struct net_device *dev, u8 *mac);
 void ap_control_kickall(struct ap_data *ap);
 void * ap_crypt_get_ptrs(struct ap_data *ap, u8 *addr, int permanent,
-			 struct ieee80211_crypt_data ***crypt);
+			 struct lib80211_crypt_data ***crypt);
 int prism2_ap_get_sta_qual(local_info_t *local, struct sockaddr addr[],
 			   struct iw_quality qual[], int buf_size,
 			   int aplist);
diff --git a/drivers/net/wireless/hostap/hostap_80211.h b/drivers/net/wireless/hostap/hostap_80211.h
index 3694b1eba521..3a9474d9a907 100644
--- a/drivers/net/wireless/hostap/hostap_80211.h
+++ b/drivers/net/wireless/hostap/hostap_80211.h
@@ -2,7 +2,7 @@
 #define HOSTAP_80211_H
 
 #include <linux/types.h>
-#include <net/ieee80211_crypt.h>
+#include <net/ieee80211.h>
 
 struct hostap_ieee80211_mgmt {
 	__le16 frame_control;
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 5f64461aa54e..19b1bf0478bd 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1,5 +1,5 @@
 #include <linux/etherdevice.h>
-#include <net/ieee80211_crypt.h>
+#include <net/lib80211.h>
 
 #include "hostap_80211.h"
 #include "hostap.h"
@@ -649,7 +649,7 @@ static int hostap_is_eapol_frame(local_info_t *local, struct sk_buff *skb)
 /* Called only as a tasklet (software IRQ) */
 static int
 hostap_rx_frame_decrypt(local_info_t *local, struct sk_buff *skb,
-			struct ieee80211_crypt_data *crypt)
+			struct lib80211_crypt_data *crypt)
 {
 	struct ieee80211_hdr_4addr *hdr;
 	int res, hdrlen;
@@ -687,7 +687,7 @@ hostap_rx_frame_decrypt(local_info_t *local, struct sk_buff *skb,
 /* Called only as a tasklet (software IRQ) */
 static int
 hostap_rx_frame_decrypt_msdu(local_info_t *local, struct sk_buff *skb,
-			     int keyidx, struct ieee80211_crypt_data *crypt)
+			     int keyidx, struct lib80211_crypt_data *crypt)
 {
 	struct ieee80211_hdr_4addr *hdr;
 	int res, hdrlen;
@@ -733,7 +733,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 	int from_assoc_ap = 0;
 	u8 dst[ETH_ALEN];
 	u8 src[ETH_ALEN];
-	struct ieee80211_crypt_data *crypt = NULL;
+	struct lib80211_crypt_data *crypt = NULL;
 	void *sta = NULL;
 	int keyidx = 0;
 
@@ -785,7 +785,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		int idx = 0;
 		if (skb->len >= hdrlen + 3)
 			idx = skb->data[hdrlen + 3] >> 6;
-		crypt = local->crypt[idx];
+		crypt = local->crypt_info.crypt[idx];
 		sta = NULL;
 
 		/* Use station specific key to override default keys if the
diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c
index 075247188e64..078a010f39a0 100644
--- a/drivers/net/wireless/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_tx.c
@@ -306,7 +306,7 @@ int hostap_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 /* Called only from software IRQ */
 static struct sk_buff * hostap_tx_encrypt(struct sk_buff *skb,
-					  struct ieee80211_crypt_data *crypt)
+					  struct lib80211_crypt_data *crypt)
 {
 	struct hostap_interface *iface;
 	local_info_t *local;
@@ -405,7 +405,7 @@ int hostap_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (local->host_encrypt) {
 		/* Set crypt to default algorithm and key; will be replaced in
 		 * AP code if STA has own alg/key */
-		tx.crypt = local->crypt[local->tx_keyidx];
+		tx.crypt = local->crypt_info.crypt[local->crypt_info.tx_keyidx];
 		tx.host_encrypt = 1;
 	} else {
 		tx.crypt = NULL;
@@ -487,7 +487,9 @@ int hostap_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (tx.crypt && (!tx.crypt->ops || !tx.crypt->ops->encrypt_mpdu))
 		tx.crypt = NULL;
-	else if ((tx.crypt || local->crypt[local->tx_keyidx]) && !no_encrypt) {
+	else if ((tx.crypt ||
+		 local->crypt_info.crypt[local->crypt_info.tx_keyidx]) &&
+		 !no_encrypt) {
 		/* Add ISWEP flag both for firmware and host based encryption
 		 */
 		fc |= IEEE80211_FCTL_PROTECTED;
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index dec3dbe1bf8f..0903db786d5f 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -1206,7 +1206,7 @@ static void prism2_check_tx_rates(struct sta_info *sta)
 
 static void ap_crypt_init(struct ap_data *ap)
 {
-	ap->crypt = ieee80211_get_crypto_ops("WEP");
+	ap->crypt = lib80211_get_crypto_ops("WEP");
 
 	if (ap->crypt) {
 		if (ap->crypt->init) {
@@ -1224,7 +1224,7 @@ static void ap_crypt_init(struct ap_data *ap)
 
 	if (ap->crypt == NULL) {
 		printk(KERN_WARNING "AP could not initialize WEP: load module "
-		       "ieee80211_crypt_wep.ko\n");
+		       "lib80211_crypt_wep.ko\n");
 	}
 }
 
@@ -1293,7 +1293,7 @@ static void handle_authen(local_info_t *local, struct sk_buff *skb,
 	__le16 *pos;
 	u16 resp = WLAN_STATUS_SUCCESS, fc;
 	struct sta_info *sta = NULL;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	char *txt = "";
 
 	len = skb->len - IEEE80211_MGMT_HDR_LEN;
@@ -1319,7 +1319,7 @@ static void handle_authen(local_info_t *local, struct sk_buff *skb,
 		int idx = 0;
 		if (skb->len >= hdrlen + 3)
 			idx = skb->data[hdrlen + 3] >> 6;
-		crypt = local->crypt[idx];
+		crypt = local->crypt_info.crypt[idx];
 	}
 
 	pos = (__le16 *) (skb->data + IEEE80211_MGMT_HDR_LEN);
@@ -3065,7 +3065,7 @@ ap_rx_ret hostap_handle_sta_rx(local_info_t *local, struct net_device *dev,
 /* Called only as a tasklet (software IRQ) */
 int hostap_handle_sta_crypto(local_info_t *local,
 			     struct ieee80211_hdr_4addr *hdr,
-			     struct ieee80211_crypt_data **crypt,
+			     struct lib80211_crypt_data **crypt,
 			     void **sta_ptr)
 {
 	struct sta_info *sta;
@@ -3213,7 +3213,7 @@ void hostap_update_rates(local_info_t *local)
 
 
 void * ap_crypt_get_ptrs(struct ap_data *ap, u8 *addr, int permanent,
-			 struct ieee80211_crypt_data ***crypt)
+			 struct lib80211_crypt_data ***crypt)
 {
 	struct sta_info *sta;
 
diff --git a/drivers/net/wireless/hostap/hostap_ap.h b/drivers/net/wireless/hostap/hostap_ap.h
index 2fa2452b6b07..d36e4b175336 100644
--- a/drivers/net/wireless/hostap/hostap_ap.h
+++ b/drivers/net/wireless/hostap/hostap_ap.h
@@ -74,7 +74,7 @@ struct sta_info {
 	u32 tx_since_last_failure;
 	u32 tx_consecutive_exc;
 
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 
 	int ap; /* whether this station is an AP */
 
@@ -209,7 +209,7 @@ struct ap_data {
 
 	/* WEP operations for generating challenges to be used with shared key
 	 * authentication */
-	struct ieee80211_crypto_ops *crypt;
+	struct lib80211_crypto_ops *crypt;
 	void *crypt_priv;
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
 };
@@ -229,7 +229,7 @@ typedef enum {
 struct hostap_tx_data {
 	struct sk_buff *skb;
 	int host_encrypt;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	void *sta_ptr;
 };
 ap_tx_ret hostap_handle_sta_tx(local_info_t *local, struct hostap_tx_data *tx);
@@ -244,7 +244,7 @@ ap_rx_ret hostap_handle_sta_rx(local_info_t *local, struct net_device *dev,
 			       struct hostap_80211_rx_status *rx_stats,
 			       int wds);
 int hostap_handle_sta_crypto(local_info_t *local, struct ieee80211_hdr_4addr *hdr,
-			     struct ieee80211_crypt_data **crypt,
+			     struct lib80211_crypt_data **crypt,
 			     void **sta_ptr);
 int hostap_is_sta_assoc(struct ap_data *ap, u8 *sta_addr);
 int hostap_is_sta_authorized(struct ap_data *ap, u8 *sta_addr);
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index fd7f7ceeac46..066299fc9259 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -47,7 +47,7 @@
 #include <linux/wireless.h>
 #include <net/iw_handler.h>
 #include <net/ieee80211.h>
-#include <net/ieee80211_crypt.h>
+#include <net/lib80211.h>
 #include <asm/irq.h>
 
 #include "hostap_80211.h"
@@ -2791,11 +2791,12 @@ static void prism2_check_sta_fw_version(local_info_t *local)
 static void prism2_crypt_deinit_entries(local_info_t *local, int force)
 {
 	struct list_head *ptr, *n;
-	struct ieee80211_crypt_data *entry;
+	struct lib80211_crypt_data *entry;
 
-	for (ptr = local->crypt_deinit_list.next, n = ptr->next;
-	     ptr != &local->crypt_deinit_list; ptr = n, n = ptr->next) {
-		entry = list_entry(ptr, struct ieee80211_crypt_data, list);
+	for (ptr = local->crypt_info.crypt_deinit_list.next, n = ptr->next;
+	     ptr != &local->crypt_info.crypt_deinit_list;
+	     ptr = n, n = ptr->next) {
+		entry = list_entry(ptr, struct lib80211_crypt_data, list);
 
 		if (atomic_read(&entry->refcnt) != 0 && !force)
 			continue;
@@ -2816,11 +2817,11 @@ static void prism2_crypt_deinit_handler(unsigned long data)
 
 	spin_lock_irqsave(&local->lock, flags);
 	prism2_crypt_deinit_entries(local, 0);
-	if (!list_empty(&local->crypt_deinit_list)) {
+	if (!list_empty(&local->crypt_info.crypt_deinit_list)) {
 		printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
 		       "deletion list\n", local->dev->name);
-		local->crypt_deinit_timer.expires = jiffies + HZ;
-		add_timer(&local->crypt_deinit_timer);
+		local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ;
+		add_timer(&local->crypt_info.crypt_deinit_timer);
 	}
 	spin_unlock_irqrestore(&local->lock, flags);
 
@@ -3250,10 +3251,13 @@ while (0)
 
 	INIT_LIST_HEAD(&local->cmd_queue);
 	init_waitqueue_head(&local->hostscan_wq);
-	INIT_LIST_HEAD(&local->crypt_deinit_list);
-	init_timer(&local->crypt_deinit_timer);
-	local->crypt_deinit_timer.data = (unsigned long) local;
-	local->crypt_deinit_timer.function = prism2_crypt_deinit_handler;
+
+	local->crypt_info.name = dev->name;
+	local->crypt_info.lock = &local->lock;
+	INIT_LIST_HEAD(&local->crypt_info.crypt_deinit_list);
+	init_timer(&local->crypt_info.crypt_deinit_timer);
+	local->crypt_info.crypt_deinit_timer.data = (unsigned long) local;
+	local->crypt_info.crypt_deinit_timer.function = prism2_crypt_deinit_handler;
 
 	init_timer(&local->passive_scan_timer);
 	local->passive_scan_timer.data = (unsigned long) local;
@@ -3354,8 +3358,8 @@ static void prism2_free_local_data(struct net_device *dev)
 
 	flush_scheduled_work();
 
-	if (timer_pending(&local->crypt_deinit_timer))
-		del_timer(&local->crypt_deinit_timer);
+	if (timer_pending(&local->crypt_info.crypt_deinit_timer))
+		del_timer(&local->crypt_info.crypt_deinit_timer);
 	prism2_crypt_deinit_entries(local, 1);
 
 	if (timer_pending(&local->passive_scan_timer))
@@ -3374,12 +3378,12 @@ static void prism2_free_local_data(struct net_device *dev)
 		prism2_callback(local, PRISM2_CALLBACK_DISABLE);
 
 	for (i = 0; i < WEP_KEYS; i++) {
-		struct ieee80211_crypt_data *crypt = local->crypt[i];
+		struct lib80211_crypt_data *crypt = local->crypt_info.crypt[i];
 		if (crypt) {
 			if (crypt->ops)
 				crypt->ops->deinit(crypt->priv);
 			kfree(crypt);
-			local->crypt[i] = NULL;
+			local->crypt_info.crypt[i] = NULL;
 		}
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index 2318c5df7a08..29aebb679099 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -2,7 +2,7 @@
 
 #include <linux/types.h>
 #include <linux/ethtool.h>
-#include <net/ieee80211_crypt.h>
+#include <net/lib80211.h>
 
 #include "hostap_wlan.h"
 #include "hostap.h"
@@ -117,9 +117,9 @@ static int prism2_get_name(struct net_device *dev,
 
 
 static void prism2_crypt_delayed_deinit(local_info_t *local,
-					struct ieee80211_crypt_data **crypt)
+					struct lib80211_crypt_data **crypt)
 {
-	struct ieee80211_crypt_data *tmp;
+	struct lib80211_crypt_data *tmp;
 	unsigned long flags;
 
 	tmp = *crypt;
@@ -133,10 +133,10 @@ static void prism2_crypt_delayed_deinit(local_info_t *local,
 	 * locking. */
 
 	spin_lock_irqsave(&local->lock, flags);
-	list_add(&tmp->list, &local->crypt_deinit_list);
-	if (!timer_pending(&local->crypt_deinit_timer)) {
-		local->crypt_deinit_timer.expires = jiffies + HZ;
-		add_timer(&local->crypt_deinit_timer);
+	list_add(&tmp->list, &local->crypt_info.crypt_deinit_list);
+	if (!timer_pending(&local->crypt_info.crypt_deinit_timer)) {
+		local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ;
+		add_timer(&local->crypt_info.crypt_deinit_timer);
 	}
 	spin_unlock_irqrestore(&local->lock, flags);
 }
@@ -149,20 +149,20 @@ static int prism2_ioctl_siwencode(struct net_device *dev,
 	struct hostap_interface *iface;
 	local_info_t *local;
 	int i;
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypt_data **crypt;
 
 	iface = netdev_priv(dev);
 	local = iface->local;
 
 	i = erq->flags & IW_ENCODE_INDEX;
 	if (i < 1 || i > 4)
-		i = local->tx_keyidx;
+		i = local->crypt_info.tx_keyidx;
 	else
 		i--;
 	if (i < 0 || i >= WEP_KEYS)
 		return -EINVAL;
 
-	crypt = &local->crypt[i];
+	crypt = &local->crypt_info.crypt[i];
 
 	if (erq->flags & IW_ENCODE_DISABLED) {
 		if (*crypt)
@@ -177,17 +177,17 @@ static int prism2_ioctl_siwencode(struct net_device *dev,
 	}
 
 	if (*crypt == NULL) {
-		struct ieee80211_crypt_data *new_crypt;
+		struct lib80211_crypt_data *new_crypt;
 
 		/* take WEP into use */
-		new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data),
+		new_crypt = kzalloc(sizeof(struct lib80211_crypt_data),
 				GFP_KERNEL);
 		if (new_crypt == NULL)
 			return -ENOMEM;
-		new_crypt->ops = ieee80211_get_crypto_ops("WEP");
+		new_crypt->ops = lib80211_get_crypto_ops("WEP");
 		if (!new_crypt->ops) {
-			request_module("ieee80211_crypt_wep");
-			new_crypt->ops = ieee80211_get_crypto_ops("WEP");
+			request_module("lib80211_crypt_wep");
+			new_crypt->ops = lib80211_get_crypto_ops("WEP");
 		}
 		if (new_crypt->ops)
 			new_crypt->priv = new_crypt->ops->init(i);
@@ -210,16 +210,16 @@ static int prism2_ioctl_siwencode(struct net_device *dev,
 			memset(keybuf + erq->length, 0, len - erq->length);
 		(*crypt)->ops->set_key(keybuf, len, NULL, (*crypt)->priv);
 		for (j = 0; j < WEP_KEYS; j++) {
-			if (j != i && local->crypt[j]) {
+			if (j != i && local->crypt_info.crypt[j]) {
 				first = 0;
 				break;
 			}
 		}
 		if (first)
-			local->tx_keyidx = i;
+			local->crypt_info.tx_keyidx = i;
 	} else {
 		/* No key data - just set the default TX key index */
-		local->tx_keyidx = i;
+		local->crypt_info.tx_keyidx = i;
 	}
 
  done:
@@ -252,20 +252,20 @@ static int prism2_ioctl_giwencode(struct net_device *dev,
 	local_info_t *local;
 	int i, len;
 	u16 val;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 
 	iface = netdev_priv(dev);
 	local = iface->local;
 
 	i = erq->flags & IW_ENCODE_INDEX;
 	if (i < 1 || i > 4)
-		i = local->tx_keyidx;
+		i = local->crypt_info.tx_keyidx;
 	else
 		i--;
 	if (i < 0 || i >= WEP_KEYS)
 		return -EINVAL;
 
-	crypt = local->crypt[i];
+	crypt = local->crypt_info.crypt[i];
 	erq->flags = i + 1;
 
 	if (crypt == NULL || crypt->ops == NULL) {
@@ -3227,8 +3227,8 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	local_info_t *local = iface->local;
 	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
 	int i, ret = 0;
-	struct ieee80211_crypto_ops *ops;
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypto_ops *ops;
+	struct lib80211_crypt_data **crypt;
 	void *sta_ptr;
 	u8 *addr;
 	const char *alg, *module;
@@ -3237,7 +3237,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	if (i > WEP_KEYS)
 		return -EINVAL;
 	if (i < 1 || i > WEP_KEYS)
-		i = local->tx_keyidx;
+		i = local->crypt_info.tx_keyidx;
 	else
 		i--;
 	if (i < 0 || i >= WEP_KEYS)
@@ -3247,7 +3247,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	if (addr[0] == 0xff && addr[1] == 0xff && addr[2] == 0xff &&
 	    addr[3] == 0xff && addr[4] == 0xff && addr[5] == 0xff) {
 		sta_ptr = NULL;
-		crypt = &local->crypt[i];
+		crypt = &local->crypt_info.crypt[i];
 	} else {
 		if (i != 0)
 			return -EINVAL;
@@ -3260,7 +3260,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 				 * is emulated by using default key idx 0.
 				 */
 				i = 0;
-				crypt = &local->crypt[i];
+				crypt = &local->crypt_info.crypt[i];
 			} else
 				return -EINVAL;
 		}
@@ -3276,15 +3276,15 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	switch (ext->alg) {
 	case IW_ENCODE_ALG_WEP:
 		alg = "WEP";
-		module = "ieee80211_crypt_wep";
+		module = "lib80211_crypt_wep";
 		break;
 	case IW_ENCODE_ALG_TKIP:
 		alg = "TKIP";
-		module = "ieee80211_crypt_tkip";
+		module = "lib80211_crypt_tkip";
 		break;
 	case IW_ENCODE_ALG_CCMP:
 		alg = "CCMP";
-		module = "ieee80211_crypt_ccmp";
+		module = "lib80211_crypt_ccmp";
 		break;
 	default:
 		printk(KERN_DEBUG "%s: unsupported algorithm %d\n",
@@ -3293,10 +3293,10 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 		goto done;
 	}
 
-	ops = ieee80211_get_crypto_ops(alg);
+	ops = lib80211_get_crypto_ops(alg);
 	if (ops == NULL) {
 		request_module(module);
-		ops = ieee80211_get_crypto_ops(alg);
+		ops = lib80211_get_crypto_ops(alg);
 	}
 	if (ops == NULL) {
 		printk(KERN_DEBUG "%s: unknown crypto alg '%s'\n",
@@ -3315,11 +3315,11 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	}
 
 	if (*crypt == NULL || (*crypt)->ops != ops) {
-		struct ieee80211_crypt_data *new_crypt;
+		struct lib80211_crypt_data *new_crypt;
 
 		prism2_crypt_delayed_deinit(local, crypt);
 
-		new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data),
+		new_crypt = kzalloc(sizeof(struct lib80211_crypt_data),
 				GFP_KERNEL);
 		if (new_crypt == NULL) {
 			ret = -ENOMEM;
@@ -3354,20 +3354,20 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 
 	if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) {
 		if (!sta_ptr)
-			local->tx_keyidx = i;
+			local->crypt_info.tx_keyidx = i;
 	}
 
 
 	if (sta_ptr == NULL && ext->key_len > 0) {
 		int first = 1, j;
 		for (j = 0; j < WEP_KEYS; j++) {
-			if (j != i && local->crypt[j]) {
+			if (j != i && local->crypt_info.crypt[j]) {
 				first = 0;
 				break;
 			}
 		}
 		if (first)
-			local->tx_keyidx = i;
+			local->crypt_info.tx_keyidx = i;
 	}
 
  done:
@@ -3399,7 +3399,7 @@ static int prism2_ioctl_giwencodeext(struct net_device *dev,
 {
 	struct hostap_interface *iface = netdev_priv(dev);
 	local_info_t *local = iface->local;
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypt_data **crypt;
 	void *sta_ptr;
 	int max_key_len, i;
 	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
@@ -3411,7 +3411,7 @@ static int prism2_ioctl_giwencodeext(struct net_device *dev,
 
 	i = erq->flags & IW_ENCODE_INDEX;
 	if (i < 1 || i > WEP_KEYS)
-		i = local->tx_keyidx;
+		i = local->crypt_info.tx_keyidx;
 	else
 		i--;
 
@@ -3419,7 +3419,7 @@ static int prism2_ioctl_giwencodeext(struct net_device *dev,
 	if (addr[0] == 0xff && addr[1] == 0xff && addr[2] == 0xff &&
 	    addr[3] == 0xff && addr[4] == 0xff && addr[5] == 0xff) {
 		sta_ptr = NULL;
-		crypt = &local->crypt[i];
+		crypt = &local->crypt_info.crypt[i];
 	} else {
 		i = 0;
 		sta_ptr = ap_crypt_get_ptrs(local->ap, addr, 0, &crypt);
@@ -3468,8 +3468,8 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 				       int param_len)
 {
 	int ret = 0;
-	struct ieee80211_crypto_ops *ops;
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypto_ops *ops;
+	struct lib80211_crypt_data **crypt;
 	void *sta_ptr;
 
 	param->u.crypt.err = 0;
@@ -3486,7 +3486,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 		if (param->u.crypt.idx >= WEP_KEYS)
 			return -EINVAL;
 		sta_ptr = NULL;
-		crypt = &local->crypt[param->u.crypt.idx];
+		crypt = &local->crypt_info.crypt[param->u.crypt.idx];
 	} else {
 		if (param->u.crypt.idx)
 			return -EINVAL;
@@ -3507,16 +3507,16 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 		goto done;
 	}
 
-	ops = ieee80211_get_crypto_ops(param->u.crypt.alg);
+	ops = lib80211_get_crypto_ops(param->u.crypt.alg);
 	if (ops == NULL && strcmp(param->u.crypt.alg, "WEP") == 0) {
-		request_module("ieee80211_crypt_wep");
-		ops = ieee80211_get_crypto_ops(param->u.crypt.alg);
+		request_module("lib80211_crypt_wep");
+		ops = lib80211_get_crypto_ops(param->u.crypt.alg);
 	} else if (ops == NULL && strcmp(param->u.crypt.alg, "TKIP") == 0) {
-		request_module("ieee80211_crypt_tkip");
-		ops = ieee80211_get_crypto_ops(param->u.crypt.alg);
+		request_module("lib80211_crypt_tkip");
+		ops = lib80211_get_crypto_ops(param->u.crypt.alg);
 	} else if (ops == NULL && strcmp(param->u.crypt.alg, "CCMP") == 0) {
-		request_module("ieee80211_crypt_ccmp");
-		ops = ieee80211_get_crypto_ops(param->u.crypt.alg);
+		request_module("lib80211_crypt_ccmp");
+		ops = lib80211_get_crypto_ops(param->u.crypt.alg);
 	}
 	if (ops == NULL) {
 		printk(KERN_DEBUG "%s: unknown crypto alg '%s'\n",
@@ -3531,11 +3531,11 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 	local->host_decrypt = local->host_encrypt = 1;
 
 	if (*crypt == NULL || (*crypt)->ops != ops) {
-		struct ieee80211_crypt_data *new_crypt;
+		struct lib80211_crypt_data *new_crypt;
 
 		prism2_crypt_delayed_deinit(local, crypt);
 
-		new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data),
+		new_crypt = kzalloc(sizeof(struct lib80211_crypt_data),
 				GFP_KERNEL);
 		if (new_crypt == NULL) {
 			ret = -ENOMEM;
@@ -3568,7 +3568,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 
 	if (param->u.crypt.flags & HOSTAP_CRYPT_FLAG_SET_TX_KEY) {
 		if (!sta_ptr)
-			local->tx_keyidx = param->u.crypt.idx;
+			local->crypt_info.tx_keyidx = param->u.crypt.idx;
 		else if (param->u.crypt.idx) {
 			printk(KERN_DEBUG "%s: TX key idx setting failed\n",
 			       local->dev->name);
@@ -3604,7 +3604,7 @@ static int prism2_ioctl_get_encryption(local_info_t *local,
 				       struct prism2_hostapd_param *param,
 				       int param_len)
 {
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypt_data **crypt;
 	void *sta_ptr;
 	int max_key_len;
 
@@ -3620,8 +3620,8 @@ static int prism2_ioctl_get_encryption(local_info_t *local,
 	    param->sta_addr[4] == 0xff && param->sta_addr[5] == 0xff) {
 		sta_ptr = NULL;
 		if (param->u.crypt.idx >= WEP_KEYS)
-			param->u.crypt.idx = local->tx_keyidx;
-		crypt = &local->crypt[param->u.crypt.idx];
+			param->u.crypt.idx = local->crypt_info.tx_keyidx;
+		crypt = &local->crypt_info.crypt[param->u.crypt.idx];
 	} else {
 		param->u.crypt.idx = 0;
 		sta_ptr = ap_crypt_get_ptrs(local->ap, param->sta_addr, 0,
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 4c36eb2fafd1..02a312ca8607 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -27,7 +27,7 @@
 #include <net/net_namespace.h>
 #include <net/iw_handler.h>
 #include <net/ieee80211.h>
-#include <net/ieee80211_crypt.h>
+#include <net/lib80211.h>
 #include <asm/uaccess.h>
 
 #include "hostap_wlan.h"
@@ -343,10 +343,11 @@ int hostap_set_encryption(local_info_t *local)
 	char keybuf[WEP_KEY_LEN + 1];
 	enum { NONE, WEP, OTHER } encrypt_type;
 
-	idx = local->tx_keyidx;
-	if (local->crypt[idx] == NULL || local->crypt[idx]->ops == NULL)
+	idx = local->crypt_info.tx_keyidx;
+	if (local->crypt_info.crypt[idx] == NULL ||
+	    local->crypt_info.crypt[idx]->ops == NULL)
 		encrypt_type = NONE;
-	else if (strcmp(local->crypt[idx]->ops->name, "WEP") == 0)
+	else if (strcmp(local->crypt_info.crypt[idx]->ops->name, "WEP") == 0)
 		encrypt_type = WEP;
 	else
 		encrypt_type = OTHER;
@@ -394,17 +395,17 @@ int hostap_set_encryption(local_info_t *local)
 	/* 104-bit support seems to require that all the keys are set to the
 	 * same keylen */
 	keylen = 6; /* first 5 octets */
-	len = local->crypt[idx]->ops->get_key(keybuf, sizeof(keybuf),
-					      NULL, local->crypt[idx]->priv);
+	len = local->crypt_info.crypt[idx]->ops->get_key(keybuf, sizeof(keybuf), NULL,
+							   local->crypt_info.crypt[idx]->priv);
 	if (idx >= 0 && idx < WEP_KEYS && len > 5)
 		keylen = WEP_KEY_LEN + 1; /* first 13 octets */
 
 	for (i = 0; i < WEP_KEYS; i++) {
 		memset(keybuf, 0, sizeof(keybuf));
-		if (local->crypt[i]) {
-			(void) local->crypt[i]->ops->get_key(
+		if (local->crypt_info.crypt[i]) {
+			(void) local->crypt_info.crypt[i]->ops->get_key(
 				keybuf, sizeof(keybuf),
-				NULL, local->crypt[i]->priv);
+				NULL, local->crypt_info.crypt[i]->priv);
 		}
 		if (local->func->set_rid(local->dev,
 					 HFA384X_RID_CNFDEFAULTKEY0 + i,
diff --git a/drivers/net/wireless/hostap/hostap_proc.c b/drivers/net/wireless/hostap/hostap_proc.c
index ae7d3caf3dae..005ff25a405f 100644
--- a/drivers/net/wireless/hostap/hostap_proc.c
+++ b/drivers/net/wireless/hostap/hostap_proc.c
@@ -2,7 +2,7 @@
 
 #include <linux/types.h>
 #include <linux/proc_fs.h>
-#include <net/ieee80211_crypt.h>
+#include <net/lib80211.h>
 
 #include "hostap_wlan.h"
 #include "hostap.h"
@@ -36,9 +36,10 @@ static int prism2_debug_proc_read(char *page, char **start, off_t off,
 	p += sprintf(p, "dev_enabled=%d\n", local->dev_enabled);
 	p += sprintf(p, "sw_tick_stuck=%d\n", local->sw_tick_stuck);
 	for (i = 0; i < WEP_KEYS; i++) {
-		if (local->crypt[i] && local->crypt[i]->ops) {
-			p += sprintf(p, "crypt[%d]=%s\n",
-				     i, local->crypt[i]->ops->name);
+		if (local->crypt_info.crypt[i] &&
+		    local->crypt_info.crypt[i]->ops) {
+			p += sprintf(p, "crypt[%d]=%s\n", i,
+				     local->crypt_info.crypt[i]->ops->name);
 		}
 	}
 	p += sprintf(p, "pri_only=%d\n", local->pri_only);
@@ -206,12 +207,13 @@ static int prism2_crypt_proc_read(char *page, char **start, off_t off,
 		return 0;
 	}
 
-	p += sprintf(p, "tx_keyidx=%d\n", local->tx_keyidx);
+	p += sprintf(p, "tx_keyidx=%d\n", local->crypt_info.tx_keyidx);
 	for (i = 0; i < WEP_KEYS; i++) {
-		if (local->crypt[i] && local->crypt[i]->ops &&
-		    local->crypt[i]->ops->print_stats) {
-			p = local->crypt[i]->ops->print_stats(
-				p, local->crypt[i]->priv);
+		if (local->crypt_info.crypt[i] &&
+		    local->crypt_info.crypt[i]->ops &&
+		    local->crypt_info.crypt[i]->ops->print_stats) {
+			p = local->crypt_info.crypt[i]->ops->print_stats(
+				p, local->crypt_info.crypt[i]->priv);
 		}
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_wlan.h b/drivers/net/wireless/hostap/hostap_wlan.h
index d2c7a56b8b59..4d8d51a353cd 100644
--- a/drivers/net/wireless/hostap/hostap_wlan.h
+++ b/drivers/net/wireless/hostap/hostap_wlan.h
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <net/iw_handler.h>
 #include <net/ieee80211_radiotap.h>
+#include <net/lib80211.h>
 
 #include "hostap_config.h"
 #include "hostap_common.h"
@@ -763,10 +764,7 @@ struct local_info {
 
 #define WEP_KEYS 4
 #define WEP_KEY_LEN 13
-	struct ieee80211_crypt_data *crypt[WEP_KEYS];
-	int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */
-	struct timer_list crypt_deinit_timer;
-	struct list_head crypt_deinit_list;
+	struct lib80211_crypt_info crypt_info;
 
 	int open_wep; /* allow unencrypted frames */
 	int host_encrypt;
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 062c9f280304..2d2044d3d1c9 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -4010,7 +4010,7 @@ static ssize_t show_internals(struct device *d, struct device_attribute *attr,
 	else
 		len += sprintf(buf + len, "not connected\n");
 
-	DUMP_VAR(ieee->crypt[priv->ieee->tx_keyidx], "p");
+	DUMP_VAR(ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx], "p");
 	DUMP_VAR(status, "08lx");
 	DUMP_VAR(config, "08lx");
 	DUMP_VAR(capability, "08lx");
@@ -5514,7 +5514,7 @@ static int ipw2100_configure_security(struct ipw2100_priv *priv, int batch_mode)
 			}
 		}
 
-		ipw2100_set_key_index(priv, priv->ieee->tx_keyidx, 1);
+		ipw2100_set_key_index(priv, priv->ieee->crypt_info.tx_keyidx, 1);
 	}
 
 	/* Always enable privacy so the Host can filter WEP packets if
@@ -7620,7 +7620,7 @@ static int ipw2100_wx_set_auth(struct net_device *dev,
 	struct ipw2100_priv *priv = ieee80211_priv(dev);
 	struct ieee80211_device *ieee = priv->ieee;
 	struct iw_param *param = &wrqu->param;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	unsigned long flags;
 	int ret = 0;
 
@@ -7635,7 +7635,7 @@ static int ipw2100_wx_set_auth(struct net_device *dev,
 		break;
 
 	case IW_AUTH_TKIP_COUNTERMEASURES:
-		crypt = priv->ieee->crypt[priv->ieee->tx_keyidx];
+		crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx];
 		if (!crypt || !crypt->ops->set_flags || !crypt->ops->get_flags)
 			break;
 
@@ -7712,7 +7712,7 @@ static int ipw2100_wx_get_auth(struct net_device *dev,
 {
 	struct ipw2100_priv *priv = ieee80211_priv(dev);
 	struct ieee80211_device *ieee = priv->ieee;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	struct iw_param *param = &wrqu->param;
 	int ret = 0;
 
@@ -7728,7 +7728,7 @@ static int ipw2100_wx_get_auth(struct net_device *dev,
 		break;
 
 	case IW_AUTH_TKIP_COUNTERMEASURES:
-		crypt = priv->ieee->crypt[priv->ieee->tx_keyidx];
+		crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx];
 		if (!crypt || !crypt->ops->get_flags) {
 			IPW_DEBUG_WARNING("Can't get TKIP countermeasures: "
 					  "crypt not set!\n");
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index 6ca0f1c05048..d2a2b7586d08 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -6600,7 +6600,7 @@ static int ipw_wx_set_auth(struct net_device *dev,
 	struct ipw_priv *priv = ieee80211_priv(dev);
 	struct ieee80211_device *ieee = priv->ieee;
 	struct iw_param *param = &wrqu->param;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	unsigned long flags;
 	int ret = 0;
 
@@ -6622,7 +6622,7 @@ static int ipw_wx_set_auth(struct net_device *dev,
 		break;
 
 	case IW_AUTH_TKIP_COUNTERMEASURES:
-		crypt = priv->ieee->crypt[priv->ieee->tx_keyidx];
+		crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx];
 		if (!crypt || !crypt->ops->set_flags || !crypt->ops->get_flags)
 			break;
 
@@ -6699,7 +6699,7 @@ static int ipw_wx_get_auth(struct net_device *dev,
 {
 	struct ipw_priv *priv = ieee80211_priv(dev);
 	struct ieee80211_device *ieee = priv->ieee;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	struct iw_param *param = &wrqu->param;
 	int ret = 0;
 
@@ -6715,7 +6715,7 @@ static int ipw_wx_get_auth(struct net_device *dev,
 		break;
 
 	case IW_AUTH_TKIP_COUNTERMEASURES:
-		crypt = priv->ieee->crypt[priv->ieee->tx_keyidx];
+		crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx];
 		if (!crypt || !crypt->ops->get_flags)
 			break;
 
@@ -10251,8 +10251,8 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb,
 		case SEC_LEVEL_1:
 			tfd->u.data.tfd.tfd_24.mchdr.frame_ctl |=
 			    cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-			tfd->u.data.key_index = priv->ieee->tx_keyidx;
-			if (priv->ieee->sec.key_sizes[priv->ieee->tx_keyidx] <=
+			tfd->u.data.key_index = priv->ieee->crypt_info.tx_keyidx;
+			if (priv->ieee->sec.key_sizes[priv->ieee->crypt_info.tx_keyidx] <=
 			    40)
 				tfd->u.data.key_index |= DCT_WEP_KEY_64Bit;
 			else
diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index 738734a4653b..7ab3ed2bbccb 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -30,6 +30,8 @@
 #include <linux/wireless.h>
 #include <linux/ieee80211.h>
 
+#include <net/lib80211.h>
+
 #define IEEE80211_VERSION "git-1.1.13"
 
 #define IEEE80211_DATA_LEN		2304
@@ -355,8 +357,6 @@ struct ieee80211_stats {
 
 struct ieee80211_device;
 
-#include "ieee80211_crypt.h"
-
 #define SEC_KEY_1		(1<<0)
 #define SEC_KEY_2		(1<<1)
 #define SEC_KEY_3		(1<<2)
@@ -937,11 +937,7 @@ struct ieee80211_device {
 	size_t wpa_ie_len;
 	u8 *wpa_ie;
 
-	struct list_head crypt_deinit_list;
-	struct ieee80211_crypt_data *crypt[WEP_KEYS];
-	int tx_keyidx;		/* default TX key index (crypt[tx_keyidx]) */
-	struct timer_list crypt_deinit_timer;
-	int crypt_quiesced;
+	struct lib80211_crypt_info crypt_info;
 
 	int bcrx_sta_key;	/* use individual keys to override default keys even
 				 * with RX of broad/multicast frames */
diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h
deleted file mode 100644
index b3d65e0bedd3..000000000000
--- a/include/net/ieee80211_crypt.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Original code based on Host AP (software wireless LAN access point) driver
- * for Intersil Prism2/2.5/3.
- *
- * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- * <j@w1.fi>
- * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
- *
- * Adaption to a generic IEEE 802.11 stack by James Ketrenos
- * <jketreno@linux.intel.com>
- *
- * Copyright (c) 2004, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
- */
-
-/*
- * This file defines the interface to the ieee80211 crypto module.
- */
-#ifndef IEEE80211_CRYPT_H
-#define IEEE80211_CRYPT_H
-
-#include <linux/types.h>
-#include <linux/list.h>
-#include <net/ieee80211.h>
-#include <asm/atomic.h>
-
-enum {
-	IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0),
-};
-
-struct sk_buff;
-struct module;
-
-struct ieee80211_crypto_ops {
-	const char *name;
-	struct list_head list;
-
-	/* init new crypto context (e.g., allocate private data space,
-	 * select IV, etc.); returns NULL on failure or pointer to allocated
-	 * private data on success */
-	void *(*init) (int keyidx);
-
-	/* deinitialize crypto context and free allocated private data */
-	void (*deinit) (void *priv);
-
-	int (*build_iv) (struct sk_buff * skb, int hdr_len,
-			 u8 *key, int keylen, void *priv);
-
-	/* encrypt/decrypt return < 0 on error or >= 0 on success. The return
-	 * value from decrypt_mpdu is passed as the keyidx value for
-	 * decrypt_msdu. skb must have enough head and tail room for the
-	 * encryption; if not, error will be returned; these functions are
-	 * called for all MPDUs (i.e., fragments).
-	 */
-	int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv);
-	int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv);
-
-	/* These functions are called for full MSDUs, i.e. full frames.
-	 * These can be NULL if full MSDU operations are not needed. */
-	int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv);
-	int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len,
-			     void *priv);
-
-	int (*set_key) (void *key, int len, u8 * seq, void *priv);
-	int (*get_key) (void *key, int len, u8 * seq, void *priv);
-
-	/* procfs handler for printing out key information and possible
-	 * statistics */
-	char *(*print_stats) (char *p, void *priv);
-
-	/* Crypto specific flag get/set for configuration settings */
-	unsigned long (*get_flags) (void *priv);
-	unsigned long (*set_flags) (unsigned long flags, void *priv);
-
-	/* maximum number of bytes added by encryption; encrypt buf is
-	 * allocated with extra_prefix_len bytes, copy of in_buf, and
-	 * extra_postfix_len; encrypt need not use all this space, but
-	 * the result must start at the beginning of the buffer and correct
-	 * length must be returned */
-	int extra_mpdu_prefix_len, extra_mpdu_postfix_len;
-	int extra_msdu_prefix_len, extra_msdu_postfix_len;
-
-	struct module *owner;
-};
-
-struct ieee80211_crypt_data {
-	struct list_head list;	/* delayed deletion list */
-	struct ieee80211_crypto_ops *ops;
-	void *priv;
-	atomic_t refcnt;
-};
-
-struct ieee80211_device;
-
-int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops);
-int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops);
-struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name);
-void ieee80211_crypt_deinit_entries(struct ieee80211_device *, int);
-void ieee80211_crypt_deinit_handler(unsigned long);
-void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee,
-				    struct ieee80211_crypt_data **crypt);
-void ieee80211_crypt_quiescing(struct ieee80211_device *ieee);
-
-#endif
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index e1558a187ac0..dd1079f98da4 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -3,11 +3,32 @@
  *
  * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
  *
+ * Some bits copied from old ieee80211 component, w/ original copyright
+ * notices below:
+ *
+ * Original code based on Host AP (software wireless LAN access point) driver
+ * for Intersil Prism2/2.5/3.
+ *
+ * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
+ * <j@w1.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
+ *
+ * Adaption to a generic IEEE 802.11 stack by James Ketrenos
+ * <jketreno@linux.intel.com>
+ *
+ * Copyright (c) 2004, Intel Corporation
+ *
  */
 
 #ifndef LIB80211_H
 #define LIB80211_H
 
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <asm/atomic.h>
+#include <linux/if.h>
+#include <linux/skbuff.h>
 #include <linux/ieee80211.h>
 
 /* print_ssid() is intended to be used in debug (and possibly error)
@@ -15,4 +36,91 @@
 const char *print_ssid(char *buf, const char *ssid, u8 ssid_len);
 #define DECLARE_SSID_BUF(var) char var[IEEE80211_MAX_SSID_LEN * 4 + 1] __maybe_unused
 
+#define NUM_WEP_KEYS	4
+
+enum {
+	IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0),
+};
+
+struct lib80211_crypto_ops {
+	const char *name;
+	struct list_head list;
+
+	/* init new crypto context (e.g., allocate private data space,
+	 * select IV, etc.); returns NULL on failure or pointer to allocated
+	 * private data on success */
+	void *(*init) (int keyidx);
+
+	/* deinitialize crypto context and free allocated private data */
+	void (*deinit) (void *priv);
+
+	int (*build_iv) (struct sk_buff * skb, int hdr_len,
+			 u8 *key, int keylen, void *priv);
+
+	/* encrypt/decrypt return < 0 on error or >= 0 on success. The return
+	 * value from decrypt_mpdu is passed as the keyidx value for
+	 * decrypt_msdu. skb must have enough head and tail room for the
+	 * encryption; if not, error will be returned; these functions are
+	 * called for all MPDUs (i.e., fragments).
+	 */
+	int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv);
+	int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv);
+
+	/* These functions are called for full MSDUs, i.e. full frames.
+	 * These can be NULL if full MSDU operations are not needed. */
+	int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv);
+	int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len,
+			     void *priv);
+
+	int (*set_key) (void *key, int len, u8 * seq, void *priv);
+	int (*get_key) (void *key, int len, u8 * seq, void *priv);
+
+	/* procfs handler for printing out key information and possible
+	 * statistics */
+	char *(*print_stats) (char *p, void *priv);
+
+	/* Crypto specific flag get/set for configuration settings */
+	unsigned long (*get_flags) (void *priv);
+	unsigned long (*set_flags) (unsigned long flags, void *priv);
+
+	/* maximum number of bytes added by encryption; encrypt buf is
+	 * allocated with extra_prefix_len bytes, copy of in_buf, and
+	 * extra_postfix_len; encrypt need not use all this space, but
+	 * the result must start at the beginning of the buffer and correct
+	 * length must be returned */
+	int extra_mpdu_prefix_len, extra_mpdu_postfix_len;
+	int extra_msdu_prefix_len, extra_msdu_postfix_len;
+
+	struct module *owner;
+};
+
+struct lib80211_crypt_data {
+	struct list_head list;	/* delayed deletion list */
+	struct lib80211_crypto_ops *ops;
+	void *priv;
+	atomic_t refcnt;
+};
+
+struct lib80211_crypt_info {
+	char *name;
+	/* Most clients will already have a lock,
+	   so just point to that. */
+	spinlock_t *lock;
+
+	struct lib80211_crypt_data *crypt[NUM_WEP_KEYS];
+	int tx_keyidx;		/* default TX key index (crypt[tx_keyidx]) */
+	struct list_head crypt_deinit_list;
+	struct timer_list crypt_deinit_timer;
+	int crypt_quiesced;
+};
+
+int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops);
+int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops);
+struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name);
+void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *, int);
+void lib80211_crypt_deinit_handler(unsigned long);
+void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
+				    struct lib80211_crypt_data **crypt);
+void lib80211_crypt_quiescing(struct lib80211_crypt_info *info);
+
 #endif /* LIB80211_H */
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index d2282bb2e4f1..46f24f4c9dc7 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -8,10 +8,10 @@ config IEEE80211
 	select CRYPTO_MICHAEL_MIC
 	select CRYPTO_ECB
 	select CRC32
-	select IEEE80211_CRYPT_WEP
-	select IEEE80211_CRYPT_TKIP
-	select IEEE80211_CRYPT_CCMP
 	select LIB80211
+	select LIB80211_CRYPT_WEP
+	select LIB80211_CRYPT_TKIP
+	select LIB80211_CRYPT_CCMP
 	---help---
 	This option enables the hardware independent IEEE 802.11
 	networking stack.  This component is deprecated in favor of the
@@ -39,12 +39,3 @@ config IEEE80211_DEBUG
 
 	  If you are not trying to debug or develop the ieee80211
 	  subsystem, you most likely want to say N here.
-
-config IEEE80211_CRYPT_WEP
-	tristate
-
-config IEEE80211_CRYPT_CCMP
-	tristate
-
-config IEEE80211_CRYPT_TKIP
-	tristate
diff --git a/net/ieee80211/Makefile b/net/ieee80211/Makefile
index f988417121da..158963ff18d2 100644
--- a/net/ieee80211/Makefile
+++ b/net/ieee80211/Makefile
@@ -1,8 +1,4 @@
 obj-$(CONFIG_IEEE80211) += ieee80211.o
-obj-$(CONFIG_IEEE80211) += ieee80211_crypt.o
-obj-$(CONFIG_IEEE80211_CRYPT_WEP) += ieee80211_crypt_wep.o
-obj-$(CONFIG_IEEE80211_CRYPT_CCMP) += ieee80211_crypt_ccmp.o
-obj-$(CONFIG_IEEE80211_CRYPT_TKIP) += ieee80211_crypt_tkip.o
 ieee80211-objs := \
 	ieee80211_module.o \
 	ieee80211_tx.o \
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
deleted file mode 100644
index df5592c9339f..000000000000
--- a/net/ieee80211/ieee80211_crypt.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Host AP crypto routines
- *
- * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
- * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
- *
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <net/ieee80211.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("HostAP crypto");
-MODULE_LICENSE("GPL");
-
-struct ieee80211_crypto_alg {
-	struct list_head list;
-	struct ieee80211_crypto_ops *ops;
-};
-
-static LIST_HEAD(ieee80211_crypto_algs);
-static DEFINE_SPINLOCK(ieee80211_crypto_lock);
-
-void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force)
-{
-	struct ieee80211_crypt_data *entry, *next;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ieee->lock, flags);
-	list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) {
-		if (atomic_read(&entry->refcnt) != 0 && !force)
-			continue;
-
-		list_del(&entry->list);
-
-		if (entry->ops) {
-			entry->ops->deinit(entry->priv);
-			module_put(entry->ops->owner);
-		}
-		kfree(entry);
-	}
-	spin_unlock_irqrestore(&ieee->lock, flags);
-}
-
-/* After this, crypt_deinit_list won't accept new members */
-void ieee80211_crypt_quiescing(struct ieee80211_device *ieee)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ieee->lock, flags);
-	ieee->crypt_quiesced = 1;
-	spin_unlock_irqrestore(&ieee->lock, flags);
-}
-
-void ieee80211_crypt_deinit_handler(unsigned long data)
-{
-	struct ieee80211_device *ieee = (struct ieee80211_device *)data;
-	unsigned long flags;
-
-	ieee80211_crypt_deinit_entries(ieee, 0);
-
-	spin_lock_irqsave(&ieee->lock, flags);
-	if (!list_empty(&ieee->crypt_deinit_list) && !ieee->crypt_quiesced) {
-		printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
-		       "deletion list\n", ieee->dev->name);
-		ieee->crypt_deinit_timer.expires = jiffies + HZ;
-		add_timer(&ieee->crypt_deinit_timer);
-	}
-	spin_unlock_irqrestore(&ieee->lock, flags);
-}
-
-void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee,
-				    struct ieee80211_crypt_data **crypt)
-{
-	struct ieee80211_crypt_data *tmp;
-	unsigned long flags;
-
-	if (*crypt == NULL)
-		return;
-
-	tmp = *crypt;
-	*crypt = NULL;
-
-	/* must not run ops->deinit() while there may be pending encrypt or
-	 * decrypt operations. Use a list of delayed deinits to avoid needing
-	 * locking. */
-
-	spin_lock_irqsave(&ieee->lock, flags);
-	if (!ieee->crypt_quiesced) {
-		list_add(&tmp->list, &ieee->crypt_deinit_list);
-		if (!timer_pending(&ieee->crypt_deinit_timer)) {
-			ieee->crypt_deinit_timer.expires = jiffies + HZ;
-			add_timer(&ieee->crypt_deinit_timer);
-		}
-	}
-	spin_unlock_irqrestore(&ieee->lock, flags);
-}
-
-int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops)
-{
-	unsigned long flags;
-	struct ieee80211_crypto_alg *alg;
-
-	alg = kzalloc(sizeof(*alg), GFP_KERNEL);
-	if (alg == NULL)
-		return -ENOMEM;
-
-	alg->ops = ops;
-
-	spin_lock_irqsave(&ieee80211_crypto_lock, flags);
-	list_add(&alg->list, &ieee80211_crypto_algs);
-	spin_unlock_irqrestore(&ieee80211_crypto_lock, flags);
-
-	printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n",
-	       ops->name);
-
-	return 0;
-}
-
-int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops)
-{
-	struct ieee80211_crypto_alg *alg;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ieee80211_crypto_lock, flags);
-	list_for_each_entry(alg, &ieee80211_crypto_algs, list) {
-		if (alg->ops == ops)
-			goto found;
-	}
-	spin_unlock_irqrestore(&ieee80211_crypto_lock, flags);
-	return -EINVAL;
-
-      found:
-	printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm "
-	       "'%s'\n", ops->name);
-	list_del(&alg->list);
-	spin_unlock_irqrestore(&ieee80211_crypto_lock, flags);
-	kfree(alg);
-	return 0;
-}
-
-struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name)
-{
-	struct ieee80211_crypto_alg *alg;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ieee80211_crypto_lock, flags);
-	list_for_each_entry(alg, &ieee80211_crypto_algs, list) {
-		if (strcmp(alg->ops->name, name) == 0)
-			goto found;
-	}
-	spin_unlock_irqrestore(&ieee80211_crypto_lock, flags);
-	return NULL;
-
-      found:
-	spin_unlock_irqrestore(&ieee80211_crypto_lock, flags);
-	return alg->ops;
-}
-
-static void *ieee80211_crypt_null_init(int keyidx)
-{
-	return (void *)1;
-}
-
-static void ieee80211_crypt_null_deinit(void *priv)
-{
-}
-
-static struct ieee80211_crypto_ops ieee80211_crypt_null = {
-	.name = "NULL",
-	.init = ieee80211_crypt_null_init,
-	.deinit = ieee80211_crypt_null_deinit,
-	.owner = THIS_MODULE,
-};
-
-static int __init ieee80211_crypto_init(void)
-{
-	return ieee80211_register_crypto_ops(&ieee80211_crypt_null);
-}
-
-static void __exit ieee80211_crypto_deinit(void)
-{
-	ieee80211_unregister_crypto_ops(&ieee80211_crypt_null);
-	BUG_ON(!list_empty(&ieee80211_crypto_algs));
-}
-
-EXPORT_SYMBOL(ieee80211_crypt_deinit_entries);
-EXPORT_SYMBOL(ieee80211_crypt_deinit_handler);
-EXPORT_SYMBOL(ieee80211_crypt_delayed_deinit);
-EXPORT_SYMBOL(ieee80211_crypt_quiescing);
-
-EXPORT_SYMBOL(ieee80211_register_crypto_ops);
-EXPORT_SYMBOL(ieee80211_unregister_crypto_ops);
-EXPORT_SYMBOL(ieee80211_get_crypto_ops);
-
-module_init(ieee80211_crypto_init);
-module_exit(ieee80211_crypto_deinit);
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
deleted file mode 100644
index bea04af0b482..000000000000
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- * Host AP crypt: host-based CCMP encryption implementation for Host AP driver
- *
- * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <asm/string.h>
-#include <linux/wireless.h>
-
-#include <net/ieee80211.h>
-
-#include <linux/crypto.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("Host AP crypt: CCMP");
-MODULE_LICENSE("GPL");
-
-#define AES_BLOCK_LEN 16
-#define CCMP_HDR_LEN 8
-#define CCMP_MIC_LEN 8
-#define CCMP_TK_LEN 16
-#define CCMP_PN_LEN 6
-
-struct ieee80211_ccmp_data {
-	u8 key[CCMP_TK_LEN];
-	int key_set;
-
-	u8 tx_pn[CCMP_PN_LEN];
-	u8 rx_pn[CCMP_PN_LEN];
-
-	u32 dot11RSNAStatsCCMPFormatErrors;
-	u32 dot11RSNAStatsCCMPReplays;
-	u32 dot11RSNAStatsCCMPDecryptErrors;
-
-	int key_idx;
-
-	struct crypto_cipher *tfm;
-
-	/* scratch buffers for virt_to_page() (crypto API) */
-	u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
-	    tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN];
-	u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
-};
-
-static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
-					      const u8 pt[16], u8 ct[16])
-{
-	crypto_cipher_encrypt_one(tfm, ct, pt);
-}
-
-static void *ieee80211_ccmp_init(int key_idx)
-{
-	struct ieee80211_ccmp_data *priv;
-
-	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
-	if (priv == NULL)
-		goto fail;
-	priv->key_idx = key_idx;
-
-	priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->tfm)) {
-		printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate "
-		       "crypto API aes\n");
-		priv->tfm = NULL;
-		goto fail;
-	}
-
-	return priv;
-
-      fail:
-	if (priv) {
-		if (priv->tfm)
-			crypto_free_cipher(priv->tfm);
-		kfree(priv);
-	}
-
-	return NULL;
-}
-
-static void ieee80211_ccmp_deinit(void *priv)
-{
-	struct ieee80211_ccmp_data *_priv = priv;
-	if (_priv && _priv->tfm)
-		crypto_free_cipher(_priv->tfm);
-	kfree(priv);
-}
-
-static inline void xor_block(u8 * b, u8 * a, size_t len)
-{
-	int i;
-	for (i = 0; i < len; i++)
-		b[i] ^= a[i];
-}
-
-static void ccmp_init_blocks(struct crypto_cipher *tfm,
-			     struct ieee80211_hdr_4addr *hdr,
-			     u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
-{
-	u8 *pos, qc = 0;
-	size_t aad_len;
-	u16 fc;
-	int a4_included, qc_included;
-	u8 aad[2 * AES_BLOCK_LEN];
-
-	fc = le16_to_cpu(hdr->frame_ctl);
-	a4_included = ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
-		       (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS));
-	qc_included = ((WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA) &&
-		       (WLAN_FC_GET_STYPE(fc) & IEEE80211_STYPE_QOS_DATA));
-	aad_len = 22;
-	if (a4_included)
-		aad_len += 6;
-	if (qc_included) {
-		pos = (u8 *) & hdr->addr4;
-		if (a4_included)
-			pos += 6;
-		qc = *pos & 0x0f;
-		aad_len += 2;
-	}
-
-	/* CCM Initial Block:
-	 * Flag (Include authentication header, M=3 (8-octet MIC),
-	 *       L=1 (2-octet Dlen))
-	 * Nonce: 0x00 | A2 | PN
-	 * Dlen */
-	b0[0] = 0x59;
-	b0[1] = qc;
-	memcpy(b0 + 2, hdr->addr2, ETH_ALEN);
-	memcpy(b0 + 8, pn, CCMP_PN_LEN);
-	b0[14] = (dlen >> 8) & 0xff;
-	b0[15] = dlen & 0xff;
-
-	/* AAD:
-	 * FC with bits 4..6 and 11..13 masked to zero; 14 is always one
-	 * A1 | A2 | A3
-	 * SC with bits 4..15 (seq#) masked to zero
-	 * A4 (if present)
-	 * QC (if present)
-	 */
-	pos = (u8 *) hdr;
-	aad[0] = 0;		/* aad_len >> 8 */
-	aad[1] = aad_len & 0xff;
-	aad[2] = pos[0] & 0x8f;
-	aad[3] = pos[1] & 0xc7;
-	memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN);
-	pos = (u8 *) & hdr->seq_ctl;
-	aad[22] = pos[0] & 0x0f;
-	aad[23] = 0;		/* all bits masked */
-	memset(aad + 24, 0, 8);
-	if (a4_included)
-		memcpy(aad + 24, hdr->addr4, ETH_ALEN);
-	if (qc_included) {
-		aad[a4_included ? 30 : 24] = qc;
-		/* rest of QC masked */
-	}
-
-	/* Start with the first block and AAD */
-	ieee80211_ccmp_aes_encrypt(tfm, b0, auth);
-	xor_block(auth, aad, AES_BLOCK_LEN);
-	ieee80211_ccmp_aes_encrypt(tfm, auth, auth);
-	xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN);
-	ieee80211_ccmp_aes_encrypt(tfm, auth, auth);
-	b0[0] &= 0x07;
-	b0[14] = b0[15] = 0;
-	ieee80211_ccmp_aes_encrypt(tfm, b0, s0);
-}
-
-static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
-			      u8 *aeskey, int keylen, void *priv)
-{
-	struct ieee80211_ccmp_data *key = priv;
-	int i;
-	u8 *pos;
-
-	if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len)
-		return -1;
-
-	if (aeskey != NULL && keylen >= CCMP_TK_LEN)
-		memcpy(aeskey, key->key, CCMP_TK_LEN);
-
-	pos = skb_push(skb, CCMP_HDR_LEN);
-	memmove(pos, pos + CCMP_HDR_LEN, hdr_len);
-	pos += hdr_len;
-
-	i = CCMP_PN_LEN - 1;
-	while (i >= 0) {
-		key->tx_pn[i]++;
-		if (key->tx_pn[i] != 0)
-			break;
-		i--;
-	}
-
-	*pos++ = key->tx_pn[5];
-	*pos++ = key->tx_pn[4];
-	*pos++ = 0;
-	*pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
-	*pos++ = key->tx_pn[3];
-	*pos++ = key->tx_pn[2];
-	*pos++ = key->tx_pn[1];
-	*pos++ = key->tx_pn[0];
-
-	return CCMP_HDR_LEN;
-}
-
-static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct ieee80211_ccmp_data *key = priv;
-	int data_len, i, blocks, last, len;
-	u8 *pos, *mic;
-	struct ieee80211_hdr_4addr *hdr;
-	u8 *b0 = key->tx_b0;
-	u8 *b = key->tx_b;
-	u8 *e = key->tx_e;
-	u8 *s0 = key->tx_s0;
-
-	if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len)
-		return -1;
-
-	data_len = skb->len - hdr_len;
-	len = ieee80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv);
-	if (len < 0)
-		return -1;
-
-	pos = skb->data + hdr_len + CCMP_HDR_LEN;
-	mic = skb_put(skb, CCMP_MIC_LEN);
-	hdr = (struct ieee80211_hdr_4addr *)skb->data;
-	ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0);
-
-	blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
-	last = data_len % AES_BLOCK_LEN;
-
-	for (i = 1; i <= blocks; i++) {
-		len = (i == blocks && last) ? last : AES_BLOCK_LEN;
-		/* Authentication */
-		xor_block(b, pos, len);
-		ieee80211_ccmp_aes_encrypt(key->tfm, b, b);
-		/* Encryption, with counter */
-		b0[14] = (i >> 8) & 0xff;
-		b0[15] = i & 0xff;
-		ieee80211_ccmp_aes_encrypt(key->tfm, b0, e);
-		xor_block(pos, e, len);
-		pos += len;
-	}
-
-	for (i = 0; i < CCMP_MIC_LEN; i++)
-		mic[i] = b[i] ^ s0[i];
-
-	return 0;
-}
-
-/*
- * deal with seq counter wrapping correctly.
- * refer to timer_after() for jiffies wrapping handling
- */
-static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o)
-{
-	u32 iv32_n, iv16_n;
-	u32 iv32_o, iv16_o;
-
-	iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3];
-	iv16_n = (pn_n[4] << 8) | pn_n[5];
-
-	iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3];
-	iv16_o = (pn_o[4] << 8) | pn_o[5];
-
-	if ((s32)iv32_n - (s32)iv32_o < 0 ||
-	    (iv32_n == iv32_o && iv16_n <= iv16_o))
-		return 1;
-	return 0;
-}
-
-static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct ieee80211_ccmp_data *key = priv;
-	u8 keyidx, *pos;
-	struct ieee80211_hdr_4addr *hdr;
-	u8 *b0 = key->rx_b0;
-	u8 *b = key->rx_b;
-	u8 *a = key->rx_a;
-	u8 pn[6];
-	int i, blocks, last, len;
-	size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN;
-	u8 *mic = skb->data + skb->len - CCMP_MIC_LEN;
-
-	if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
-		key->dot11RSNAStatsCCMPFormatErrors++;
-		return -1;
-	}
-
-	hdr = (struct ieee80211_hdr_4addr *)skb->data;
-	pos = skb->data + hdr_len;
-	keyidx = pos[3];
-	if (!(keyidx & (1 << 5))) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "CCMP: received packet without ExtIV"
-			       " flag from %pM\n", hdr->addr2);
-		}
-		key->dot11RSNAStatsCCMPFormatErrors++;
-		return -2;
-	}
-	keyidx >>= 6;
-	if (key->key_idx != keyidx) {
-		printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame "
-		       "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv);
-		return -6;
-	}
-	if (!key->key_set) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "CCMP: received packet from %pM"
-			       " with keyid=%d that does not have a configured"
-			       " key\n", hdr->addr2, keyidx);
-		}
-		return -3;
-	}
-
-	pn[0] = pos[7];
-	pn[1] = pos[6];
-	pn[2] = pos[5];
-	pn[3] = pos[4];
-	pn[4] = pos[1];
-	pn[5] = pos[0];
-	pos += 8;
-
-	if (ccmp_replay_check(pn, key->rx_pn)) {
-		if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) {
-			IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%pM "
-				 "previous PN %02x%02x%02x%02x%02x%02x "
-				 "received PN %02x%02x%02x%02x%02x%02x\n",
-				 hdr->addr2,
-				 key->rx_pn[0], key->rx_pn[1], key->rx_pn[2],
-				 key->rx_pn[3], key->rx_pn[4], key->rx_pn[5],
-				 pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
-		}
-		key->dot11RSNAStatsCCMPReplays++;
-		return -4;
-	}
-
-	ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b);
-	xor_block(mic, b, CCMP_MIC_LEN);
-
-	blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
-	last = data_len % AES_BLOCK_LEN;
-
-	for (i = 1; i <= blocks; i++) {
-		len = (i == blocks && last) ? last : AES_BLOCK_LEN;
-		/* Decrypt, with counter */
-		b0[14] = (i >> 8) & 0xff;
-		b0[15] = i & 0xff;
-		ieee80211_ccmp_aes_encrypt(key->tfm, b0, b);
-		xor_block(pos, b, len);
-		/* Authentication */
-		xor_block(a, pos, len);
-		ieee80211_ccmp_aes_encrypt(key->tfm, a, a);
-		pos += len;
-	}
-
-	if (memcmp(mic, a, CCMP_MIC_LEN) != 0) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "CCMP: decrypt failed: STA="
-			       "%pM\n", hdr->addr2);
-		}
-		key->dot11RSNAStatsCCMPDecryptErrors++;
-		return -5;
-	}
-
-	memcpy(key->rx_pn, pn, CCMP_PN_LEN);
-
-	/* Remove hdr and MIC */
-	memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len);
-	skb_pull(skb, CCMP_HDR_LEN);
-	skb_trim(skb, skb->len - CCMP_MIC_LEN);
-
-	return keyidx;
-}
-
-static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
-{
-	struct ieee80211_ccmp_data *data = priv;
-	int keyidx;
-	struct crypto_cipher *tfm = data->tfm;
-
-	keyidx = data->key_idx;
-	memset(data, 0, sizeof(*data));
-	data->key_idx = keyidx;
-	data->tfm = tfm;
-	if (len == CCMP_TK_LEN) {
-		memcpy(data->key, key, CCMP_TK_LEN);
-		data->key_set = 1;
-		if (seq) {
-			data->rx_pn[0] = seq[5];
-			data->rx_pn[1] = seq[4];
-			data->rx_pn[2] = seq[3];
-			data->rx_pn[3] = seq[2];
-			data->rx_pn[4] = seq[1];
-			data->rx_pn[5] = seq[0];
-		}
-		crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN);
-	} else if (len == 0)
-		data->key_set = 0;
-	else
-		return -1;
-
-	return 0;
-}
-
-static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
-{
-	struct ieee80211_ccmp_data *data = priv;
-
-	if (len < CCMP_TK_LEN)
-		return -1;
-
-	if (!data->key_set)
-		return 0;
-	memcpy(key, data->key, CCMP_TK_LEN);
-
-	if (seq) {
-		seq[0] = data->tx_pn[5];
-		seq[1] = data->tx_pn[4];
-		seq[2] = data->tx_pn[3];
-		seq[3] = data->tx_pn[2];
-		seq[4] = data->tx_pn[1];
-		seq[5] = data->tx_pn[0];
-	}
-
-	return CCMP_TK_LEN;
-}
-
-static char *ieee80211_ccmp_print_stats(char *p, void *priv)
-{
-	struct ieee80211_ccmp_data *ccmp = priv;
-
-	p += sprintf(p, "key[%d] alg=CCMP key_set=%d "
-		     "tx_pn=%02x%02x%02x%02x%02x%02x "
-		     "rx_pn=%02x%02x%02x%02x%02x%02x "
-		     "format_errors=%d replays=%d decrypt_errors=%d\n",
-		     ccmp->key_idx, ccmp->key_set,
-		     ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
-		     ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
-		     ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
-		     ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
-		     ccmp->dot11RSNAStatsCCMPFormatErrors,
-		     ccmp->dot11RSNAStatsCCMPReplays,
-		     ccmp->dot11RSNAStatsCCMPDecryptErrors);
-
-	return p;
-}
-
-static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = {
-	.name = "CCMP",
-	.init = ieee80211_ccmp_init,
-	.deinit = ieee80211_ccmp_deinit,
-	.build_iv = ieee80211_ccmp_hdr,
-	.encrypt_mpdu = ieee80211_ccmp_encrypt,
-	.decrypt_mpdu = ieee80211_ccmp_decrypt,
-	.encrypt_msdu = NULL,
-	.decrypt_msdu = NULL,
-	.set_key = ieee80211_ccmp_set_key,
-	.get_key = ieee80211_ccmp_get_key,
-	.print_stats = ieee80211_ccmp_print_stats,
-	.extra_mpdu_prefix_len = CCMP_HDR_LEN,
-	.extra_mpdu_postfix_len = CCMP_MIC_LEN,
-	.owner = THIS_MODULE,
-};
-
-static int __init ieee80211_crypto_ccmp_init(void)
-{
-	return ieee80211_register_crypto_ops(&ieee80211_crypt_ccmp);
-}
-
-static void __exit ieee80211_crypto_ccmp_exit(void)
-{
-	ieee80211_unregister_crypto_ops(&ieee80211_crypt_ccmp);
-}
-
-module_init(ieee80211_crypto_ccmp_init);
-module_exit(ieee80211_crypto_ccmp_exit);
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
deleted file mode 100644
index d12da1da6328..000000000000
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ /dev/null
@@ -1,782 +0,0 @@
-/*
- * Host AP crypt: host-based TKIP encryption implementation for Host AP driver
- *
- * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/mm.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <asm/string.h>
-
-#include <net/ieee80211.h>
-
-#include <linux/crypto.h>
-#include <linux/crc32.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("Host AP crypt: TKIP");
-MODULE_LICENSE("GPL");
-
-struct ieee80211_tkip_data {
-#define TKIP_KEY_LEN 32
-	u8 key[TKIP_KEY_LEN];
-	int key_set;
-
-	u32 tx_iv32;
-	u16 tx_iv16;
-	u16 tx_ttak[5];
-	int tx_phase1_done;
-
-	u32 rx_iv32;
-	u16 rx_iv16;
-	u16 rx_ttak[5];
-	int rx_phase1_done;
-	u32 rx_iv32_new;
-	u16 rx_iv16_new;
-
-	u32 dot11RSNAStatsTKIPReplays;
-	u32 dot11RSNAStatsTKIPICVErrors;
-	u32 dot11RSNAStatsTKIPLocalMICFailures;
-
-	int key_idx;
-
-	struct crypto_blkcipher *rx_tfm_arc4;
-	struct crypto_hash *rx_tfm_michael;
-	struct crypto_blkcipher *tx_tfm_arc4;
-	struct crypto_hash *tx_tfm_michael;
-
-	/* scratch buffers for virt_to_page() (crypto API) */
-	u8 rx_hdr[16], tx_hdr[16];
-
-	unsigned long flags;
-};
-
-static unsigned long ieee80211_tkip_set_flags(unsigned long flags, void *priv)
-{
-	struct ieee80211_tkip_data *_priv = priv;
-	unsigned long old_flags = _priv->flags;
-	_priv->flags = flags;
-	return old_flags;
-}
-
-static unsigned long ieee80211_tkip_get_flags(void *priv)
-{
-	struct ieee80211_tkip_data *_priv = priv;
-	return _priv->flags;
-}
-
-static void *ieee80211_tkip_init(int key_idx)
-{
-	struct ieee80211_tkip_data *priv;
-
-	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
-	if (priv == NULL)
-		goto fail;
-
-	priv->key_idx = key_idx;
-
-	priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
-						CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->tx_tfm_arc4)) {
-		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
-		       "crypto API arc4\n");
-		priv->tx_tfm_arc4 = NULL;
-		goto fail;
-	}
-
-	priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
-						 CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->tx_tfm_michael)) {
-		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
-		       "crypto API michael_mic\n");
-		priv->tx_tfm_michael = NULL;
-		goto fail;
-	}
-
-	priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
-						CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->rx_tfm_arc4)) {
-		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
-		       "crypto API arc4\n");
-		priv->rx_tfm_arc4 = NULL;
-		goto fail;
-	}
-
-	priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
-						 CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->rx_tfm_michael)) {
-		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
-		       "crypto API michael_mic\n");
-		priv->rx_tfm_michael = NULL;
-		goto fail;
-	}
-
-	return priv;
-
-      fail:
-	if (priv) {
-		if (priv->tx_tfm_michael)
-			crypto_free_hash(priv->tx_tfm_michael);
-		if (priv->tx_tfm_arc4)
-			crypto_free_blkcipher(priv->tx_tfm_arc4);
-		if (priv->rx_tfm_michael)
-			crypto_free_hash(priv->rx_tfm_michael);
-		if (priv->rx_tfm_arc4)
-			crypto_free_blkcipher(priv->rx_tfm_arc4);
-		kfree(priv);
-	}
-
-	return NULL;
-}
-
-static void ieee80211_tkip_deinit(void *priv)
-{
-	struct ieee80211_tkip_data *_priv = priv;
-	if (_priv) {
-		if (_priv->tx_tfm_michael)
-			crypto_free_hash(_priv->tx_tfm_michael);
-		if (_priv->tx_tfm_arc4)
-			crypto_free_blkcipher(_priv->tx_tfm_arc4);
-		if (_priv->rx_tfm_michael)
-			crypto_free_hash(_priv->rx_tfm_michael);
-		if (_priv->rx_tfm_arc4)
-			crypto_free_blkcipher(_priv->rx_tfm_arc4);
-	}
-	kfree(priv);
-}
-
-static inline u16 RotR1(u16 val)
-{
-	return (val >> 1) | (val << 15);
-}
-
-static inline u8 Lo8(u16 val)
-{
-	return val & 0xff;
-}
-
-static inline u8 Hi8(u16 val)
-{
-	return val >> 8;
-}
-
-static inline u16 Lo16(u32 val)
-{
-	return val & 0xffff;
-}
-
-static inline u16 Hi16(u32 val)
-{
-	return val >> 16;
-}
-
-static inline u16 Mk16(u8 hi, u8 lo)
-{
-	return lo | (((u16) hi) << 8);
-}
-
-static inline u16 Mk16_le(__le16 * v)
-{
-	return le16_to_cpu(*v);
-}
-
-static const u16 Sbox[256] = {
-	0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
-	0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
-	0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
-	0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
-	0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
-	0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
-	0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
-	0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
-	0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
-	0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
-	0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
-	0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
-	0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
-	0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
-	0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
-	0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
-	0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
-	0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
-	0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
-	0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
-	0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
-	0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
-	0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
-	0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
-	0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
-	0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
-	0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
-	0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
-	0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
-	0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
-	0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
-	0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
-};
-
-static inline u16 _S_(u16 v)
-{
-	u16 t = Sbox[Hi8(v)];
-	return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8));
-}
-
-#define PHASE1_LOOP_COUNT 8
-
-static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA,
-			       u32 IV32)
-{
-	int i, j;
-
-	/* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */
-	TTAK[0] = Lo16(IV32);
-	TTAK[1] = Hi16(IV32);
-	TTAK[2] = Mk16(TA[1], TA[0]);
-	TTAK[3] = Mk16(TA[3], TA[2]);
-	TTAK[4] = Mk16(TA[5], TA[4]);
-
-	for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
-		j = 2 * (i & 1);
-		TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j]));
-		TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j]));
-		TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j]));
-		TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j]));
-		TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i;
-	}
-}
-
-static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
-			       u16 IV16)
-{
-	/* Make temporary area overlap WEP seed so that the final copy can be
-	 * avoided on little endian hosts. */
-	u16 *PPK = (u16 *) & WEPSeed[4];
-
-	/* Step 1 - make copy of TTAK and bring in TSC */
-	PPK[0] = TTAK[0];
-	PPK[1] = TTAK[1];
-	PPK[2] = TTAK[2];
-	PPK[3] = TTAK[3];
-	PPK[4] = TTAK[4];
-	PPK[5] = TTAK[4] + IV16;
-
-	/* Step 2 - 96-bit bijective mixing using S-box */
-	PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
-	PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
-	PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
-	PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
-	PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
-	PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
-
-	PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
-	PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
-	PPK[2] += RotR1(PPK[1]);
-	PPK[3] += RotR1(PPK[2]);
-	PPK[4] += RotR1(PPK[3]);
-	PPK[5] += RotR1(PPK[4]);
-
-	/* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value
-	 * WEPSeed[0..2] is transmitted as WEP IV */
-	WEPSeed[0] = Hi8(IV16);
-	WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
-	WEPSeed[2] = Lo8(IV16);
-	WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
-
-#ifdef __BIG_ENDIAN
-	{
-		int i;
-		for (i = 0; i < 6; i++)
-			PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8);
-	}
-#endif
-}
-
-static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
-			      u8 * rc4key, int keylen, void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-	int len;
-	u8 *pos;
-	struct ieee80211_hdr_4addr *hdr;
-
-	hdr = (struct ieee80211_hdr_4addr *)skb->data;
-
-	if (skb_headroom(skb) < 8 || skb->len < hdr_len)
-		return -1;
-
-	if (rc4key == NULL || keylen < 16)
-		return -1;
-
-	if (!tkey->tx_phase1_done) {
-		tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
-				   tkey->tx_iv32);
-		tkey->tx_phase1_done = 1;
-	}
-	tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16);
-
-	len = skb->len - hdr_len;
-	pos = skb_push(skb, 8);
-	memmove(pos, pos + 8, hdr_len);
-	pos += hdr_len;
-
-	*pos++ = *rc4key;
-	*pos++ = *(rc4key + 1);
-	*pos++ = *(rc4key + 2);
-	*pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
-	*pos++ = tkey->tx_iv32 & 0xff;
-	*pos++ = (tkey->tx_iv32 >> 8) & 0xff;
-	*pos++ = (tkey->tx_iv32 >> 16) & 0xff;
-	*pos++ = (tkey->tx_iv32 >> 24) & 0xff;
-
-	tkey->tx_iv16++;
-	if (tkey->tx_iv16 == 0) {
-		tkey->tx_phase1_done = 0;
-		tkey->tx_iv32++;
-	}
-
-	return 8;
-}
-
-static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-	struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 };
-	int len;
-	u8 rc4key[16], *pos, *icv;
-	u32 crc;
-	struct scatterlist sg;
-
-	if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
-		if (net_ratelimit()) {
-			struct ieee80211_hdr_4addr *hdr =
-			    (struct ieee80211_hdr_4addr *)skb->data;
-			printk(KERN_DEBUG ": TKIP countermeasures: dropped "
-			       "TX packet to %pM\n", hdr->addr1);
-		}
-		return -1;
-	}
-
-	if (skb_tailroom(skb) < 4 || skb->len < hdr_len)
-		return -1;
-
-	len = skb->len - hdr_len;
-	pos = skb->data + hdr_len;
-
-	if ((ieee80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0)
-		return -1;
-
-	icv = skb_put(skb, 4);
-
-	crc = ~crc32_le(~0, pos, len);
-	icv[0] = crc;
-	icv[1] = crc >> 8;
-	icv[2] = crc >> 16;
-	icv[3] = crc >> 24;
-
-	crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
-	sg_init_one(&sg, pos, len + 4);
-	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
-}
-
-/*
- * deal with seq counter wrapping correctly.
- * refer to timer_after() for jiffies wrapping handling
- */
-static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
-				    u32 iv32_o, u16 iv16_o)
-{
-	if ((s32)iv32_n - (s32)iv32_o < 0 ||
-	    (iv32_n == iv32_o && iv16_n <= iv16_o))
-		return 1;
-	return 0;
-}
-
-static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-	struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 };
-	u8 rc4key[16];
-	u8 keyidx, *pos;
-	u32 iv32;
-	u16 iv16;
-	struct ieee80211_hdr_4addr *hdr;
-	u8 icv[4];
-	u32 crc;
-	struct scatterlist sg;
-	int plen;
-
-	hdr = (struct ieee80211_hdr_4addr *)skb->data;
-
-	if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG ": TKIP countermeasures: dropped "
-			       "received packet from %pM\n", hdr->addr2);
-		}
-		return -1;
-	}
-
-	if (skb->len < hdr_len + 8 + 4)
-		return -1;
-
-	pos = skb->data + hdr_len;
-	keyidx = pos[3];
-	if (!(keyidx & (1 << 5))) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "TKIP: received packet without ExtIV"
-			       " flag from %pM\n", hdr->addr2);
-		}
-		return -2;
-	}
-	keyidx >>= 6;
-	if (tkey->key_idx != keyidx) {
-		printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame "
-		       "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv);
-		return -6;
-	}
-	if (!tkey->key_set) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG "TKIP: received packet from %pM"
-			       " with keyid=%d that does not have a configured"
-			       " key\n", hdr->addr2, keyidx);
-		}
-		return -3;
-	}
-	iv16 = (pos[0] << 8) | pos[2];
-	iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
-	pos += 8;
-
-	if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
-		if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) {
-			IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%pM"
-			       " previous TSC %08x%04x received TSC "
-			       "%08x%04x\n", hdr->addr2,
-			       tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
-		}
-		tkey->dot11RSNAStatsTKIPReplays++;
-		return -4;
-	}
-
-	if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) {
-		tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32);
-		tkey->rx_phase1_done = 1;
-	}
-	tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16);
-
-	plen = skb->len - hdr_len - 12;
-
-	crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
-	sg_init_one(&sg, pos, plen + 4);
-	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
-		if (net_ratelimit()) {
-			printk(KERN_DEBUG ": TKIP: failed to decrypt "
-			       "received packet from %pM\n",
-			       hdr->addr2);
-		}
-		return -7;
-	}
-
-	crc = ~crc32_le(~0, pos, plen);
-	icv[0] = crc;
-	icv[1] = crc >> 8;
-	icv[2] = crc >> 16;
-	icv[3] = crc >> 24;
-	if (memcmp(icv, pos + plen, 4) != 0) {
-		if (iv32 != tkey->rx_iv32) {
-			/* Previously cached Phase1 result was already lost, so
-			 * it needs to be recalculated for the next packet. */
-			tkey->rx_phase1_done = 0;
-		}
-		if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) {
-			IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA="
-			       "%pM\n", hdr->addr2);
-		}
-		tkey->dot11RSNAStatsTKIPICVErrors++;
-		return -5;
-	}
-
-	/* Update real counters only after Michael MIC verification has
-	 * completed */
-	tkey->rx_iv32_new = iv32;
-	tkey->rx_iv16_new = iv16;
-
-	/* Remove IV and ICV */
-	memmove(skb->data + 8, skb->data, hdr_len);
-	skb_pull(skb, 8);
-	skb_trim(skb, skb->len - 4);
-
-	return keyidx;
-}
-
-static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
-		       u8 * data, size_t data_len, u8 * mic)
-{
-	struct hash_desc desc;
-	struct scatterlist sg[2];
-
-	if (tfm_michael == NULL) {
-		printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n");
-		return -1;
-	}
-	sg_init_table(sg, 2);
-	sg_set_buf(&sg[0], hdr, 16);
-	sg_set_buf(&sg[1], data, data_len);
-
-	if (crypto_hash_setkey(tfm_michael, key, 8))
-		return -1;
-
-	desc.tfm = tfm_michael;
-	desc.flags = 0;
-	return crypto_hash_digest(&desc, sg, data_len + 16, mic);
-}
-
-static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
-{
-	struct ieee80211_hdr_4addr *hdr11;
-	u16 stype;
-
-	hdr11 = (struct ieee80211_hdr_4addr *)skb->data;
-	stype  = WLAN_FC_GET_STYPE(le16_to_cpu(hdr11->frame_ctl));
-
-	switch (le16_to_cpu(hdr11->frame_ctl) &
-		(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
-	case IEEE80211_FCTL_TODS:
-		memcpy(hdr, hdr11->addr3, ETH_ALEN);	/* DA */
-		memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN);	/* SA */
-		break;
-	case IEEE80211_FCTL_FROMDS:
-		memcpy(hdr, hdr11->addr1, ETH_ALEN);	/* DA */
-		memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN);	/* SA */
-		break;
-	case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS:
-		memcpy(hdr, hdr11->addr3, ETH_ALEN);	/* DA */
-		memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN);	/* SA */
-		break;
-	case 0:
-		memcpy(hdr, hdr11->addr1, ETH_ALEN);	/* DA */
-		memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN);	/* SA */
-		break;
-	}
-
-	if (stype & IEEE80211_STYPE_QOS_DATA) {
-		const struct ieee80211_hdr_3addrqos *qoshdr =
-			(struct ieee80211_hdr_3addrqos *)skb->data;
-		hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID;
-	} else
-		hdr[12] = 0;		/* priority */
-
-	hdr[13] = hdr[14] = hdr[15] = 0;	/* reserved */
-}
-
-static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len,
-				     void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-	u8 *pos;
-
-	if (skb_tailroom(skb) < 8 || skb->len < hdr_len) {
-		printk(KERN_DEBUG "Invalid packet for Michael MIC add "
-		       "(tailroom=%d hdr_len=%d skb->len=%d)\n",
-		       skb_tailroom(skb), hdr_len, skb->len);
-		return -1;
-	}
-
-	michael_mic_hdr(skb, tkey->tx_hdr);
-	pos = skb_put(skb, 8);
-	if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr,
-			skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
-		return -1;
-
-	return 0;
-}
-
-static void ieee80211_michael_mic_failure(struct net_device *dev,
-					  struct ieee80211_hdr_4addr *hdr,
-					  int keyidx)
-{
-	union iwreq_data wrqu;
-	struct iw_michaelmicfailure ev;
-
-	/* TODO: needed parameters: count, keyid, key type, TSC */
-	memset(&ev, 0, sizeof(ev));
-	ev.flags = keyidx & IW_MICFAILURE_KEY_ID;
-	if (hdr->addr1[0] & 0x01)
-		ev.flags |= IW_MICFAILURE_GROUP;
-	else
-		ev.flags |= IW_MICFAILURE_PAIRWISE;
-	ev.src_addr.sa_family = ARPHRD_ETHER;
-	memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN);
-	memset(&wrqu, 0, sizeof(wrqu));
-	wrqu.data.length = sizeof(ev);
-	wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev);
-}
-
-static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx,
-					int hdr_len, void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-	u8 mic[8];
-
-	if (!tkey->key_set)
-		return -1;
-
-	michael_mic_hdr(skb, tkey->rx_hdr);
-	if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr,
-			skb->data + hdr_len, skb->len - 8 - hdr_len, mic))
-		return -1;
-	if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) {
-		struct ieee80211_hdr_4addr *hdr;
-		hdr = (struct ieee80211_hdr_4addr *)skb->data;
-		printk(KERN_DEBUG "%s: Michael MIC verification failed for "
-		       "MSDU from %pM keyidx=%d\n",
-		       skb->dev ? skb->dev->name : "N/A", hdr->addr2,
-		       keyidx);
-		if (skb->dev)
-			ieee80211_michael_mic_failure(skb->dev, hdr, keyidx);
-		tkey->dot11RSNAStatsTKIPLocalMICFailures++;
-		return -1;
-	}
-
-	/* Update TSC counters for RX now that the packet verification has
-	 * completed. */
-	tkey->rx_iv32 = tkey->rx_iv32_new;
-	tkey->rx_iv16 = tkey->rx_iv16_new;
-
-	skb_trim(skb, skb->len - 8);
-
-	return 0;
-}
-
-static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-	int keyidx;
-	struct crypto_hash *tfm = tkey->tx_tfm_michael;
-	struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4;
-	struct crypto_hash *tfm3 = tkey->rx_tfm_michael;
-	struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4;
-
-	keyidx = tkey->key_idx;
-	memset(tkey, 0, sizeof(*tkey));
-	tkey->key_idx = keyidx;
-	tkey->tx_tfm_michael = tfm;
-	tkey->tx_tfm_arc4 = tfm2;
-	tkey->rx_tfm_michael = tfm3;
-	tkey->rx_tfm_arc4 = tfm4;
-	if (len == TKIP_KEY_LEN) {
-		memcpy(tkey->key, key, TKIP_KEY_LEN);
-		tkey->key_set = 1;
-		tkey->tx_iv16 = 1;	/* TSC is initialized to 1 */
-		if (seq) {
-			tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) |
-			    (seq[3] << 8) | seq[2];
-			tkey->rx_iv16 = (seq[1] << 8) | seq[0];
-		}
-	} else if (len == 0)
-		tkey->key_set = 0;
-	else
-		return -1;
-
-	return 0;
-}
-
-static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
-{
-	struct ieee80211_tkip_data *tkey = priv;
-
-	if (len < TKIP_KEY_LEN)
-		return -1;
-
-	if (!tkey->key_set)
-		return 0;
-	memcpy(key, tkey->key, TKIP_KEY_LEN);
-
-	if (seq) {
-		/* Return the sequence number of the last transmitted frame. */
-		u16 iv16 = tkey->tx_iv16;
-		u32 iv32 = tkey->tx_iv32;
-		if (iv16 == 0)
-			iv32--;
-		iv16--;
-		seq[0] = tkey->tx_iv16;
-		seq[1] = tkey->tx_iv16 >> 8;
-		seq[2] = tkey->tx_iv32;
-		seq[3] = tkey->tx_iv32 >> 8;
-		seq[4] = tkey->tx_iv32 >> 16;
-		seq[5] = tkey->tx_iv32 >> 24;
-	}
-
-	return TKIP_KEY_LEN;
-}
-
-static char *ieee80211_tkip_print_stats(char *p, void *priv)
-{
-	struct ieee80211_tkip_data *tkip = priv;
-	p += sprintf(p, "key[%d] alg=TKIP key_set=%d "
-		     "tx_pn=%02x%02x%02x%02x%02x%02x "
-		     "rx_pn=%02x%02x%02x%02x%02x%02x "
-		     "replays=%d icv_errors=%d local_mic_failures=%d\n",
-		     tkip->key_idx, tkip->key_set,
-		     (tkip->tx_iv32 >> 24) & 0xff,
-		     (tkip->tx_iv32 >> 16) & 0xff,
-		     (tkip->tx_iv32 >> 8) & 0xff,
-		     tkip->tx_iv32 & 0xff,
-		     (tkip->tx_iv16 >> 8) & 0xff,
-		     tkip->tx_iv16 & 0xff,
-		     (tkip->rx_iv32 >> 24) & 0xff,
-		     (tkip->rx_iv32 >> 16) & 0xff,
-		     (tkip->rx_iv32 >> 8) & 0xff,
-		     tkip->rx_iv32 & 0xff,
-		     (tkip->rx_iv16 >> 8) & 0xff,
-		     tkip->rx_iv16 & 0xff,
-		     tkip->dot11RSNAStatsTKIPReplays,
-		     tkip->dot11RSNAStatsTKIPICVErrors,
-		     tkip->dot11RSNAStatsTKIPLocalMICFailures);
-	return p;
-}
-
-static struct ieee80211_crypto_ops ieee80211_crypt_tkip = {
-	.name = "TKIP",
-	.init = ieee80211_tkip_init,
-	.deinit = ieee80211_tkip_deinit,
-	.build_iv = ieee80211_tkip_hdr,
-	.encrypt_mpdu = ieee80211_tkip_encrypt,
-	.decrypt_mpdu = ieee80211_tkip_decrypt,
-	.encrypt_msdu = ieee80211_michael_mic_add,
-	.decrypt_msdu = ieee80211_michael_mic_verify,
-	.set_key = ieee80211_tkip_set_key,
-	.get_key = ieee80211_tkip_get_key,
-	.print_stats = ieee80211_tkip_print_stats,
-	.extra_mpdu_prefix_len = 4 + 4,	/* IV + ExtIV */
-	.extra_mpdu_postfix_len = 4,	/* ICV */
-	.extra_msdu_postfix_len = 8,	/* MIC */
-	.get_flags = ieee80211_tkip_get_flags,
-	.set_flags = ieee80211_tkip_set_flags,
-	.owner = THIS_MODULE,
-};
-
-static int __init ieee80211_crypto_tkip_init(void)
-{
-	return ieee80211_register_crypto_ops(&ieee80211_crypt_tkip);
-}
-
-static void __exit ieee80211_crypto_tkip_exit(void)
-{
-	ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip);
-}
-
-module_init(ieee80211_crypto_tkip_init);
-module_exit(ieee80211_crypto_tkip_exit);
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
deleted file mode 100644
index 3fa30c40779f..000000000000
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Host AP crypt: host-based WEP encryption implementation for Host AP driver
- *
- * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation. See README and COPYING for
- * more details.
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/skbuff.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-
-#include <net/ieee80211.h>
-
-#include <linux/crypto.h>
-#include <linux/crc32.h>
-
-MODULE_AUTHOR("Jouni Malinen");
-MODULE_DESCRIPTION("Host AP crypt: WEP");
-MODULE_LICENSE("GPL");
-
-struct prism2_wep_data {
-	u32 iv;
-#define WEP_KEY_LEN 13
-	u8 key[WEP_KEY_LEN + 1];
-	u8 key_len;
-	u8 key_idx;
-	struct crypto_blkcipher *tx_tfm;
-	struct crypto_blkcipher *rx_tfm;
-};
-
-static void *prism2_wep_init(int keyidx)
-{
-	struct prism2_wep_data *priv;
-
-	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
-	if (priv == NULL)
-		goto fail;
-	priv->key_idx = keyidx;
-
-	priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->tx_tfm)) {
-		printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
-		       "crypto API arc4\n");
-		priv->tx_tfm = NULL;
-		goto fail;
-	}
-
-	priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(priv->rx_tfm)) {
-		printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
-		       "crypto API arc4\n");
-		priv->rx_tfm = NULL;
-		goto fail;
-	}
-	/* start WEP IV from a random value */
-	get_random_bytes(&priv->iv, 4);
-
-	return priv;
-
-      fail:
-	if (priv) {
-		if (priv->tx_tfm)
-			crypto_free_blkcipher(priv->tx_tfm);
-		if (priv->rx_tfm)
-			crypto_free_blkcipher(priv->rx_tfm);
-		kfree(priv);
-	}
-	return NULL;
-}
-
-static void prism2_wep_deinit(void *priv)
-{
-	struct prism2_wep_data *_priv = priv;
-	if (_priv) {
-		if (_priv->tx_tfm)
-			crypto_free_blkcipher(_priv->tx_tfm);
-		if (_priv->rx_tfm)
-			crypto_free_blkcipher(_priv->rx_tfm);
-	}
-	kfree(priv);
-}
-
-/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
-static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len,
-			       u8 *key, int keylen, void *priv)
-{
-	struct prism2_wep_data *wep = priv;
-	u32 klen, len;
-	u8 *pos;
-
-	if (skb_headroom(skb) < 4 || skb->len < hdr_len)
-		return -1;
-
-	len = skb->len - hdr_len;
-	pos = skb_push(skb, 4);
-	memmove(pos, pos + 4, hdr_len);
-	pos += hdr_len;
-
-	klen = 3 + wep->key_len;
-
-	wep->iv++;
-
-	/* Fluhrer, Mantin, and Shamir have reported weaknesses in the key
-	 * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N)
-	 * can be used to speedup attacks, so avoid using them. */
-	if ((wep->iv & 0xff00) == 0xff00) {
-		u8 B = (wep->iv >> 16) & 0xff;
-		if (B >= 3 && B < klen)
-			wep->iv += 0x0100;
-	}
-
-	/* Prepend 24-bit IV to RC4 key and TX frame */
-	*pos++ = (wep->iv >> 16) & 0xff;
-	*pos++ = (wep->iv >> 8) & 0xff;
-	*pos++ = wep->iv & 0xff;
-	*pos++ = wep->key_idx << 6;
-
-	return 0;
-}
-
-/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
- * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
- * so the payload length increases with 8 bytes.
- *
- * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
- */
-static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct prism2_wep_data *wep = priv;
-	struct blkcipher_desc desc = { .tfm = wep->tx_tfm };
-	u32 crc, klen, len;
-	u8 *pos, *icv;
-	struct scatterlist sg;
-	u8 key[WEP_KEY_LEN + 3];
-
-	/* other checks are in prism2_wep_build_iv */
-	if (skb_tailroom(skb) < 4)
-		return -1;
-
-	/* add the IV to the frame */
-	if (prism2_wep_build_iv(skb, hdr_len, NULL, 0, priv))
-		return -1;
-
-	/* Copy the IV into the first 3 bytes of the key */
-	skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
-
-	/* Copy rest of the WEP key (the secret part) */
-	memcpy(key + 3, wep->key, wep->key_len);
-
-	len = skb->len - hdr_len - 4;
-	pos = skb->data + hdr_len + 4;
-	klen = 3 + wep->key_len;
-
-	/* Append little-endian CRC32 over only the data and encrypt it to produce ICV */
-	crc = ~crc32_le(~0, pos, len);
-	icv = skb_put(skb, 4);
-	icv[0] = crc;
-	icv[1] = crc >> 8;
-	icv[2] = crc >> 16;
-	icv[3] = crc >> 24;
-
-	crypto_blkcipher_setkey(wep->tx_tfm, key, klen);
-	sg_init_one(&sg, pos, len + 4);
-	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
-}
-
-/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
- * the frame: IV (4 bytes), encrypted payload (including SNAP header),
- * ICV (4 bytes). len includes both IV and ICV.
- *
- * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on
- * failure. If frame is OK, IV and ICV will be removed.
- */
-static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
-{
-	struct prism2_wep_data *wep = priv;
-	struct blkcipher_desc desc = { .tfm = wep->rx_tfm };
-	u32 crc, klen, plen;
-	u8 key[WEP_KEY_LEN + 3];
-	u8 keyidx, *pos, icv[4];
-	struct scatterlist sg;
-
-	if (skb->len < hdr_len + 8)
-		return -1;
-
-	pos = skb->data + hdr_len;
-	key[0] = *pos++;
-	key[1] = *pos++;
-	key[2] = *pos++;
-	keyidx = *pos++ >> 6;
-	if (keyidx != wep->key_idx)
-		return -1;
-
-	klen = 3 + wep->key_len;
-
-	/* Copy rest of the WEP key (the secret part) */
-	memcpy(key + 3, wep->key, wep->key_len);
-
-	/* Apply RC4 to data and compute CRC32 over decrypted data */
-	plen = skb->len - hdr_len - 8;
-
-	crypto_blkcipher_setkey(wep->rx_tfm, key, klen);
-	sg_init_one(&sg, pos, plen + 4);
-	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
-		return -7;
-
-	crc = ~crc32_le(~0, pos, plen);
-	icv[0] = crc;
-	icv[1] = crc >> 8;
-	icv[2] = crc >> 16;
-	icv[3] = crc >> 24;
-	if (memcmp(icv, pos + plen, 4) != 0) {
-		/* ICV mismatch - drop frame */
-		return -2;
-	}
-
-	/* Remove IV and ICV */
-	memmove(skb->data + 4, skb->data, hdr_len);
-	skb_pull(skb, 4);
-	skb_trim(skb, skb->len - 4);
-
-	return 0;
-}
-
-static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv)
-{
-	struct prism2_wep_data *wep = priv;
-
-	if (len < 0 || len > WEP_KEY_LEN)
-		return -1;
-
-	memcpy(wep->key, key, len);
-	wep->key_len = len;
-
-	return 0;
-}
-
-static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv)
-{
-	struct prism2_wep_data *wep = priv;
-
-	if (len < wep->key_len)
-		return -1;
-
-	memcpy(key, wep->key, wep->key_len);
-
-	return wep->key_len;
-}
-
-static char *prism2_wep_print_stats(char *p, void *priv)
-{
-	struct prism2_wep_data *wep = priv;
-	p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
-	return p;
-}
-
-static struct ieee80211_crypto_ops ieee80211_crypt_wep = {
-	.name = "WEP",
-	.init = prism2_wep_init,
-	.deinit = prism2_wep_deinit,
-	.build_iv = prism2_wep_build_iv,
-	.encrypt_mpdu = prism2_wep_encrypt,
-	.decrypt_mpdu = prism2_wep_decrypt,
-	.encrypt_msdu = NULL,
-	.decrypt_msdu = NULL,
-	.set_key = prism2_wep_set_key,
-	.get_key = prism2_wep_get_key,
-	.print_stats = prism2_wep_print_stats,
-	.extra_mpdu_prefix_len = 4,	/* IV */
-	.extra_mpdu_postfix_len = 4,	/* ICV */
-	.owner = THIS_MODULE,
-};
-
-static int __init ieee80211_crypto_wep_init(void)
-{
-	return ieee80211_register_crypto_ops(&ieee80211_crypt_wep);
-}
-
-static void __exit ieee80211_crypto_wep_exit(void)
-{
-	ieee80211_unregister_crypto_ops(&ieee80211_crypt_wep);
-}
-
-module_init(ieee80211_crypto_wep_init);
-module_exit(ieee80211_crypto_wep_exit);
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index d34d4e79b6f7..cf21f0bd8569 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -180,13 +180,16 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 	ieee->host_open_frag = 1;
 	ieee->ieee802_1x = 1;	/* Default to supporting 802.1x */
 
-	INIT_LIST_HEAD(&ieee->crypt_deinit_list);
-	setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler,
-			(unsigned long)ieee);
-	ieee->crypt_quiesced = 0;
-
 	spin_lock_init(&ieee->lock);
 
+	ieee->crypt_info.name = dev->name;
+	ieee->crypt_info.lock = &ieee->lock;
+	INIT_LIST_HEAD(&ieee->crypt_info.crypt_deinit_list);
+	setup_timer(&ieee->crypt_info.crypt_deinit_timer,
+			lib80211_crypt_deinit_handler,
+			(unsigned long)&ieee->crypt_info);
+	ieee->crypt_info.crypt_quiesced = 0;
+
 	ieee->wpa_enabled = 0;
 	ieee->drop_unencrypted = 0;
 	ieee->privacy_invoked = 0;
@@ -205,19 +208,19 @@ void free_ieee80211(struct net_device *dev)
 
 	int i;
 
-	ieee80211_crypt_quiescing(ieee);
-	del_timer_sync(&ieee->crypt_deinit_timer);
-	ieee80211_crypt_deinit_entries(ieee, 1);
+	lib80211_crypt_quiescing(&ieee->crypt_info);
+	del_timer_sync(&ieee->crypt_info.crypt_deinit_timer);
+	lib80211_crypt_deinit_entries(&ieee->crypt_info, 1);
 
 	for (i = 0; i < WEP_KEYS; i++) {
-		struct ieee80211_crypt_data *crypt = ieee->crypt[i];
+		struct lib80211_crypt_data *crypt = ieee->crypt_info.crypt[i];
 		if (crypt) {
 			if (crypt->ops) {
 				crypt->ops->deinit(crypt->priv);
 				module_put(crypt->ops->owner);
 			}
 			kfree(crypt);
-			ieee->crypt[i] = NULL;
+			ieee->crypt_info.crypt[i] = NULL;
 		}
 	}
 
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 3dd58b594f6a..9c67dfae4320 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -268,7 +268,7 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee,
 /* Called only as a tasklet (software IRQ), by ieee80211_rx */
 static int
 ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb,
-			   struct ieee80211_crypt_data *crypt)
+			   struct lib80211_crypt_data *crypt)
 {
 	struct ieee80211_hdr_3addr *hdr;
 	int res, hdrlen;
@@ -300,7 +300,7 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb,
 static int
 ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee,
 				struct sk_buff *skb, int keyidx,
-				struct ieee80211_crypt_data *crypt)
+				struct lib80211_crypt_data *crypt)
 {
 	struct ieee80211_hdr_3addr *hdr;
 	int res, hdrlen;
@@ -348,7 +348,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 #endif
 	u8 dst[ETH_ALEN];
 	u8 src[ETH_ALEN];
-	struct ieee80211_crypt_data *crypt = NULL;
+	struct lib80211_crypt_data *crypt = NULL;
 	int keyidx = 0;
 	int can_be_decrypted = 0;
 
@@ -431,7 +431,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		 * is only allowed 2-bits of storage, no value of keyidx can
 		 * be provided via above code that would result in keyidx
 		 * being out of range */
-		crypt = ieee->crypt[keyidx];
+		crypt = ieee->crypt_info.crypt[keyidx];
 
 #ifdef NOT_YET
 		sta = NULL;
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index d996547f7a62..f78f57e8844a 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -152,7 +152,8 @@ static int ieee80211_copy_snap(u8 * data, __be16 h_proto)
 static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee,
 					     struct sk_buff *frag, int hdr_len)
 {
-	struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx];
+	struct lib80211_crypt_data *crypt =
+		ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx];
 	int res;
 
 	if (crypt == NULL)
@@ -270,7 +271,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 		.qos_ctl = 0
 	};
 	u8 dest[ETH_ALEN], src[ETH_ALEN];
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	int priority = skb->priority;
 	int snapped = 0;
 
@@ -294,7 +295,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	ether_type = ((struct ethhdr *)skb->data)->h_proto;
 
-	crypt = ieee->crypt[ieee->tx_keyidx];
+	crypt = ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx];
 
 	encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) &&
 	    ieee->sec.encrypt;
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 7cc4e5ee3660..31ea3abfc327 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -307,7 +307,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 		.flags = 0
 	};
 	int i, key, key_provided, len;
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypt_data **crypt;
 	int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv;
 	DECLARE_SSID_BUF(ssid);
 
@@ -321,30 +321,30 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 		key_provided = 1;
 	} else {
 		key_provided = 0;
-		key = ieee->tx_keyidx;
+		key = ieee->crypt_info.tx_keyidx;
 	}
 
 	IEEE80211_DEBUG_WX("Key: %d [%s]\n", key, key_provided ?
 			   "provided" : "default");
 
-	crypt = &ieee->crypt[key];
+	crypt = &ieee->crypt_info.crypt[key];
 
 	if (erq->flags & IW_ENCODE_DISABLED) {
 		if (key_provided && *crypt) {
 			IEEE80211_DEBUG_WX("Disabling encryption on key %d.\n",
 					   key);
-			ieee80211_crypt_delayed_deinit(ieee, crypt);
+			lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt);
 		} else
 			IEEE80211_DEBUG_WX("Disabling encryption.\n");
 
 		/* Check all the keys to see if any are still configured,
 		 * and if no key index was provided, de-init them all */
 		for (i = 0; i < WEP_KEYS; i++) {
-			if (ieee->crypt[i] != NULL) {
+			if (ieee->crypt_info.crypt[i] != NULL) {
 				if (key_provided)
 					break;
-				ieee80211_crypt_delayed_deinit(ieee,
-							       &ieee->crypt[i]);
+				lib80211_crypt_delayed_deinit(&ieee->crypt_info,
+							       &ieee->crypt_info.crypt[i]);
 			}
 		}
 
@@ -366,21 +366,21 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 	    strcmp((*crypt)->ops->name, "WEP") != 0) {
 		/* changing to use WEP; deinit previously used algorithm
 		 * on this key */
-		ieee80211_crypt_delayed_deinit(ieee, crypt);
+		lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt);
 	}
 
 	if (*crypt == NULL && host_crypto) {
-		struct ieee80211_crypt_data *new_crypt;
+		struct lib80211_crypt_data *new_crypt;
 
 		/* take WEP into use */
-		new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data),
+		new_crypt = kzalloc(sizeof(struct lib80211_crypt_data),
 				    GFP_KERNEL);
 		if (new_crypt == NULL)
 			return -ENOMEM;
-		new_crypt->ops = ieee80211_get_crypto_ops("WEP");
+		new_crypt->ops = lib80211_get_crypto_ops("WEP");
 		if (!new_crypt->ops) {
-			request_module("ieee80211_crypt_wep");
-			new_crypt->ops = ieee80211_get_crypto_ops("WEP");
+			request_module("lib80211_crypt_wep");
+			new_crypt->ops = lib80211_get_crypto_ops("WEP");
 		}
 
 		if (new_crypt->ops && try_module_get(new_crypt->ops->owner))
@@ -391,7 +391,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 			new_crypt = NULL;
 
 			printk(KERN_WARNING "%s: could not initialize WEP: "
-			       "load module ieee80211_crypt_wep\n", dev->name);
+			       "load module lib80211_crypt_wep\n", dev->name);
 			return -EOPNOTSUPP;
 		}
 		*crypt = new_crypt;
@@ -440,7 +440,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
 		if (key_provided) {
 			IEEE80211_DEBUG_WX("Setting key %d to default Tx "
 					   "key.\n", key);
-			ieee->tx_keyidx = key;
+			ieee->crypt_info.tx_keyidx = key;
 			sec.active_key = key;
 			sec.flags |= SEC_ACTIVE_KEY;
 		}
@@ -485,7 +485,7 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee,
 {
 	struct iw_point *erq = &(wrqu->encoding);
 	int len, key;
-	struct ieee80211_crypt_data *crypt;
+	struct lib80211_crypt_data *crypt;
 	struct ieee80211_security *sec = &ieee->sec;
 
 	IEEE80211_DEBUG_WX("GET_ENCODE\n");
@@ -496,9 +496,9 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee,
 			return -EINVAL;
 		key--;
 	} else
-		key = ieee->tx_keyidx;
+		key = ieee->crypt_info.tx_keyidx;
 
-	crypt = ieee->crypt[key];
+	crypt = ieee->crypt_info.crypt[key];
 	erq->flags = key + 1;
 
 	if (!sec->enabled) {
@@ -531,8 +531,8 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 	int i, idx, ret = 0;
 	int group_key = 0;
 	const char *alg, *module;
-	struct ieee80211_crypto_ops *ops;
-	struct ieee80211_crypt_data **crypt;
+	struct lib80211_crypto_ops *ops;
+	struct lib80211_crypt_data **crypt;
 
 	struct ieee80211_security sec = {
 		.flags = 0,
@@ -544,17 +544,17 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 			return -EINVAL;
 		idx--;
 	} else
-		idx = ieee->tx_keyidx;
+		idx = ieee->crypt_info.tx_keyidx;
 
 	if (ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) {
-		crypt = &ieee->crypt[idx];
+		crypt = &ieee->crypt_info.crypt[idx];
 		group_key = 1;
 	} else {
 		/* some Cisco APs use idx>0 for unicast in dynamic WEP */
 		if (idx != 0 && ext->alg != IW_ENCODE_ALG_WEP)
 			return -EINVAL;
 		if (ieee->iw_mode == IW_MODE_INFRA)
-			crypt = &ieee->crypt[idx];
+			crypt = &ieee->crypt_info.crypt[idx];
 		else
 			return -EINVAL;
 	}
@@ -563,10 +563,10 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 	if ((encoding->flags & IW_ENCODE_DISABLED) ||
 	    ext->alg == IW_ENCODE_ALG_NONE) {
 		if (*crypt)
-			ieee80211_crypt_delayed_deinit(ieee, crypt);
+			lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt);
 
 		for (i = 0; i < WEP_KEYS; i++)
-			if (ieee->crypt[i] != NULL)
+			if (ieee->crypt_info.crypt[i] != NULL)
 				break;
 
 		if (i == WEP_KEYS) {
@@ -589,15 +589,15 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 	switch (ext->alg) {
 	case IW_ENCODE_ALG_WEP:
 		alg = "WEP";
-		module = "ieee80211_crypt_wep";
+		module = "lib80211_crypt_wep";
 		break;
 	case IW_ENCODE_ALG_TKIP:
 		alg = "TKIP";
-		module = "ieee80211_crypt_tkip";
+		module = "lib80211_crypt_tkip";
 		break;
 	case IW_ENCODE_ALG_CCMP:
 		alg = "CCMP";
-		module = "ieee80211_crypt_ccmp";
+		module = "lib80211_crypt_ccmp";
 		break;
 	default:
 		IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n",
@@ -606,10 +606,10 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 		goto done;
 	}
 
-	ops = ieee80211_get_crypto_ops(alg);
+	ops = lib80211_get_crypto_ops(alg);
 	if (ops == NULL) {
 		request_module(module);
-		ops = ieee80211_get_crypto_ops(alg);
+		ops = lib80211_get_crypto_ops(alg);
 	}
 	if (ops == NULL) {
 		IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n",
@@ -619,9 +619,9 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 	}
 
 	if (*crypt == NULL || (*crypt)->ops != ops) {
-		struct ieee80211_crypt_data *new_crypt;
+		struct lib80211_crypt_data *new_crypt;
 
-		ieee80211_crypt_delayed_deinit(ieee, crypt);
+		lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt);
 
 		new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL);
 		if (new_crypt == NULL) {
@@ -649,7 +649,7 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee,
 
       skip_host_crypt:
 	if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) {
-		ieee->tx_keyidx = idx;
+		ieee->crypt_info.tx_keyidx = idx;
 		sec.active_key = idx;
 		sec.flags |= SEC_ACTIVE_KEY;
 	}
@@ -715,7 +715,7 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
 			return -EINVAL;
 		idx--;
 	} else
-		idx = ieee->tx_keyidx;
+		idx = ieee->crypt_info.tx_keyidx;
 
 	if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) &&
 	    ext->alg != IW_ENCODE_ALG_WEP)
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index ae7f2262dfb5..f7c64dbe86cc 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -82,3 +82,12 @@ config LIB80211
 
 	  Drivers should select this themselves if needed.  Say Y if
 	  you want this built into your kernel.
+
+config LIB80211_CRYPT_WEP
+	tristate
+
+config LIB80211_CRYPT_CCMP
+	tristate
+
+config LIB80211_CRYPT_TKIP
+	tristate
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d2d848d445f2..cc547edb111f 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,6 +1,9 @@
 obj-$(CONFIG_WIRELESS_EXT) += wext.o
 obj-$(CONFIG_CFG80211) += cfg80211.o
 obj-$(CONFIG_LIB80211) += lib80211.o
+obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
+obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
+obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o
 cfg80211-$(CONFIG_NL80211) += nl80211.o
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index e71f7d085621..d681721f4acb 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -3,11 +3,23 @@
  *
  * Copyright(c) 2008 John W. Linville <linville@tuxdriver.com>
  *
+ * Portions copied from old ieee80211 component, w/ original copyright
+ * notices below:
+ *
+ * Host AP crypto routines
+ *
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
+ * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
+ *
  */
 
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/ieee80211.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 #include <net/lib80211.h>
 
@@ -19,6 +31,14 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION);
 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
 MODULE_LICENSE("GPL");
 
+struct lib80211_crypto_alg {
+	struct list_head list;
+	struct lib80211_crypto_ops *ops;
+};
+
+static LIST_HEAD(lib80211_crypto_algs);
+static DEFINE_SPINLOCK(lib80211_crypto_lock);
+
 const char *print_ssid(char *buf, const char *ssid, u8 ssid_len)
 {
 	const char *s = ssid;
@@ -51,15 +71,176 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len)
 }
 EXPORT_SYMBOL(print_ssid);
 
-static int __init ieee80211_init(void)
+void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force)
 {
-	printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
+	struct lib80211_crypt_data *entry, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(info->lock, flags);
+	list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) {
+		if (atomic_read(&entry->refcnt) != 0 && !force)
+			continue;
+
+		list_del(&entry->list);
+
+		if (entry->ops) {
+			entry->ops->deinit(entry->priv);
+			module_put(entry->ops->owner);
+		}
+		kfree(entry);
+	}
+	spin_unlock_irqrestore(info->lock, flags);
+}
+EXPORT_SYMBOL(lib80211_crypt_deinit_entries);
+
+/* After this, crypt_deinit_list won't accept new members */
+void lib80211_crypt_quiescing(struct lib80211_crypt_info *info)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(info->lock, flags);
+	info->crypt_quiesced = 1;
+	spin_unlock_irqrestore(info->lock, flags);
+}
+EXPORT_SYMBOL(lib80211_crypt_quiescing);
+
+void lib80211_crypt_deinit_handler(unsigned long data)
+{
+	struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data;
+	unsigned long flags;
+
+	lib80211_crypt_deinit_entries(info, 0);
+
+	spin_lock_irqsave(info->lock, flags);
+	if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) {
+		printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
+		       "deletion list\n", info->name);
+		info->crypt_deinit_timer.expires = jiffies + HZ;
+		add_timer(&info->crypt_deinit_timer);
+	}
+	spin_unlock_irqrestore(info->lock, flags);
+}
+EXPORT_SYMBOL(lib80211_crypt_deinit_handler);
+
+void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
+				    struct lib80211_crypt_data **crypt)
+{
+	struct lib80211_crypt_data *tmp;
+	unsigned long flags;
+
+	if (*crypt == NULL)
+		return;
+
+	tmp = *crypt;
+	*crypt = NULL;
+
+	/* must not run ops->deinit() while there may be pending encrypt or
+	 * decrypt operations. Use a list of delayed deinits to avoid needing
+	 * locking. */
+
+	spin_lock_irqsave(info->lock, flags);
+	if (!info->crypt_quiesced) {
+		list_add(&tmp->list, &info->crypt_deinit_list);
+		if (!timer_pending(&info->crypt_deinit_timer)) {
+			info->crypt_deinit_timer.expires = jiffies + HZ;
+			add_timer(&info->crypt_deinit_timer);
+		}
+	}
+	spin_unlock_irqrestore(info->lock, flags);
+}
+EXPORT_SYMBOL(lib80211_crypt_delayed_deinit);
+
+int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops)
+{
+	unsigned long flags;
+	struct lib80211_crypto_alg *alg;
+
+	alg = kzalloc(sizeof(*alg), GFP_KERNEL);
+	if (alg == NULL)
+		return -ENOMEM;
+
+	alg->ops = ops;
+
+	spin_lock_irqsave(&lib80211_crypto_lock, flags);
+	list_add(&alg->list, &lib80211_crypto_algs);
+	spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
+
+	printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n",
+	       ops->name);
+
+	return 0;
+}
+EXPORT_SYMBOL(lib80211_register_crypto_ops);
+
+int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
+{
+	struct lib80211_crypto_alg *alg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lib80211_crypto_lock, flags);
+	list_for_each_entry(alg, &lib80211_crypto_algs, list) {
+		if (alg->ops == ops)
+			goto found;
+	}
+	spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
+	return -EINVAL;
+
+      found:
+	printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm "
+	       "'%s'\n", ops->name);
+	list_del(&alg->list);
+	spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
+	kfree(alg);
 	return 0;
 }
+EXPORT_SYMBOL(lib80211_unregister_crypto_ops);
+
+struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
+{
+	struct lib80211_crypto_alg *alg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lib80211_crypto_lock, flags);
+	list_for_each_entry(alg, &lib80211_crypto_algs, list) {
+		if (strcmp(alg->ops->name, name) == 0)
+			goto found;
+	}
+	spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
+	return NULL;
+
+      found:
+	spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
+	return alg->ops;
+}
+EXPORT_SYMBOL(lib80211_get_crypto_ops);
+
+static void *lib80211_crypt_null_init(int keyidx)
+{
+	return (void *)1;
+}
+
+static void lib80211_crypt_null_deinit(void *priv)
+{
+}
+
+static struct lib80211_crypto_ops lib80211_crypt_null = {
+	.name = "NULL",
+	.init = lib80211_crypt_null_init,
+	.deinit = lib80211_crypt_null_deinit,
+	.owner = THIS_MODULE,
+};
+
+static int __init lib80211_init(void)
+{
+	printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
+	return lib80211_register_crypto_ops(&lib80211_crypt_null);
+}
 
-static void __exit ieee80211_exit(void)
+static void __exit lib80211_exit(void)
 {
+	lib80211_unregister_crypto_ops(&lib80211_crypt_null);
+	BUG_ON(!list_empty(&lib80211_crypto_algs));
 }
 
-module_init(ieee80211_init);
-module_exit(ieee80211_exit);
+module_init(lib80211_init);
+module_exit(lib80211_exit);
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
new file mode 100644
index 000000000000..db428194c16a
--- /dev/null
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -0,0 +1,492 @@
+/*
+ * lib80211 crypt: host-based CCMP encryption implementation for lib80211
+ *
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
+ * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. See README and COPYING for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <asm/string.h>
+#include <linux/wireless.h>
+
+#include <linux/ieee80211.h>
+
+#include <linux/crypto.h>
+
+#include <net/lib80211.h>
+
+MODULE_AUTHOR("Jouni Malinen");
+MODULE_DESCRIPTION("Host AP crypt: CCMP");
+MODULE_LICENSE("GPL");
+
+#define AES_BLOCK_LEN 16
+#define CCMP_HDR_LEN 8
+#define CCMP_MIC_LEN 8
+#define CCMP_TK_LEN 16
+#define CCMP_PN_LEN 6
+
+struct lib80211_ccmp_data {
+	u8 key[CCMP_TK_LEN];
+	int key_set;
+
+	u8 tx_pn[CCMP_PN_LEN];
+	u8 rx_pn[CCMP_PN_LEN];
+
+	u32 dot11RSNAStatsCCMPFormatErrors;
+	u32 dot11RSNAStatsCCMPReplays;
+	u32 dot11RSNAStatsCCMPDecryptErrors;
+
+	int key_idx;
+
+	struct crypto_cipher *tfm;
+
+	/* scratch buffers for virt_to_page() (crypto API) */
+	u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
+	    tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN];
+	u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
+};
+
+static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
+					      const u8 pt[16], u8 ct[16])
+{
+	crypto_cipher_encrypt_one(tfm, ct, pt);
+}
+
+static void *lib80211_ccmp_init(int key_idx)
+{
+	struct lib80211_ccmp_data *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
+	if (priv == NULL)
+		goto fail;
+	priv->key_idx = key_idx;
+
+	priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm)) {
+		printk(KERN_DEBUG "lib80211_crypt_ccmp: could not allocate "
+		       "crypto API aes\n");
+		priv->tfm = NULL;
+		goto fail;
+	}
+
+	return priv;
+
+      fail:
+	if (priv) {
+		if (priv->tfm)
+			crypto_free_cipher(priv->tfm);
+		kfree(priv);
+	}
+
+	return NULL;
+}
+
+static void lib80211_ccmp_deinit(void *priv)
+{
+	struct lib80211_ccmp_data *_priv = priv;
+	if (_priv && _priv->tfm)
+		crypto_free_cipher(_priv->tfm);
+	kfree(priv);
+}
+
+static inline void xor_block(u8 * b, u8 * a, size_t len)
+{
+	int i;
+	for (i = 0; i < len; i++)
+		b[i] ^= a[i];
+}
+
+static void ccmp_init_blocks(struct crypto_cipher *tfm,
+			     struct ieee80211_hdr *hdr,
+			     u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
+{
+	u8 *pos, qc = 0;
+	size_t aad_len;
+	int a4_included, qc_included;
+	u8 aad[2 * AES_BLOCK_LEN];
+
+	a4_included = ieee80211_has_a4(hdr->frame_control);
+	qc_included = ieee80211_is_data_qos(hdr->frame_control);
+
+	aad_len = 22;
+	if (a4_included)
+		aad_len += 6;
+	if (qc_included) {
+		pos = (u8 *) & hdr->addr4;
+		if (a4_included)
+			pos += 6;
+		qc = *pos & 0x0f;
+		aad_len += 2;
+	}
+
+	/* CCM Initial Block:
+	 * Flag (Include authentication header, M=3 (8-octet MIC),
+	 *       L=1 (2-octet Dlen))
+	 * Nonce: 0x00 | A2 | PN
+	 * Dlen */
+	b0[0] = 0x59;
+	b0[1] = qc;
+	memcpy(b0 + 2, hdr->addr2, ETH_ALEN);
+	memcpy(b0 + 8, pn, CCMP_PN_LEN);
+	b0[14] = (dlen >> 8) & 0xff;
+	b0[15] = dlen & 0xff;
+
+	/* AAD:
+	 * FC with bits 4..6 and 11..13 masked to zero; 14 is always one
+	 * A1 | A2 | A3
+	 * SC with bits 4..15 (seq#) masked to zero
+	 * A4 (if present)
+	 * QC (if present)
+	 */
+	pos = (u8 *) hdr;
+	aad[0] = 0;		/* aad_len >> 8 */
+	aad[1] = aad_len & 0xff;
+	aad[2] = pos[0] & 0x8f;
+	aad[3] = pos[1] & 0xc7;
+	memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN);
+	pos = (u8 *) & hdr->seq_ctrl;
+	aad[22] = pos[0] & 0x0f;
+	aad[23] = 0;		/* all bits masked */
+	memset(aad + 24, 0, 8);
+	if (a4_included)
+		memcpy(aad + 24, hdr->addr4, ETH_ALEN);
+	if (qc_included) {
+		aad[a4_included ? 30 : 24] = qc;
+		/* rest of QC masked */
+	}
+
+	/* Start with the first block and AAD */
+	lib80211_ccmp_aes_encrypt(tfm, b0, auth);
+	xor_block(auth, aad, AES_BLOCK_LEN);
+	lib80211_ccmp_aes_encrypt(tfm, auth, auth);
+	xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN);
+	lib80211_ccmp_aes_encrypt(tfm, auth, auth);
+	b0[0] &= 0x07;
+	b0[14] = b0[15] = 0;
+	lib80211_ccmp_aes_encrypt(tfm, b0, s0);
+}
+
+static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len,
+			      u8 *aeskey, int keylen, void *priv)
+{
+	struct lib80211_ccmp_data *key = priv;
+	int i;
+	u8 *pos;
+
+	if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len)
+		return -1;
+
+	if (aeskey != NULL && keylen >= CCMP_TK_LEN)
+		memcpy(aeskey, key->key, CCMP_TK_LEN);
+
+	pos = skb_push(skb, CCMP_HDR_LEN);
+	memmove(pos, pos + CCMP_HDR_LEN, hdr_len);
+	pos += hdr_len;
+
+	i = CCMP_PN_LEN - 1;
+	while (i >= 0) {
+		key->tx_pn[i]++;
+		if (key->tx_pn[i] != 0)
+			break;
+		i--;
+	}
+
+	*pos++ = key->tx_pn[5];
+	*pos++ = key->tx_pn[4];
+	*pos++ = 0;
+	*pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
+	*pos++ = key->tx_pn[3];
+	*pos++ = key->tx_pn[2];
+	*pos++ = key->tx_pn[1];
+	*pos++ = key->tx_pn[0];
+
+	return CCMP_HDR_LEN;
+}
+
+static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct lib80211_ccmp_data *key = priv;
+	int data_len, i, blocks, last, len;
+	u8 *pos, *mic;
+	struct ieee80211_hdr *hdr;
+	u8 *b0 = key->tx_b0;
+	u8 *b = key->tx_b;
+	u8 *e = key->tx_e;
+	u8 *s0 = key->tx_s0;
+
+	if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len)
+		return -1;
+
+	data_len = skb->len - hdr_len;
+	len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv);
+	if (len < 0)
+		return -1;
+
+	pos = skb->data + hdr_len + CCMP_HDR_LEN;
+	mic = skb_put(skb, CCMP_MIC_LEN);
+	hdr = (struct ieee80211_hdr *)skb->data;
+	ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0);
+
+	blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
+	last = data_len % AES_BLOCK_LEN;
+
+	for (i = 1; i <= blocks; i++) {
+		len = (i == blocks && last) ? last : AES_BLOCK_LEN;
+		/* Authentication */
+		xor_block(b, pos, len);
+		lib80211_ccmp_aes_encrypt(key->tfm, b, b);
+		/* Encryption, with counter */
+		b0[14] = (i >> 8) & 0xff;
+		b0[15] = i & 0xff;
+		lib80211_ccmp_aes_encrypt(key->tfm, b0, e);
+		xor_block(pos, e, len);
+		pos += len;
+	}
+
+	for (i = 0; i < CCMP_MIC_LEN; i++)
+		mic[i] = b[i] ^ s0[i];
+
+	return 0;
+}
+
+/*
+ * deal with seq counter wrapping correctly.
+ * refer to timer_after() for jiffies wrapping handling
+ */
+static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o)
+{
+	u32 iv32_n, iv16_n;
+	u32 iv32_o, iv16_o;
+
+	iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3];
+	iv16_n = (pn_n[4] << 8) | pn_n[5];
+
+	iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3];
+	iv16_o = (pn_o[4] << 8) | pn_o[5];
+
+	if ((s32)iv32_n - (s32)iv32_o < 0 ||
+	    (iv32_n == iv32_o && iv16_n <= iv16_o))
+		return 1;
+	return 0;
+}
+
+static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct lib80211_ccmp_data *key = priv;
+	u8 keyidx, *pos;
+	struct ieee80211_hdr *hdr;
+	u8 *b0 = key->rx_b0;
+	u8 *b = key->rx_b;
+	u8 *a = key->rx_a;
+	u8 pn[6];
+	int i, blocks, last, len;
+	size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN;
+	u8 *mic = skb->data + skb->len - CCMP_MIC_LEN;
+
+	if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
+		key->dot11RSNAStatsCCMPFormatErrors++;
+		return -1;
+	}
+
+	hdr = (struct ieee80211_hdr *)skb->data;
+	pos = skb->data + hdr_len;
+	keyidx = pos[3];
+	if (!(keyidx & (1 << 5))) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "CCMP: received packet without ExtIV"
+			       " flag from %pM\n", hdr->addr2);
+		}
+		key->dot11RSNAStatsCCMPFormatErrors++;
+		return -2;
+	}
+	keyidx >>= 6;
+	if (key->key_idx != keyidx) {
+		printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame "
+		       "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv);
+		return -6;
+	}
+	if (!key->key_set) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "CCMP: received packet from %pM"
+			       " with keyid=%d that does not have a configured"
+			       " key\n", hdr->addr2, keyidx);
+		}
+		return -3;
+	}
+
+	pn[0] = pos[7];
+	pn[1] = pos[6];
+	pn[2] = pos[5];
+	pn[3] = pos[4];
+	pn[4] = pos[1];
+	pn[5] = pos[0];
+	pos += 8;
+
+	if (ccmp_replay_check(pn, key->rx_pn)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "CCMP: replay detected: STA=%pM "
+				 "previous PN %02x%02x%02x%02x%02x%02x "
+				 "received PN %02x%02x%02x%02x%02x%02x\n",
+				 hdr->addr2,
+				 key->rx_pn[0], key->rx_pn[1], key->rx_pn[2],
+				 key->rx_pn[3], key->rx_pn[4], key->rx_pn[5],
+				 pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
+		}
+		key->dot11RSNAStatsCCMPReplays++;
+		return -4;
+	}
+
+	ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b);
+	xor_block(mic, b, CCMP_MIC_LEN);
+
+	blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN);
+	last = data_len % AES_BLOCK_LEN;
+
+	for (i = 1; i <= blocks; i++) {
+		len = (i == blocks && last) ? last : AES_BLOCK_LEN;
+		/* Decrypt, with counter */
+		b0[14] = (i >> 8) & 0xff;
+		b0[15] = i & 0xff;
+		lib80211_ccmp_aes_encrypt(key->tfm, b0, b);
+		xor_block(pos, b, len);
+		/* Authentication */
+		xor_block(a, pos, len);
+		lib80211_ccmp_aes_encrypt(key->tfm, a, a);
+		pos += len;
+	}
+
+	if (memcmp(mic, a, CCMP_MIC_LEN) != 0) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "CCMP: decrypt failed: STA="
+			       "%pM\n", hdr->addr2);
+		}
+		key->dot11RSNAStatsCCMPDecryptErrors++;
+		return -5;
+	}
+
+	memcpy(key->rx_pn, pn, CCMP_PN_LEN);
+
+	/* Remove hdr and MIC */
+	memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len);
+	skb_pull(skb, CCMP_HDR_LEN);
+	skb_trim(skb, skb->len - CCMP_MIC_LEN);
+
+	return keyidx;
+}
+
+static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv)
+{
+	struct lib80211_ccmp_data *data = priv;
+	int keyidx;
+	struct crypto_cipher *tfm = data->tfm;
+
+	keyidx = data->key_idx;
+	memset(data, 0, sizeof(*data));
+	data->key_idx = keyidx;
+	data->tfm = tfm;
+	if (len == CCMP_TK_LEN) {
+		memcpy(data->key, key, CCMP_TK_LEN);
+		data->key_set = 1;
+		if (seq) {
+			data->rx_pn[0] = seq[5];
+			data->rx_pn[1] = seq[4];
+			data->rx_pn[2] = seq[3];
+			data->rx_pn[3] = seq[2];
+			data->rx_pn[4] = seq[1];
+			data->rx_pn[5] = seq[0];
+		}
+		crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN);
+	} else if (len == 0)
+		data->key_set = 0;
+	else
+		return -1;
+
+	return 0;
+}
+
+static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
+{
+	struct lib80211_ccmp_data *data = priv;
+
+	if (len < CCMP_TK_LEN)
+		return -1;
+
+	if (!data->key_set)
+		return 0;
+	memcpy(key, data->key, CCMP_TK_LEN);
+
+	if (seq) {
+		seq[0] = data->tx_pn[5];
+		seq[1] = data->tx_pn[4];
+		seq[2] = data->tx_pn[3];
+		seq[3] = data->tx_pn[2];
+		seq[4] = data->tx_pn[1];
+		seq[5] = data->tx_pn[0];
+	}
+
+	return CCMP_TK_LEN;
+}
+
+static char *lib80211_ccmp_print_stats(char *p, void *priv)
+{
+	struct lib80211_ccmp_data *ccmp = priv;
+
+	p += sprintf(p, "key[%d] alg=CCMP key_set=%d "
+		     "tx_pn=%02x%02x%02x%02x%02x%02x "
+		     "rx_pn=%02x%02x%02x%02x%02x%02x "
+		     "format_errors=%d replays=%d decrypt_errors=%d\n",
+		     ccmp->key_idx, ccmp->key_set,
+		     ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
+		     ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
+		     ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
+		     ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
+		     ccmp->dot11RSNAStatsCCMPFormatErrors,
+		     ccmp->dot11RSNAStatsCCMPReplays,
+		     ccmp->dot11RSNAStatsCCMPDecryptErrors);
+
+	return p;
+}
+
+static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
+	.name = "CCMP",
+	.init = lib80211_ccmp_init,
+	.deinit = lib80211_ccmp_deinit,
+	.build_iv = lib80211_ccmp_hdr,
+	.encrypt_mpdu = lib80211_ccmp_encrypt,
+	.decrypt_mpdu = lib80211_ccmp_decrypt,
+	.encrypt_msdu = NULL,
+	.decrypt_msdu = NULL,
+	.set_key = lib80211_ccmp_set_key,
+	.get_key = lib80211_ccmp_get_key,
+	.print_stats = lib80211_ccmp_print_stats,
+	.extra_mpdu_prefix_len = CCMP_HDR_LEN,
+	.extra_mpdu_postfix_len = CCMP_MIC_LEN,
+	.owner = THIS_MODULE,
+};
+
+static int __init lib80211_crypto_ccmp_init(void)
+{
+	return lib80211_register_crypto_ops(&lib80211_crypt_ccmp);
+}
+
+static void __exit lib80211_crypto_ccmp_exit(void)
+{
+	lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp);
+}
+
+module_init(lib80211_crypto_ccmp_init);
+module_exit(lib80211_crypto_ccmp_exit);
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
new file mode 100644
index 000000000000..7e8e22bfed90
--- /dev/null
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -0,0 +1,784 @@
+/*
+ * lib80211 crypt: host-based TKIP encryption implementation for lib80211
+ *
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
+ * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. See README and COPYING for
+ * more details.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/mm.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <asm/string.h>
+
+#include <linux/wireless.h>
+#include <linux/ieee80211.h>
+#include <net/iw_handler.h>
+
+#include <linux/crypto.h>
+#include <linux/crc32.h>
+
+#include <net/lib80211.h>
+
+MODULE_AUTHOR("Jouni Malinen");
+MODULE_DESCRIPTION("lib80211 crypt: TKIP");
+MODULE_LICENSE("GPL");
+
+struct lib80211_tkip_data {
+#define TKIP_KEY_LEN 32
+	u8 key[TKIP_KEY_LEN];
+	int key_set;
+
+	u32 tx_iv32;
+	u16 tx_iv16;
+	u16 tx_ttak[5];
+	int tx_phase1_done;
+
+	u32 rx_iv32;
+	u16 rx_iv16;
+	u16 rx_ttak[5];
+	int rx_phase1_done;
+	u32 rx_iv32_new;
+	u16 rx_iv16_new;
+
+	u32 dot11RSNAStatsTKIPReplays;
+	u32 dot11RSNAStatsTKIPICVErrors;
+	u32 dot11RSNAStatsTKIPLocalMICFailures;
+
+	int key_idx;
+
+	struct crypto_blkcipher *rx_tfm_arc4;
+	struct crypto_hash *rx_tfm_michael;
+	struct crypto_blkcipher *tx_tfm_arc4;
+	struct crypto_hash *tx_tfm_michael;
+
+	/* scratch buffers for virt_to_page() (crypto API) */
+	u8 rx_hdr[16], tx_hdr[16];
+
+	unsigned long flags;
+};
+
+static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv)
+{
+	struct lib80211_tkip_data *_priv = priv;
+	unsigned long old_flags = _priv->flags;
+	_priv->flags = flags;
+	return old_flags;
+}
+
+static unsigned long lib80211_tkip_get_flags(void *priv)
+{
+	struct lib80211_tkip_data *_priv = priv;
+	return _priv->flags;
+}
+
+static void *lib80211_tkip_init(int key_idx)
+{
+	struct lib80211_tkip_data *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
+	if (priv == NULL)
+		goto fail;
+
+	priv->key_idx = key_idx;
+
+	priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
+						CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tx_tfm_arc4)) {
+		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
+		       "crypto API arc4\n");
+		priv->tx_tfm_arc4 = NULL;
+		goto fail;
+	}
+
+	priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
+						 CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tx_tfm_michael)) {
+		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
+		       "crypto API michael_mic\n");
+		priv->tx_tfm_michael = NULL;
+		goto fail;
+	}
+
+	priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
+						CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->rx_tfm_arc4)) {
+		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
+		       "crypto API arc4\n");
+		priv->rx_tfm_arc4 = NULL;
+		goto fail;
+	}
+
+	priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
+						 CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->rx_tfm_michael)) {
+		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
+		       "crypto API michael_mic\n");
+		priv->rx_tfm_michael = NULL;
+		goto fail;
+	}
+
+	return priv;
+
+      fail:
+	if (priv) {
+		if (priv->tx_tfm_michael)
+			crypto_free_hash(priv->tx_tfm_michael);
+		if (priv->tx_tfm_arc4)
+			crypto_free_blkcipher(priv->tx_tfm_arc4);
+		if (priv->rx_tfm_michael)
+			crypto_free_hash(priv->rx_tfm_michael);
+		if (priv->rx_tfm_arc4)
+			crypto_free_blkcipher(priv->rx_tfm_arc4);
+		kfree(priv);
+	}
+
+	return NULL;
+}
+
+static void lib80211_tkip_deinit(void *priv)
+{
+	struct lib80211_tkip_data *_priv = priv;
+	if (_priv) {
+		if (_priv->tx_tfm_michael)
+			crypto_free_hash(_priv->tx_tfm_michael);
+		if (_priv->tx_tfm_arc4)
+			crypto_free_blkcipher(_priv->tx_tfm_arc4);
+		if (_priv->rx_tfm_michael)
+			crypto_free_hash(_priv->rx_tfm_michael);
+		if (_priv->rx_tfm_arc4)
+			crypto_free_blkcipher(_priv->rx_tfm_arc4);
+	}
+	kfree(priv);
+}
+
+static inline u16 RotR1(u16 val)
+{
+	return (val >> 1) | (val << 15);
+}
+
+static inline u8 Lo8(u16 val)
+{
+	return val & 0xff;
+}
+
+static inline u8 Hi8(u16 val)
+{
+	return val >> 8;
+}
+
+static inline u16 Lo16(u32 val)
+{
+	return val & 0xffff;
+}
+
+static inline u16 Hi16(u32 val)
+{
+	return val >> 16;
+}
+
+static inline u16 Mk16(u8 hi, u8 lo)
+{
+	return lo | (((u16) hi) << 8);
+}
+
+static inline u16 Mk16_le(__le16 * v)
+{
+	return le16_to_cpu(*v);
+}
+
+static const u16 Sbox[256] = {
+	0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
+	0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
+	0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
+	0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
+	0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
+	0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
+	0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
+	0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
+	0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
+	0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
+	0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
+	0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
+	0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
+	0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
+	0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
+	0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
+	0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
+	0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
+	0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
+	0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
+	0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
+	0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
+	0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
+	0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
+	0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
+	0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
+	0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
+	0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
+	0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
+	0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
+	0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
+	0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
+};
+
+static inline u16 _S_(u16 v)
+{
+	u16 t = Sbox[Hi8(v)];
+	return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8));
+}
+
+#define PHASE1_LOOP_COUNT 8
+
+static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA,
+			       u32 IV32)
+{
+	int i, j;
+
+	/* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */
+	TTAK[0] = Lo16(IV32);
+	TTAK[1] = Hi16(IV32);
+	TTAK[2] = Mk16(TA[1], TA[0]);
+	TTAK[3] = Mk16(TA[3], TA[2]);
+	TTAK[4] = Mk16(TA[5], TA[4]);
+
+	for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
+		j = 2 * (i & 1);
+		TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j]));
+		TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j]));
+		TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j]));
+		TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j]));
+		TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i;
+	}
+}
+
+static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK,
+			       u16 IV16)
+{
+	/* Make temporary area overlap WEP seed so that the final copy can be
+	 * avoided on little endian hosts. */
+	u16 *PPK = (u16 *) & WEPSeed[4];
+
+	/* Step 1 - make copy of TTAK and bring in TSC */
+	PPK[0] = TTAK[0];
+	PPK[1] = TTAK[1];
+	PPK[2] = TTAK[2];
+	PPK[3] = TTAK[3];
+	PPK[4] = TTAK[4];
+	PPK[5] = TTAK[4] + IV16;
+
+	/* Step 2 - 96-bit bijective mixing using S-box */
+	PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0]));
+	PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2]));
+	PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4]));
+	PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6]));
+	PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8]));
+	PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10]));
+
+	PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12]));
+	PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14]));
+	PPK[2] += RotR1(PPK[1]);
+	PPK[3] += RotR1(PPK[2]);
+	PPK[4] += RotR1(PPK[3]);
+	PPK[5] += RotR1(PPK[4]);
+
+	/* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value
+	 * WEPSeed[0..2] is transmitted as WEP IV */
+	WEPSeed[0] = Hi8(IV16);
+	WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
+	WEPSeed[2] = Lo8(IV16);
+	WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1);
+
+#ifdef __BIG_ENDIAN
+	{
+		int i;
+		for (i = 0; i < 6; i++)
+			PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8);
+	}
+#endif
+}
+
+static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
+			      u8 * rc4key, int keylen, void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+	int len;
+	u8 *pos;
+	struct ieee80211_hdr *hdr;
+
+	hdr = (struct ieee80211_hdr *)skb->data;
+
+	if (skb_headroom(skb) < 8 || skb->len < hdr_len)
+		return -1;
+
+	if (rc4key == NULL || keylen < 16)
+		return -1;
+
+	if (!tkey->tx_phase1_done) {
+		tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
+				   tkey->tx_iv32);
+		tkey->tx_phase1_done = 1;
+	}
+	tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16);
+
+	len = skb->len - hdr_len;
+	pos = skb_push(skb, 8);
+	memmove(pos, pos + 8, hdr_len);
+	pos += hdr_len;
+
+	*pos++ = *rc4key;
+	*pos++ = *(rc4key + 1);
+	*pos++ = *(rc4key + 2);
+	*pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ;
+	*pos++ = tkey->tx_iv32 & 0xff;
+	*pos++ = (tkey->tx_iv32 >> 8) & 0xff;
+	*pos++ = (tkey->tx_iv32 >> 16) & 0xff;
+	*pos++ = (tkey->tx_iv32 >> 24) & 0xff;
+
+	tkey->tx_iv16++;
+	if (tkey->tx_iv16 == 0) {
+		tkey->tx_phase1_done = 0;
+		tkey->tx_iv32++;
+	}
+
+	return 8;
+}
+
+static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+	struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 };
+	int len;
+	u8 rc4key[16], *pos, *icv;
+	u32 crc;
+	struct scatterlist sg;
+
+	if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
+		if (net_ratelimit()) {
+			struct ieee80211_hdr *hdr =
+			    (struct ieee80211_hdr *)skb->data;
+			printk(KERN_DEBUG ": TKIP countermeasures: dropped "
+			       "TX packet to %pM\n", hdr->addr1);
+		}
+		return -1;
+	}
+
+	if (skb_tailroom(skb) < 4 || skb->len < hdr_len)
+		return -1;
+
+	len = skb->len - hdr_len;
+	pos = skb->data + hdr_len;
+
+	if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0)
+		return -1;
+
+	icv = skb_put(skb, 4);
+
+	crc = ~crc32_le(~0, pos, len);
+	icv[0] = crc;
+	icv[1] = crc >> 8;
+	icv[2] = crc >> 16;
+	icv[3] = crc >> 24;
+
+	crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
+	sg_init_one(&sg, pos, len + 4);
+	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
+}
+
+/*
+ * deal with seq counter wrapping correctly.
+ * refer to timer_after() for jiffies wrapping handling
+ */
+static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
+				    u32 iv32_o, u16 iv16_o)
+{
+	if ((s32)iv32_n - (s32)iv32_o < 0 ||
+	    (iv32_n == iv32_o && iv16_n <= iv16_o))
+		return 1;
+	return 0;
+}
+
+static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+	struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 };
+	u8 rc4key[16];
+	u8 keyidx, *pos;
+	u32 iv32;
+	u16 iv16;
+	struct ieee80211_hdr *hdr;
+	u8 icv[4];
+	u32 crc;
+	struct scatterlist sg;
+	int plen;
+
+	hdr = (struct ieee80211_hdr *)skb->data;
+
+	if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG ": TKIP countermeasures: dropped "
+			       "received packet from %pM\n", hdr->addr2);
+		}
+		return -1;
+	}
+
+	if (skb->len < hdr_len + 8 + 4)
+		return -1;
+
+	pos = skb->data + hdr_len;
+	keyidx = pos[3];
+	if (!(keyidx & (1 << 5))) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "TKIP: received packet without ExtIV"
+			       " flag from %pM\n", hdr->addr2);
+		}
+		return -2;
+	}
+	keyidx >>= 6;
+	if (tkey->key_idx != keyidx) {
+		printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame "
+		       "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv);
+		return -6;
+	}
+	if (!tkey->key_set) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "TKIP: received packet from %pM"
+			       " with keyid=%d that does not have a configured"
+			       " key\n", hdr->addr2, keyidx);
+		}
+		return -3;
+	}
+	iv16 = (pos[0] << 8) | pos[2];
+	iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
+	pos += 8;
+
+	if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "TKIP: replay detected: STA=%pM"
+			       " previous TSC %08x%04x received TSC "
+			       "%08x%04x\n", hdr->addr2,
+			       tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
+		}
+		tkey->dot11RSNAStatsTKIPReplays++;
+		return -4;
+	}
+
+	if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) {
+		tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32);
+		tkey->rx_phase1_done = 1;
+	}
+	tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16);
+
+	plen = skb->len - hdr_len - 12;
+
+	crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
+	sg_init_one(&sg, pos, plen + 4);
+	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG ": TKIP: failed to decrypt "
+			       "received packet from %pM\n",
+			       hdr->addr2);
+		}
+		return -7;
+	}
+
+	crc = ~crc32_le(~0, pos, plen);
+	icv[0] = crc;
+	icv[1] = crc >> 8;
+	icv[2] = crc >> 16;
+	icv[3] = crc >> 24;
+	if (memcmp(icv, pos + plen, 4) != 0) {
+		if (iv32 != tkey->rx_iv32) {
+			/* Previously cached Phase1 result was already lost, so
+			 * it needs to be recalculated for the next packet. */
+			tkey->rx_phase1_done = 0;
+		}
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG "TKIP: ICV error detected: STA="
+			       "%pM\n", hdr->addr2);
+		}
+		tkey->dot11RSNAStatsTKIPICVErrors++;
+		return -5;
+	}
+
+	/* Update real counters only after Michael MIC verification has
+	 * completed */
+	tkey->rx_iv32_new = iv32;
+	tkey->rx_iv16_new = iv16;
+
+	/* Remove IV and ICV */
+	memmove(skb->data + 8, skb->data, hdr_len);
+	skb_pull(skb, 8);
+	skb_trim(skb, skb->len - 4);
+
+	return keyidx;
+}
+
+static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
+		       u8 * data, size_t data_len, u8 * mic)
+{
+	struct hash_desc desc;
+	struct scatterlist sg[2];
+
+	if (tfm_michael == NULL) {
+		printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n");
+		return -1;
+	}
+	sg_init_table(sg, 2);
+	sg_set_buf(&sg[0], hdr, 16);
+	sg_set_buf(&sg[1], data, data_len);
+
+	if (crypto_hash_setkey(tfm_michael, key, 8))
+		return -1;
+
+	desc.tfm = tfm_michael;
+	desc.flags = 0;
+	return crypto_hash_digest(&desc, sg, data_len + 16, mic);
+}
+
+static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
+{
+	struct ieee80211_hdr *hdr11;
+
+	hdr11 = (struct ieee80211_hdr *)skb->data;
+
+	switch (le16_to_cpu(hdr11->frame_control) &
+		(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
+	case IEEE80211_FCTL_TODS:
+		memcpy(hdr, hdr11->addr3, ETH_ALEN);	/* DA */
+		memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN);	/* SA */
+		break;
+	case IEEE80211_FCTL_FROMDS:
+		memcpy(hdr, hdr11->addr1, ETH_ALEN);	/* DA */
+		memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN);	/* SA */
+		break;
+	case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS:
+		memcpy(hdr, hdr11->addr3, ETH_ALEN);	/* DA */
+		memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN);	/* SA */
+		break;
+	case 0:
+		memcpy(hdr, hdr11->addr1, ETH_ALEN);	/* DA */
+		memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN);	/* SA */
+		break;
+	}
+
+	if (ieee80211_is_data_qos(hdr11->frame_control)) {
+		hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11))
+			& IEEE80211_QOS_CTL_TID_MASK;
+	} else
+		hdr[12] = 0;		/* priority */
+
+	hdr[13] = hdr[14] = hdr[15] = 0;	/* reserved */
+}
+
+static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len,
+				     void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+	u8 *pos;
+
+	if (skb_tailroom(skb) < 8 || skb->len < hdr_len) {
+		printk(KERN_DEBUG "Invalid packet for Michael MIC add "
+		       "(tailroom=%d hdr_len=%d skb->len=%d)\n",
+		       skb_tailroom(skb), hdr_len, skb->len);
+		return -1;
+	}
+
+	michael_mic_hdr(skb, tkey->tx_hdr);
+	pos = skb_put(skb, 8);
+	if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr,
+			skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
+		return -1;
+
+	return 0;
+}
+
+static void lib80211_michael_mic_failure(struct net_device *dev,
+					  struct ieee80211_hdr *hdr,
+					  int keyidx)
+{
+	union iwreq_data wrqu;
+	struct iw_michaelmicfailure ev;
+
+	/* TODO: needed parameters: count, keyid, key type, TSC */
+	memset(&ev, 0, sizeof(ev));
+	ev.flags = keyidx & IW_MICFAILURE_KEY_ID;
+	if (hdr->addr1[0] & 0x01)
+		ev.flags |= IW_MICFAILURE_GROUP;
+	else
+		ev.flags |= IW_MICFAILURE_PAIRWISE;
+	ev.src_addr.sa_family = ARPHRD_ETHER;
+	memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN);
+	memset(&wrqu, 0, sizeof(wrqu));
+	wrqu.data.length = sizeof(ev);
+	wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev);
+}
+
+static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx,
+					int hdr_len, void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+	u8 mic[8];
+
+	if (!tkey->key_set)
+		return -1;
+
+	michael_mic_hdr(skb, tkey->rx_hdr);
+	if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr,
+			skb->data + hdr_len, skb->len - 8 - hdr_len, mic))
+		return -1;
+	if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) {
+		struct ieee80211_hdr *hdr;
+		hdr = (struct ieee80211_hdr *)skb->data;
+		printk(KERN_DEBUG "%s: Michael MIC verification failed for "
+		       "MSDU from %pM keyidx=%d\n",
+		       skb->dev ? skb->dev->name : "N/A", hdr->addr2,
+		       keyidx);
+		if (skb->dev)
+			lib80211_michael_mic_failure(skb->dev, hdr, keyidx);
+		tkey->dot11RSNAStatsTKIPLocalMICFailures++;
+		return -1;
+	}
+
+	/* Update TSC counters for RX now that the packet verification has
+	 * completed. */
+	tkey->rx_iv32 = tkey->rx_iv32_new;
+	tkey->rx_iv16 = tkey->rx_iv16_new;
+
+	skb_trim(skb, skb->len - 8);
+
+	return 0;
+}
+
+static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+	int keyidx;
+	struct crypto_hash *tfm = tkey->tx_tfm_michael;
+	struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4;
+	struct crypto_hash *tfm3 = tkey->rx_tfm_michael;
+	struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4;
+
+	keyidx = tkey->key_idx;
+	memset(tkey, 0, sizeof(*tkey));
+	tkey->key_idx = keyidx;
+	tkey->tx_tfm_michael = tfm;
+	tkey->tx_tfm_arc4 = tfm2;
+	tkey->rx_tfm_michael = tfm3;
+	tkey->rx_tfm_arc4 = tfm4;
+	if (len == TKIP_KEY_LEN) {
+		memcpy(tkey->key, key, TKIP_KEY_LEN);
+		tkey->key_set = 1;
+		tkey->tx_iv16 = 1;	/* TSC is initialized to 1 */
+		if (seq) {
+			tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) |
+			    (seq[3] << 8) | seq[2];
+			tkey->rx_iv16 = (seq[1] << 8) | seq[0];
+		}
+	} else if (len == 0)
+		tkey->key_set = 0;
+	else
+		return -1;
+
+	return 0;
+}
+
+static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
+{
+	struct lib80211_tkip_data *tkey = priv;
+
+	if (len < TKIP_KEY_LEN)
+		return -1;
+
+	if (!tkey->key_set)
+		return 0;
+	memcpy(key, tkey->key, TKIP_KEY_LEN);
+
+	if (seq) {
+		/* Return the sequence number of the last transmitted frame. */
+		u16 iv16 = tkey->tx_iv16;
+		u32 iv32 = tkey->tx_iv32;
+		if (iv16 == 0)
+			iv32--;
+		iv16--;
+		seq[0] = tkey->tx_iv16;
+		seq[1] = tkey->tx_iv16 >> 8;
+		seq[2] = tkey->tx_iv32;
+		seq[3] = tkey->tx_iv32 >> 8;
+		seq[4] = tkey->tx_iv32 >> 16;
+		seq[5] = tkey->tx_iv32 >> 24;
+	}
+
+	return TKIP_KEY_LEN;
+}
+
+static char *lib80211_tkip_print_stats(char *p, void *priv)
+{
+	struct lib80211_tkip_data *tkip = priv;
+	p += sprintf(p, "key[%d] alg=TKIP key_set=%d "
+		     "tx_pn=%02x%02x%02x%02x%02x%02x "
+		     "rx_pn=%02x%02x%02x%02x%02x%02x "
+		     "replays=%d icv_errors=%d local_mic_failures=%d\n",
+		     tkip->key_idx, tkip->key_set,
+		     (tkip->tx_iv32 >> 24) & 0xff,
+		     (tkip->tx_iv32 >> 16) & 0xff,
+		     (tkip->tx_iv32 >> 8) & 0xff,
+		     tkip->tx_iv32 & 0xff,
+		     (tkip->tx_iv16 >> 8) & 0xff,
+		     tkip->tx_iv16 & 0xff,
+		     (tkip->rx_iv32 >> 24) & 0xff,
+		     (tkip->rx_iv32 >> 16) & 0xff,
+		     (tkip->rx_iv32 >> 8) & 0xff,
+		     tkip->rx_iv32 & 0xff,
+		     (tkip->rx_iv16 >> 8) & 0xff,
+		     tkip->rx_iv16 & 0xff,
+		     tkip->dot11RSNAStatsTKIPReplays,
+		     tkip->dot11RSNAStatsTKIPICVErrors,
+		     tkip->dot11RSNAStatsTKIPLocalMICFailures);
+	return p;
+}
+
+static struct lib80211_crypto_ops lib80211_crypt_tkip = {
+	.name = "TKIP",
+	.init = lib80211_tkip_init,
+	.deinit = lib80211_tkip_deinit,
+	.build_iv = lib80211_tkip_hdr,
+	.encrypt_mpdu = lib80211_tkip_encrypt,
+	.decrypt_mpdu = lib80211_tkip_decrypt,
+	.encrypt_msdu = lib80211_michael_mic_add,
+	.decrypt_msdu = lib80211_michael_mic_verify,
+	.set_key = lib80211_tkip_set_key,
+	.get_key = lib80211_tkip_get_key,
+	.print_stats = lib80211_tkip_print_stats,
+	.extra_mpdu_prefix_len = 4 + 4,	/* IV + ExtIV */
+	.extra_mpdu_postfix_len = 4,	/* ICV */
+	.extra_msdu_postfix_len = 8,	/* MIC */
+	.get_flags = lib80211_tkip_get_flags,
+	.set_flags = lib80211_tkip_set_flags,
+	.owner = THIS_MODULE,
+};
+
+static int __init lib80211_crypto_tkip_init(void)
+{
+	return lib80211_register_crypto_ops(&lib80211_crypt_tkip);
+}
+
+static void __exit lib80211_crypto_tkip_exit(void)
+{
+	lib80211_unregister_crypto_ops(&lib80211_crypt_tkip);
+}
+
+module_init(lib80211_crypto_tkip_init);
+module_exit(lib80211_crypto_tkip_exit);
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
new file mode 100644
index 000000000000..6d41e05ca33b
--- /dev/null
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -0,0 +1,296 @@
+/*
+ * lib80211 crypt: host-based WEP encryption implementation for lib80211
+ *
+ * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
+ * Copyright (c) 2008, John W. Linville <linville@tuxdriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. See README and COPYING for
+ * more details.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <asm/string.h>
+
+#include <net/lib80211.h>
+
+#include <linux/crypto.h>
+#include <linux/crc32.h>
+
+MODULE_AUTHOR("Jouni Malinen");
+MODULE_DESCRIPTION("lib80211 crypt: WEP");
+MODULE_LICENSE("GPL");
+
+struct lib80211_wep_data {
+	u32 iv;
+#define WEP_KEY_LEN 13
+	u8 key[WEP_KEY_LEN + 1];
+	u8 key_len;
+	u8 key_idx;
+	struct crypto_blkcipher *tx_tfm;
+	struct crypto_blkcipher *rx_tfm;
+};
+
+static void *lib80211_wep_init(int keyidx)
+{
+	struct lib80211_wep_data *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
+	if (priv == NULL)
+		goto fail;
+	priv->key_idx = keyidx;
+
+	priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tx_tfm)) {
+		printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate "
+		       "crypto API arc4\n");
+		priv->tx_tfm = NULL;
+		goto fail;
+	}
+
+	priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->rx_tfm)) {
+		printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate "
+		       "crypto API arc4\n");
+		priv->rx_tfm = NULL;
+		goto fail;
+	}
+	/* start WEP IV from a random value */
+	get_random_bytes(&priv->iv, 4);
+
+	return priv;
+
+      fail:
+	if (priv) {
+		if (priv->tx_tfm)
+			crypto_free_blkcipher(priv->tx_tfm);
+		if (priv->rx_tfm)
+			crypto_free_blkcipher(priv->rx_tfm);
+		kfree(priv);
+	}
+	return NULL;
+}
+
+static void lib80211_wep_deinit(void *priv)
+{
+	struct lib80211_wep_data *_priv = priv;
+	if (_priv) {
+		if (_priv->tx_tfm)
+			crypto_free_blkcipher(_priv->tx_tfm);
+		if (_priv->rx_tfm)
+			crypto_free_blkcipher(_priv->rx_tfm);
+	}
+	kfree(priv);
+}
+
+/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
+static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
+			       u8 *key, int keylen, void *priv)
+{
+	struct lib80211_wep_data *wep = priv;
+	u32 klen, len;
+	u8 *pos;
+
+	if (skb_headroom(skb) < 4 || skb->len < hdr_len)
+		return -1;
+
+	len = skb->len - hdr_len;
+	pos = skb_push(skb, 4);
+	memmove(pos, pos + 4, hdr_len);
+	pos += hdr_len;
+
+	klen = 3 + wep->key_len;
+
+	wep->iv++;
+
+	/* Fluhrer, Mantin, and Shamir have reported weaknesses in the key
+	 * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N)
+	 * can be used to speedup attacks, so avoid using them. */
+	if ((wep->iv & 0xff00) == 0xff00) {
+		u8 B = (wep->iv >> 16) & 0xff;
+		if (B >= 3 && B < klen)
+			wep->iv += 0x0100;
+	}
+
+	/* Prepend 24-bit IV to RC4 key and TX frame */
+	*pos++ = (wep->iv >> 16) & 0xff;
+	*pos++ = (wep->iv >> 8) & 0xff;
+	*pos++ = wep->iv & 0xff;
+	*pos++ = wep->key_idx << 6;
+
+	return 0;
+}
+
+/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
+ * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
+ * so the payload length increases with 8 bytes.
+ *
+ * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
+ */
+static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct lib80211_wep_data *wep = priv;
+	struct blkcipher_desc desc = { .tfm = wep->tx_tfm };
+	u32 crc, klen, len;
+	u8 *pos, *icv;
+	struct scatterlist sg;
+	u8 key[WEP_KEY_LEN + 3];
+
+	/* other checks are in lib80211_wep_build_iv */
+	if (skb_tailroom(skb) < 4)
+		return -1;
+
+	/* add the IV to the frame */
+	if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv))
+		return -1;
+
+	/* Copy the IV into the first 3 bytes of the key */
+	skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
+
+	/* Copy rest of the WEP key (the secret part) */
+	memcpy(key + 3, wep->key, wep->key_len);
+
+	len = skb->len - hdr_len - 4;
+	pos = skb->data + hdr_len + 4;
+	klen = 3 + wep->key_len;
+
+	/* Append little-endian CRC32 over only the data and encrypt it to produce ICV */
+	crc = ~crc32_le(~0, pos, len);
+	icv = skb_put(skb, 4);
+	icv[0] = crc;
+	icv[1] = crc >> 8;
+	icv[2] = crc >> 16;
+	icv[3] = crc >> 24;
+
+	crypto_blkcipher_setkey(wep->tx_tfm, key, klen);
+	sg_init_one(&sg, pos, len + 4);
+	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
+}
+
+/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
+ * the frame: IV (4 bytes), encrypted payload (including SNAP header),
+ * ICV (4 bytes). len includes both IV and ICV.
+ *
+ * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on
+ * failure. If frame is OK, IV and ICV will be removed.
+ */
+static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
+{
+	struct lib80211_wep_data *wep = priv;
+	struct blkcipher_desc desc = { .tfm = wep->rx_tfm };
+	u32 crc, klen, plen;
+	u8 key[WEP_KEY_LEN + 3];
+	u8 keyidx, *pos, icv[4];
+	struct scatterlist sg;
+
+	if (skb->len < hdr_len + 8)
+		return -1;
+
+	pos = skb->data + hdr_len;
+	key[0] = *pos++;
+	key[1] = *pos++;
+	key[2] = *pos++;
+	keyidx = *pos++ >> 6;
+	if (keyidx != wep->key_idx)
+		return -1;
+
+	klen = 3 + wep->key_len;
+
+	/* Copy rest of the WEP key (the secret part) */
+	memcpy(key + 3, wep->key, wep->key_len);
+
+	/* Apply RC4 to data and compute CRC32 over decrypted data */
+	plen = skb->len - hdr_len - 8;
+
+	crypto_blkcipher_setkey(wep->rx_tfm, key, klen);
+	sg_init_one(&sg, pos, plen + 4);
+	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
+		return -7;
+
+	crc = ~crc32_le(~0, pos, plen);
+	icv[0] = crc;
+	icv[1] = crc >> 8;
+	icv[2] = crc >> 16;
+	icv[3] = crc >> 24;
+	if (memcmp(icv, pos + plen, 4) != 0) {
+		/* ICV mismatch - drop frame */
+		return -2;
+	}
+
+	/* Remove IV and ICV */
+	memmove(skb->data + 4, skb->data, hdr_len);
+	skb_pull(skb, 4);
+	skb_trim(skb, skb->len - 4);
+
+	return 0;
+}
+
+static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv)
+{
+	struct lib80211_wep_data *wep = priv;
+
+	if (len < 0 || len > WEP_KEY_LEN)
+		return -1;
+
+	memcpy(wep->key, key, len);
+	wep->key_len = len;
+
+	return 0;
+}
+
+static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv)
+{
+	struct lib80211_wep_data *wep = priv;
+
+	if (len < wep->key_len)
+		return -1;
+
+	memcpy(key, wep->key, wep->key_len);
+
+	return wep->key_len;
+}
+
+static char *lib80211_wep_print_stats(char *p, void *priv)
+{
+	struct lib80211_wep_data *wep = priv;
+	p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
+	return p;
+}
+
+static struct lib80211_crypto_ops lib80211_crypt_wep = {
+	.name = "WEP",
+	.init = lib80211_wep_init,
+	.deinit = lib80211_wep_deinit,
+	.build_iv = lib80211_wep_build_iv,
+	.encrypt_mpdu = lib80211_wep_encrypt,
+	.decrypt_mpdu = lib80211_wep_decrypt,
+	.encrypt_msdu = NULL,
+	.decrypt_msdu = NULL,
+	.set_key = lib80211_wep_set_key,
+	.get_key = lib80211_wep_get_key,
+	.print_stats = lib80211_wep_print_stats,
+	.extra_mpdu_prefix_len = 4,	/* IV */
+	.extra_mpdu_postfix_len = 4,	/* ICV */
+	.owner = THIS_MODULE,
+};
+
+static int __init lib80211_crypto_wep_init(void)
+{
+	return lib80211_register_crypto_ops(&lib80211_crypt_wep);
+}
+
+static void __exit lib80211_crypto_wep_exit(void)
+{
+	lib80211_unregister_crypto_ops(&lib80211_crypt_wep);
+}
+
+module_init(lib80211_crypto_wep_init);
+module_exit(lib80211_crypto_wep_exit);
-- 
cgit v1.2.3


From 2ba4b32ecf748d5f45f298fc9677fa46d1dd9aff Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 11 Nov 2008 16:00:06 -0500
Subject: lib80211: consolidate crypt init routines

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/hostap/hostap_hw.c    | 61 +-----------------------------
 drivers/net/wireless/hostap/hostap_ioctl.c | 43 +++++----------------
 include/net/lib80211.h                     |  3 ++
 net/ieee80211/ieee80211_module.c           | 26 +------------
 net/wireless/lib80211.c                    | 38 +++++++++++++++++++
 5 files changed, 54 insertions(+), 117 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 066299fc9259..0f27059bbe85 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -2788,46 +2788,6 @@ static void prism2_check_sta_fw_version(local_info_t *local)
 }
 
 
-static void prism2_crypt_deinit_entries(local_info_t *local, int force)
-{
-	struct list_head *ptr, *n;
-	struct lib80211_crypt_data *entry;
-
-	for (ptr = local->crypt_info.crypt_deinit_list.next, n = ptr->next;
-	     ptr != &local->crypt_info.crypt_deinit_list;
-	     ptr = n, n = ptr->next) {
-		entry = list_entry(ptr, struct lib80211_crypt_data, list);
-
-		if (atomic_read(&entry->refcnt) != 0 && !force)
-			continue;
-
-		list_del(ptr);
-
-		if (entry->ops)
-			entry->ops->deinit(entry->priv);
-		kfree(entry);
-	}
-}
-
-
-static void prism2_crypt_deinit_handler(unsigned long data)
-{
-	local_info_t *local = (local_info_t *) data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&local->lock, flags);
-	prism2_crypt_deinit_entries(local, 0);
-	if (!list_empty(&local->crypt_info.crypt_deinit_list)) {
-		printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
-		       "deletion list\n", local->dev->name);
-		local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ;
-		add_timer(&local->crypt_info.crypt_deinit_timer);
-	}
-	spin_unlock_irqrestore(&local->lock, flags);
-
-}
-
-
 static void hostap_passive_scan(unsigned long data)
 {
 	local_info_t *local = (local_info_t *) data;
@@ -3252,12 +3212,7 @@ while (0)
 	INIT_LIST_HEAD(&local->cmd_queue);
 	init_waitqueue_head(&local->hostscan_wq);
 
-	local->crypt_info.name = dev->name;
-	local->crypt_info.lock = &local->lock;
-	INIT_LIST_HEAD(&local->crypt_info.crypt_deinit_list);
-	init_timer(&local->crypt_info.crypt_deinit_timer);
-	local->crypt_info.crypt_deinit_timer.data = (unsigned long) local;
-	local->crypt_info.crypt_deinit_timer.function = prism2_crypt_deinit_handler;
+	lib80211_crypt_info_init(&local->crypt_info, dev->name, &local->lock);
 
 	init_timer(&local->passive_scan_timer);
 	local->passive_scan_timer.data = (unsigned long) local;
@@ -3358,9 +3313,7 @@ static void prism2_free_local_data(struct net_device *dev)
 
 	flush_scheduled_work();
 
-	if (timer_pending(&local->crypt_info.crypt_deinit_timer))
-		del_timer(&local->crypt_info.crypt_deinit_timer);
-	prism2_crypt_deinit_entries(local, 1);
+	lib80211_crypt_info_free(&local->crypt_info);
 
 	if (timer_pending(&local->passive_scan_timer))
 		del_timer(&local->passive_scan_timer);
@@ -3377,16 +3330,6 @@ static void prism2_free_local_data(struct net_device *dev)
 	if (local->dev_enabled)
 		prism2_callback(local, PRISM2_CALLBACK_DISABLE);
 
-	for (i = 0; i < WEP_KEYS; i++) {
-		struct lib80211_crypt_data *crypt = local->crypt_info.crypt[i];
-		if (crypt) {
-			if (crypt->ops)
-				crypt->ops->deinit(crypt->priv);
-			kfree(crypt);
-			local->crypt_info.crypt[i] = NULL;
-		}
-	}
-
 	if (local->ap != NULL)
 		hostap_free_data(local->ap);
 
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index 29aebb679099..c40fdf4c79de 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -116,32 +116,6 @@ static int prism2_get_name(struct net_device *dev,
 }
 
 
-static void prism2_crypt_delayed_deinit(local_info_t *local,
-					struct lib80211_crypt_data **crypt)
-{
-	struct lib80211_crypt_data *tmp;
-	unsigned long flags;
-
-	tmp = *crypt;
-	*crypt = NULL;
-
-	if (tmp == NULL)
-		return;
-
-	/* must not run ops->deinit() while there may be pending encrypt or
-	 * decrypt operations. Use a list of delayed deinits to avoid needing
-	 * locking. */
-
-	spin_lock_irqsave(&local->lock, flags);
-	list_add(&tmp->list, &local->crypt_info.crypt_deinit_list);
-	if (!timer_pending(&local->crypt_info.crypt_deinit_timer)) {
-		local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ;
-		add_timer(&local->crypt_info.crypt_deinit_timer);
-	}
-	spin_unlock_irqrestore(&local->lock, flags);
-}
-
-
 static int prism2_ioctl_siwencode(struct net_device *dev,
 				  struct iw_request_info *info,
 				  struct iw_point *erq, char *keybuf)
@@ -166,14 +140,14 @@ static int prism2_ioctl_siwencode(struct net_device *dev,
 
 	if (erq->flags & IW_ENCODE_DISABLED) {
 		if (*crypt)
-			prism2_crypt_delayed_deinit(local, crypt);
+			lib80211_crypt_delayed_deinit(&local->crypt_info, crypt);
 		goto done;
 	}
 
 	if (*crypt != NULL && (*crypt)->ops != NULL &&
 	    strcmp((*crypt)->ops->name, "WEP") != 0) {
 		/* changing to use WEP; deinit previously used algorithm */
-		prism2_crypt_delayed_deinit(local, crypt);
+		lib80211_crypt_delayed_deinit(&local->crypt_info, crypt);
 	}
 
 	if (*crypt == NULL) {
@@ -189,7 +163,7 @@ static int prism2_ioctl_siwencode(struct net_device *dev,
 			request_module("lib80211_crypt_wep");
 			new_crypt->ops = lib80211_get_crypto_ops("WEP");
 		}
-		if (new_crypt->ops)
+		if (new_crypt->ops && try_module_get(new_crypt->ops->owner))
 			new_crypt->priv = new_crypt->ops->init(i);
 		if (!new_crypt->ops || !new_crypt->priv) {
 			kfree(new_crypt);
@@ -3269,7 +3243,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	if ((erq->flags & IW_ENCODE_DISABLED) ||
 	    ext->alg == IW_ENCODE_ALG_NONE) {
 		if (*crypt)
-			prism2_crypt_delayed_deinit(local, crypt);
+			lib80211_crypt_delayed_deinit(&local->crypt_info, crypt);
 		goto done;
 	}
 
@@ -3317,7 +3291,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 	if (*crypt == NULL || (*crypt)->ops != ops) {
 		struct lib80211_crypt_data *new_crypt;
 
-		prism2_crypt_delayed_deinit(local, crypt);
+		lib80211_crypt_delayed_deinit(&local->crypt_info, crypt);
 
 		new_crypt = kzalloc(sizeof(struct lib80211_crypt_data),
 				GFP_KERNEL);
@@ -3326,7 +3300,8 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev,
 			goto done;
 		}
 		new_crypt->ops = ops;
-		new_crypt->priv = new_crypt->ops->init(i);
+		if (new_crypt->ops && try_module_get(new_crypt->ops->owner))
+			new_crypt->priv = new_crypt->ops->init(i);
 		if (new_crypt->priv == NULL) {
 			kfree(new_crypt);
 			ret = -EINVAL;
@@ -3503,7 +3478,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 
 	if (strcmp(param->u.crypt.alg, "none") == 0) {
 		if (crypt)
-			prism2_crypt_delayed_deinit(local, crypt);
+			lib80211_crypt_delayed_deinit(&local->crypt_info, crypt);
 		goto done;
 	}
 
@@ -3533,7 +3508,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local,
 	if (*crypt == NULL || (*crypt)->ops != ops) {
 		struct lib80211_crypt_data *new_crypt;
 
-		prism2_crypt_delayed_deinit(local, crypt);
+		lib80211_crypt_delayed_deinit(&local->crypt_info, crypt);
 
 		new_crypt = kzalloc(sizeof(struct lib80211_crypt_data),
 				GFP_KERNEL);
diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index dd1079f98da4..a269b23d1125 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -114,6 +114,9 @@ struct lib80211_crypt_info {
 	int crypt_quiesced;
 };
 
+int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
+                                spinlock_t *lock);
+void lib80211_crypt_info_free(struct lib80211_crypt_info *info);
 int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops);
 int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops);
 struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name);
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index cf21f0bd8569..a2f5616d5b09 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -182,13 +182,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 
 	spin_lock_init(&ieee->lock);
 
-	ieee->crypt_info.name = dev->name;
-	ieee->crypt_info.lock = &ieee->lock;
-	INIT_LIST_HEAD(&ieee->crypt_info.crypt_deinit_list);
-	setup_timer(&ieee->crypt_info.crypt_deinit_timer,
-			lib80211_crypt_deinit_handler,
-			(unsigned long)&ieee->crypt_info);
-	ieee->crypt_info.crypt_quiesced = 0;
+	lib80211_crypt_info_init(&ieee->crypt_info, dev->name, &ieee->lock);
 
 	ieee->wpa_enabled = 0;
 	ieee->drop_unencrypted = 0;
@@ -206,23 +200,7 @@ void free_ieee80211(struct net_device *dev)
 {
 	struct ieee80211_device *ieee = netdev_priv(dev);
 
-	int i;
-
-	lib80211_crypt_quiescing(&ieee->crypt_info);
-	del_timer_sync(&ieee->crypt_info.crypt_deinit_timer);
-	lib80211_crypt_deinit_entries(&ieee->crypt_info, 1);
-
-	for (i = 0; i < WEP_KEYS; i++) {
-		struct lib80211_crypt_data *crypt = ieee->crypt_info.crypt[i];
-		if (crypt) {
-			if (crypt->ops) {
-				crypt->ops->deinit(crypt->priv);
-				module_put(crypt->ops->owner);
-			}
-			kfree(crypt);
-			ieee->crypt_info.crypt[i] = NULL;
-		}
-	}
+	lib80211_crypt_info_free(&ieee->crypt_info);
 
 	ieee80211_networks_free(ieee);
 	free_netdev(dev);
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index d681721f4acb..97d411f74507 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -71,6 +71,44 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len)
 }
 EXPORT_SYMBOL(print_ssid);
 
+int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name,
+				spinlock_t *lock)
+{
+	memset(info, 0, sizeof(*info));
+
+	info->name = name;
+	info->lock = lock;
+
+	INIT_LIST_HEAD(&info->crypt_deinit_list);
+	setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler,
+			(unsigned long)info);
+
+	return 0;
+}
+EXPORT_SYMBOL(lib80211_crypt_info_init);
+
+void lib80211_crypt_info_free(struct lib80211_crypt_info *info)
+{
+	int i;
+
+        lib80211_crypt_quiescing(info);
+        del_timer_sync(&info->crypt_deinit_timer);
+        lib80211_crypt_deinit_entries(info, 1);
+
+        for (i = 0; i < NUM_WEP_KEYS; i++) {
+                struct lib80211_crypt_data *crypt = info->crypt[i];
+                if (crypt) {
+                        if (crypt->ops) {
+                                crypt->ops->deinit(crypt->priv);
+                                module_put(crypt->ops->owner);
+                        }
+                        kfree(crypt);
+                        info->crypt[i] = NULL;
+                }
+        }
+}
+EXPORT_SYMBOL(lib80211_crypt_info_free);
+
 void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force)
 {
 	struct lib80211_crypt_data *entry, *next;
-- 
cgit v1.2.3


From 627271018df75c8861b9e75b39d5995842e6ec95 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 12 Nov 2008 10:01:41 -0500
Subject: mac80211: add explicit padding in struct ieee80211_tx_info

Otherwise, the BUILD_BUG_ON calls in ieee80211_tx_info_clear_status can
fail on some architectures.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0dbbfc7b0509..6a1d4ea18186 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -341,6 +341,7 @@ struct ieee80211_tx_info {
 	u8 antenna_sel_tx;
 
 	/* 2 byte hole */
+	u8 pad[2];
 
 	union {
 		struct {
-- 
cgit v1.2.3


From a1eb5fe319beb9e181aa52c8adf75ad9aab56a89 Mon Sep 17 00:00:00 2001
From: Rami Rosen <roszenrami@gmail.com>
Date: Wed, 19 Nov 2008 09:37:43 +0200
Subject: wireless: missing include in lib80211.h

This patch adds #include <linux/timer.h> in lib80211.h to avoid
these compilation erros.

> In file included from /work/src/wireless-testing/net/wireless/lib80211.c:24:
> /work/src/wireless-testing/include/net/lib80211.h:113: error: field
> 'crypt_deinit_timer' has incomplete type
> /work/src/wireless-testing/net/wireless/lib80211.c: In function
> 'lib80211_crypt_info_init':
> /work/src/wireless-testing/net/wireless/lib80211.c:83: error: implicit
> declaration of function 'setup_timer'
> /work/src/wireless-testing/net/wireless/lib80211.c: In function
> 'lib80211_crypt_info_free':
> /work/src/wireless-testing/net/wireless/lib80211.c:95: error: implicit
> declaration of function 'del_timer_sync'
> /work/src/wireless-testing/net/wireless/lib80211.c: In function
> 'lib80211_crypt_deinit_handler':
> /work/src/wireless-testing/net/wireless/lib80211.c:157: error:
> implicit declaration of function 'add_timer'
> /work/src/wireless-testing/net/wireless/lib80211.c: In function
> 'lib80211_crypt_delayed_deinit':
> /work/src/wireless-testing/net/wireless/lib80211.c:182: error:
> implicit declaration of function 'timer_pending'
> make[3]: *** [net/wireless/lib80211.o] Error 1
> make[2]: *** [net/wireless] Error 2
> make[1]: *** [net] Error 2
> make: *** [sub-make] Error 2

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/lib80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/lib80211.h b/include/net/lib80211.h
index a269b23d1125..fb4e2784857d 100644
--- a/include/net/lib80211.h
+++ b/include/net/lib80211.h
@@ -30,7 +30,7 @@
 #include <linux/if.h>
 #include <linux/skbuff.h>
 #include <linux/ieee80211.h>
-
+#include <linux/timer.h>
 /* print_ssid() is intended to be used in debug (and possibly error)
  * messages. It should never be used for passing ssid to user space. */
 const char *print_ssid(char *buf, const char *ssid, u8 ssid_len);
-- 
cgit v1.2.3


From f757fec4b0d45dfcb52f9a914a12225a6a0a3e05 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 21 Nov 2008 15:49:19 -0800
Subject: net: use net_eq() in INET_MATCH and INET_TW_MATCH

We can avoid some useless instructions if !CONFIG_NET_NS

Because of RCU, we use INET_MATCH or INET_TW_MATCH twice for the found
socket, so thats six instructions less per incoming TCP packet.

Yet another tbench speedup :)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 28b3ee3e8d6d..ec7ee2e46d8c 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -291,25 +291,25 @@ typedef __u64 __bitwise __addrpair;
 				   ((__force __u64)(__be32)(__saddr)));
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&&	\
+	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&	\
 	 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
 	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&&	\
+	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) &&	\
 	 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
 	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
 #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
-	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&&	\
+	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))	&&	\
 	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
 	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
 	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
-	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&&	\
+	(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net))	&&	\
 	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
 	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
 	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
-- 
cgit v1.2.3


From 2baf8a2daab65cdd3f20bfeb4676a2f6aff7c3bf Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Fri, 21 Nov 2008 16:34:18 -0800
Subject: netdevice hdlc: Convert directly reference of netdev->priv

For killing directly reference of netdev->priv, use netdev->ml_priv to replace it.
Because the private pvc data comes from add_pvc() and can't be allocated in
alloc_netdev().

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Acked-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_fr.c | 10 +++++-----
 include/linux/hdlc.h      |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index d3d5055741ad..f1ddd7c3459c 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -342,7 +342,7 @@ static int fr_hard_header(struct sk_buff **skb_p, u16 dlci)
 
 static int pvc_open(struct net_device *dev)
 {
-	pvc_device *pvc = dev->priv;
+	pvc_device *pvc = dev->ml_priv;
 
 	if ((pvc->frad->flags & IFF_UP) == 0)
 		return -EIO;  /* Frad must be UP in order to activate PVC */
@@ -362,7 +362,7 @@ static int pvc_open(struct net_device *dev)
 
 static int pvc_close(struct net_device *dev)
 {
-	pvc_device *pvc = dev->priv;
+	pvc_device *pvc = dev->ml_priv;
 
 	if (--pvc->open_count == 0) {
 		hdlc_device *hdlc = dev_to_hdlc(pvc->frad);
@@ -381,7 +381,7 @@ static int pvc_close(struct net_device *dev)
 
 static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	pvc_device *pvc = dev->priv;
+	pvc_device *pvc = dev->ml_priv;
 	fr_proto_pvc_info info;
 
 	if (ifr->ifr_settings.type == IF_GET_PROTO) {
@@ -409,7 +409,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static int pvc_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	pvc_device *pvc = dev->priv;
+	pvc_device *pvc = dev->ml_priv;
 
 	if (pvc->state.active) {
 		if (dev->type == ARPHRD_ETHER) {
@@ -1111,7 +1111,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 	dev->change_mtu = pvc_change_mtu;
 	dev->mtu = HDLC_MAX_MTU;
 	dev->tx_queue_len = 0;
-	dev->priv = pvc;
+	dev->ml_priv = pvc;
 
 	result = dev_alloc_name(dev, dev->name);
 	if (result < 0) {
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index c59769693bee..e960faac609d 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -80,7 +80,7 @@ struct net_device *alloc_hdlcdev(void *priv);
 
 static inline struct hdlc_device* dev_to_hdlc(struct net_device *dev)
 {
-	return dev->priv;
+	return netdev_priv(dev);
 }
 
 static __inline__ void debug_frame(const struct sk_buff *skb)
-- 
cgit v1.2.3


From 72364706c3b7c09a658e356218a918c5f92dcad0 Mon Sep 17 00:00:00 2001
From: Krzysztof Hałasa <khc@pm.waw.pl>
Date: Thu, 14 Aug 2008 19:18:17 +0200
Subject: WAN: syncppp.c is no longer used by any kernel code. Remove it.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl>
---
 Documentation/DocBook/Makefile        |    2 +-
 Documentation/DocBook/networking.tmpl |    3 -
 Documentation/DocBook/wanbook.tmpl    |   99 ---
 drivers/net/wan/syncppp.c             | 1476 ---------------------------------
 include/net/syncppp.h                 |  102 ---
 5 files changed, 1 insertion(+), 1681 deletions(-)
 delete mode 100644 Documentation/DocBook/wanbook.tmpl
 delete mode 100644 drivers/net/wan/syncppp.c
 delete mode 100644 include/net/syncppp.h

(limited to 'include')

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 9b1f6ca100d1..0a08126d3094 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -6,7 +6,7 @@
 # To add a new book the only step required is to add the book to the
 # list of DOCBOOKS.
 
-DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml \
+DOCBOOKS := z8530book.xml mcabook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
 	    procfs-guide.xml writing_usb_driver.xml networking.xml \
 	    kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl
index f24f9e85e4ae..627707a3cb9d 100644
--- a/Documentation/DocBook/networking.tmpl
+++ b/Documentation/DocBook/networking.tmpl
@@ -98,9 +98,6 @@
 X!Enet/core/wireless.c
      </sect1>
 -->
-     <sect1><title>Synchronous PPP</title>
-!Edrivers/net/wan/syncppp.c
-     </sect1>
   </chapter>
 
 </book>
diff --git a/Documentation/DocBook/wanbook.tmpl b/Documentation/DocBook/wanbook.tmpl
deleted file mode 100644
index 8c93db122f04..000000000000
--- a/Documentation/DocBook/wanbook.tmpl
+++ /dev/null
@@ -1,99 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
-	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
-
-<book id="WANGuide">
- <bookinfo>
-  <title>Synchronous PPP and Cisco HDLC Programming Guide</title>
-  
-  <authorgroup>
-   <author>
-    <firstname>Alan</firstname>
-    <surname>Cox</surname>
-    <affiliation>
-     <address>
-      <email>alan@lxorguk.ukuu.org.uk</email>
-     </address>
-    </affiliation>
-   </author>
-  </authorgroup>
-
-  <copyright>
-   <year>2000</year>
-   <holder>Alan Cox</holder>
-  </copyright>
-
-  <legalnotice>
-   <para>
-     This documentation is free software; you can redistribute
-     it and/or modify it under the terms of the GNU General Public
-     License as published by the Free Software Foundation; either
-     version 2 of the License, or (at your option) any later
-     version.
-   </para>
-      
-   <para>
-     This program is distributed in the hope that it will be
-     useful, but WITHOUT ANY WARRANTY; without even the implied
-     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-     See the GNU General Public License for more details.
-   </para>
-      
-   <para>
-     You should have received a copy of the GNU General Public
-     License along with this program; if not, write to the Free
-     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-     MA 02111-1307 USA
-   </para>
-      
-   <para>
-     For more details see the file COPYING in the source
-     distribution of Linux.
-   </para>
-  </legalnotice>
- </bookinfo>
-
-<toc></toc>
-
-  <chapter id="intro">
-      <title>Introduction</title>
-  <para>
-	The syncppp drivers in Linux provide a fairly complete 
-	implementation of Cisco HDLC and a minimal implementation of
-	PPP. The longer term goal is to switch the PPP layer to the
-	generic PPP interface that is new in Linux 2.3.x. The API should
-	remain unchanged when this is done, but support will then be
-	available for IPX, compression and other PPP features
-  </para>
-  </chapter>
-  <chapter id="bugs">
-     <title>Known Bugs And Assumptions</title>
-  <para>
-  <variablelist>
-    <varlistentry><term>PPP is minimal</term>
-    <listitem>
-    <para>
-	The current PPP implementation is very basic, although sufficient
-	for most wan usages.
-    </para>
-    </listitem></varlistentry>
-
-    <varlistentry><term>Cisco HDLC Quirks</term>
-    <listitem>
-    <para>
-	Currently we do not end all packets with the correct Cisco multicast
-	or unicast flags. Nothing appears to mind too much but this should
-	be corrected.
-    </para>
-    </listitem></varlistentry>
-  </variablelist>
-	
-  </para>
-  </chapter>
-
-  <chapter id="pubfunctions">
-     <title>Public Functions Provided</title>
-!Edrivers/net/wan/syncppp.c
-  </chapter>
-
-</book>
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
deleted file mode 100644
index 58ae8a2223af..000000000000
--- a/drivers/net/wan/syncppp.c
+++ /dev/null
@@ -1,1476 +0,0 @@
-/*
- *	NET3:	A (fairly minimal) implementation of synchronous PPP for Linux
- *		as well as a CISCO HDLC implementation. See the copyright 
- *		message below for the original source.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the license, or (at your option) any later version.
- *
- *	Note however. This code is also used in a different form by FreeBSD.
- *	Therefore when making any non OS specific change please consider
- *	contributing it back to the original author under the terms
- *	below in addition.
- *		-- Alan
- *
- *	Port for Linux-2.1 by Jan "Yenya" Kasprzak <kas@fi.muni.cz>
- */
-
-/*
- * Synchronous PPP/Cisco link level subroutines.
- * Keepalive protocol implemented in both Cisco and PPP modes.
- *
- * Copyright (C) 1994 Cronyx Ltd.
- * Author: Serge Vakulenko, <vak@zebub.msk.su>
- *
- * This software is distributed with NO WARRANTIES, not even the implied
- * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Authors grant any other persons or organisations permission to use
- * or modify this software as long as this message is kept with the software,
- * all derivative works or modified versions.
- *
- * Version 1.9, Wed Oct  4 18:58:15 MSK 1995
- *
- * $Id: syncppp.c,v 1.18 2000/04/11 05:25:31 asj Exp $
- */
-#undef DEBUG
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/skbuff.h>
-#include <linux/route.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/random.h>
-#include <linux/pkt_sched.h>
-#include <linux/spinlock.h>
-#include <linux/rcupdate.h>
-
-#include <net/net_namespace.h>
-#include <net/syncppp.h>
-
-#include <asm/byteorder.h>
-#include <asm/uaccess.h>
-
-#define MAXALIVECNT     6               /* max. alive packets */
-
-#define PPP_ALLSTATIONS 0xff            /* All-Stations broadcast address */
-#define PPP_UI          0x03            /* Unnumbered Information */
-#define PPP_IP          0x0021          /* Internet Protocol */
-#define PPP_ISO         0x0023          /* ISO OSI Protocol */
-#define PPP_XNS         0x0025          /* Xerox NS Protocol */
-#define PPP_IPX         0x002b          /* Novell IPX Protocol */
-#define PPP_LCP         0xc021          /* Link Control Protocol */
-#define PPP_IPCP        0x8021          /* Internet Protocol Control Protocol */
-
-#define LCP_CONF_REQ    1               /* PPP LCP configure request */
-#define LCP_CONF_ACK    2               /* PPP LCP configure acknowledge */
-#define LCP_CONF_NAK    3               /* PPP LCP configure negative ack */
-#define LCP_CONF_REJ    4               /* PPP LCP configure reject */
-#define LCP_TERM_REQ    5               /* PPP LCP terminate request */
-#define LCP_TERM_ACK    6               /* PPP LCP terminate acknowledge */
-#define LCP_CODE_REJ    7               /* PPP LCP code reject */
-#define LCP_PROTO_REJ   8               /* PPP LCP protocol reject */
-#define LCP_ECHO_REQ    9               /* PPP LCP echo request */
-#define LCP_ECHO_REPLY  10              /* PPP LCP echo reply */
-#define LCP_DISC_REQ    11              /* PPP LCP discard request */
-
-#define LCP_OPT_MRU             1       /* maximum receive unit */
-#define LCP_OPT_ASYNC_MAP       2       /* async control character map */
-#define LCP_OPT_AUTH_PROTO      3       /* authentication protocol */
-#define LCP_OPT_QUAL_PROTO      4       /* quality protocol */
-#define LCP_OPT_MAGIC           5       /* magic number */
-#define LCP_OPT_RESERVED        6       /* reserved */
-#define LCP_OPT_PROTO_COMP      7       /* protocol field compression */
-#define LCP_OPT_ADDR_COMP       8       /* address/control field compression */
-
-#define IPCP_CONF_REQ   LCP_CONF_REQ    /* PPP IPCP configure request */
-#define IPCP_CONF_ACK   LCP_CONF_ACK    /* PPP IPCP configure acknowledge */
-#define IPCP_CONF_NAK   LCP_CONF_NAK    /* PPP IPCP configure negative ack */
-#define IPCP_CONF_REJ   LCP_CONF_REJ    /* PPP IPCP configure reject */
-#define IPCP_TERM_REQ   LCP_TERM_REQ    /* PPP IPCP terminate request */
-#define IPCP_TERM_ACK   LCP_TERM_ACK    /* PPP IPCP terminate acknowledge */
-#define IPCP_CODE_REJ   LCP_CODE_REJ    /* PPP IPCP code reject */
-
-#define CISCO_MULTICAST         0x8f    /* Cisco multicast address */
-#define CISCO_UNICAST           0x0f    /* Cisco unicast address */
-#define CISCO_KEEPALIVE         0x8035  /* Cisco keepalive protocol */
-#define CISCO_ADDR_REQ          0       /* Cisco address request */
-#define CISCO_ADDR_REPLY        1       /* Cisco address reply */
-#define CISCO_KEEPALIVE_REQ     2       /* Cisco keepalive request */
-
-struct ppp_header {
-	u8 address;
-	u8 control;
-	__be16 protocol;
-};
-#define PPP_HEADER_LEN          sizeof (struct ppp_header)
-
-struct lcp_header {
-	u8 type;
-	u8 ident;
-	__be16 len;
-};
-#define LCP_HEADER_LEN          sizeof (struct lcp_header)
-
-struct cisco_packet {
-	__be32 type;
-	__be32 par1;
-	__be32 par2;
-	__be16 rel;
-	__be16 time0;
-	__be16 time1;
-};
-#define CISCO_PACKET_LEN 18
-#define CISCO_BIG_PACKET_LEN 20
-
-static struct sppp *spppq;
-static struct timer_list sppp_keepalive_timer;
-static DEFINE_SPINLOCK(spppq_lock);
-
-/* global xmit queue for sending packets while spinlock is held */
-static struct sk_buff_head tx_queue;
-
-static void sppp_keepalive (unsigned long dummy);
-static void sppp_cp_send (struct sppp *sp, u16 proto, u8 type,
-	u8 ident, u16 len, void *data);
-static void sppp_cisco_send (struct sppp *sp, int type, u32 par1, u32 par2);
-static void sppp_lcp_input (struct sppp *sp, struct sk_buff *m);
-static void sppp_cisco_input (struct sppp *sp, struct sk_buff *m);
-static void sppp_ipcp_input (struct sppp *sp, struct sk_buff *m);
-static void sppp_lcp_open (struct sppp *sp);
-static void sppp_ipcp_open (struct sppp *sp);
-static int sppp_lcp_conf_parse_options (struct sppp *sp, struct lcp_header *h,
-	int len, u32 *magic);
-static void sppp_cp_timeout (unsigned long arg);
-static char *sppp_lcp_type_name (u8 type);
-static char *sppp_ipcp_type_name (u8 type);
-static void sppp_print_bytes (u8 *p, u16 len);
-
-static int debug;
-
-/* Flush global outgoing packet queue to dev_queue_xmit().
- *
- * dev_queue_xmit() must be called with interrupts enabled
- * which means it can't be called with spinlocks held.
- * If a packet needs to be sent while a spinlock is held,
- * then put the packet into tx_queue, and call sppp_flush_xmit()
- * after spinlock is released.
- */
-static void sppp_flush_xmit(void)
-{
-	struct sk_buff *skb;
-	while ((skb = skb_dequeue(&tx_queue)) != NULL)
-		dev_queue_xmit(skb);
-}
-
-/*
- *	Interface down stub
- */	
-
-static void if_down(struct net_device *dev)
-{
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-
-	sp->pp_link_state=SPPP_LINK_DOWN;
-}
-
-/*
- * Timeout routine activations.
- */
-
-static void sppp_set_timeout(struct sppp *p,int s) 
-{
-	if (! (p->pp_flags & PP_TIMO)) 
-	{
-		init_timer(&p->pp_timer);
-		p->pp_timer.function=sppp_cp_timeout;
-		p->pp_timer.expires=jiffies+s*HZ;
-		p->pp_timer.data=(unsigned long)p;
-		p->pp_flags |= PP_TIMO;
-		add_timer(&p->pp_timer);
-	}
-}
-
-static void sppp_clear_timeout(struct sppp *p)
-{
-	if (p->pp_flags & PP_TIMO) 
-	{
-		del_timer(&p->pp_timer);
-		p->pp_flags &= ~PP_TIMO; 
-	}
-}
-
-/**
- *	sppp_input -	receive and process a WAN PPP frame
- *	@skb:	The buffer to process
- *	@dev:	The device it arrived on
- *
- *	This can be called directly by cards that do not have
- *	timing constraints but is normally called from the network layer
- *	after interrupt servicing to process frames queued via netif_rx().
- *
- *	We process the options in the card. If the frame is destined for
- *	the protocol stacks then it requeues the frame for the upper level
- *	protocol. If it is a control from it is processed and discarded
- *	here.
- */
- 
-static void sppp_input (struct net_device *dev, struct sk_buff *skb)
-{
-	struct ppp_header *h;
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-	unsigned long flags;
-
-	skb->dev=dev;
-	skb_reset_mac_header(skb);
-
-	if (!pskb_may_pull(skb, PPP_HEADER_LEN)) {
-		/* Too small packet, drop it. */
-		if (sp->pp_flags & PP_DEBUG)
-			printk (KERN_DEBUG "%s: input packet is too small, %d bytes\n",
-				dev->name, skb->len);
-		kfree_skb(skb);
-		return;
-	}
-
-	/* Get PPP header. */
-	h = (struct ppp_header *)skb->data;
-	skb_pull(skb,sizeof(struct ppp_header));
-
-	spin_lock_irqsave(&sp->lock, flags);
-	
-	switch (h->address) {
-	default:        /* Invalid PPP packet. */
-		goto invalid;
-	case PPP_ALLSTATIONS:
-		if (h->control != PPP_UI)
-			goto invalid;
-		if (sp->pp_flags & PP_CISCO) {
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_WARNING "%s: PPP packet in Cisco mode <0x%x 0x%x 0x%x>\n",
-					dev->name,
-					h->address, h->control, ntohs (h->protocol));
-			goto drop;
-		}
-		switch (ntohs (h->protocol)) {
-		default:
-			if (sp->lcp.state == LCP_STATE_OPENED)
-				sppp_cp_send (sp, PPP_LCP, LCP_PROTO_REJ,
-					++sp->pp_seq, skb->len + 2,
-					&h->protocol);
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_WARNING "%s: invalid input protocol <0x%x 0x%x 0x%x>\n",
-					dev->name,
-					h->address, h->control, ntohs (h->protocol));
-			goto drop;
-		case PPP_LCP:
-			sppp_lcp_input (sp, skb);
-			goto drop;
-		case PPP_IPCP:
-			if (sp->lcp.state == LCP_STATE_OPENED)
-				sppp_ipcp_input (sp, skb);
-			else
-				printk(KERN_DEBUG "IPCP when still waiting LCP finish.\n");
-			goto drop;
-		case PPP_IP:
-			if (sp->ipcp.state == IPCP_STATE_OPENED) {
-				if(sp->pp_flags&PP_DEBUG)
-					printk(KERN_DEBUG "Yow an IP frame.\n");
-				skb->protocol=htons(ETH_P_IP);
-				netif_rx(skb);
-				goto done;
-			}
-			break;
-#ifdef IPX
-		case PPP_IPX:
-			/* IPX IPXCP not implemented yet */
-			if (sp->lcp.state == LCP_STATE_OPENED) {
-				skb->protocol=htons(ETH_P_IPX);
-				netif_rx(skb);
-				goto done;
-			}
-			break;
-#endif
-		}
-		break;
-	case CISCO_MULTICAST:
-	case CISCO_UNICAST:
-		/* Don't check the control field here (RFC 1547). */
-		if (! (sp->pp_flags & PP_CISCO)) {
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_WARNING "%s: Cisco packet in PPP mode <0x%x 0x%x 0x%x>\n",
-					dev->name,
-					h->address, h->control, ntohs (h->protocol));
-			goto drop;
-		}
-		switch (ntohs (h->protocol)) {
-		default:
-			goto invalid;
-		case CISCO_KEEPALIVE:
-			sppp_cisco_input (sp, skb);
-			goto drop;
-#ifdef CONFIG_INET
-		case ETH_P_IP:
-			skb->protocol=htons(ETH_P_IP);
-			netif_rx(skb);
-			goto done;
-#endif
-#ifdef CONFIG_IPX
-		case ETH_P_IPX:
-			skb->protocol=htons(ETH_P_IPX);
-			netif_rx(skb);
-			goto done;
-#endif
-		}
-		break;
-	}
-	goto drop;
-
-invalid:
-	if (sp->pp_flags & PP_DEBUG)
-		printk (KERN_WARNING "%s: invalid input packet <0x%x 0x%x 0x%x>\n",
-			dev->name, h->address, h->control, ntohs (h->protocol));
-drop:
-	kfree_skb(skb);
-done:
-	spin_unlock_irqrestore(&sp->lock, flags);
-	sppp_flush_xmit();
-	return;
-}
-
-/*
- *	Handle transmit packets.
- */
- 
-static int sppp_hard_header(struct sk_buff *skb,
-			    struct net_device *dev, __u16 type,
-			    const void *daddr, const void *saddr,
-			    unsigned int len)
-{
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-	struct ppp_header *h;
-	skb_push(skb,sizeof(struct ppp_header));
-	h=(struct ppp_header *)skb->data;
-	if(sp->pp_flags&PP_CISCO)
-	{
-		h->address = CISCO_UNICAST;
-		h->control = 0;
-	}
-	else
-	{
-		h->address = PPP_ALLSTATIONS;
-		h->control = PPP_UI;
-	}
-	if(sp->pp_flags & PP_CISCO)
-	{
-		h->protocol = htons(type);
-	}
-	else switch(type)
-	{
-		case ETH_P_IP:
-			h->protocol = htons(PPP_IP);
-			break;
-		case ETH_P_IPX:
-			h->protocol = htons(PPP_IPX);
-			break;
-	}
-	return sizeof(struct ppp_header);
-}
-
-static const struct header_ops sppp_header_ops = {
-	.create = sppp_hard_header,
-};
-
-/*
- * Send keepalive packets, every 10 seconds.
- */
-
-static void sppp_keepalive (unsigned long dummy)
-{
-	struct sppp *sp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&spppq_lock, flags);
-
-	for (sp=spppq; sp; sp=sp->pp_next) 
-	{
-		struct net_device *dev = sp->pp_if;
-
-		/* Keepalive mode disabled or channel down? */
-		if (! (sp->pp_flags & PP_KEEPALIVE) ||
-		    ! (dev->flags & IFF_UP))
-			continue;
-
-		spin_lock(&sp->lock);
-
-		/* No keepalive in PPP mode if LCP not opened yet. */
-		if (! (sp->pp_flags & PP_CISCO) &&
-		    sp->lcp.state != LCP_STATE_OPENED) {
-			spin_unlock(&sp->lock);
-			continue;
-		}
-
-		if (sp->pp_alivecnt == MAXALIVECNT) {
-			/* No keepalive packets got.  Stop the interface. */
-			printk (KERN_WARNING "%s: protocol down\n", dev->name);
-			if_down (dev);
-			if (! (sp->pp_flags & PP_CISCO)) {
-				/* Shut down the PPP link. */
-				sp->lcp.magic = jiffies;
-				sp->lcp.state = LCP_STATE_CLOSED;
-				sp->ipcp.state = IPCP_STATE_CLOSED;
-				sppp_clear_timeout (sp);
-				/* Initiate negotiation. */
-				sppp_lcp_open (sp);
-			}
-		}
-		if (sp->pp_alivecnt <= MAXALIVECNT)
-			++sp->pp_alivecnt;
-		if (sp->pp_flags & PP_CISCO)
-			sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ, ++sp->pp_seq,
-				sp->pp_rseq);
-		else if (sp->lcp.state == LCP_STATE_OPENED) {
-			__be32 nmagic = htonl (sp->lcp.magic);
-			sp->lcp.echoid = ++sp->pp_seq;
-			sppp_cp_send (sp, PPP_LCP, LCP_ECHO_REQ,
-				sp->lcp.echoid, 4, &nmagic);
-		}
-
-		spin_unlock(&sp->lock);
-	}
-	spin_unlock_irqrestore(&spppq_lock, flags);
-	sppp_flush_xmit();
-	sppp_keepalive_timer.expires=jiffies+10*HZ;
-	add_timer(&sppp_keepalive_timer);
-}
-
-/*
- * Handle incoming PPP Link Control Protocol packets.
- */
- 
-static void sppp_lcp_input (struct sppp *sp, struct sk_buff *skb)
-{
-	struct lcp_header *h;
-	struct net_device *dev = sp->pp_if;
-	int len = skb->len;
-	u8 *p, opt[6];
-	u32 rmagic = 0;
-
-	if (!pskb_may_pull(skb, sizeof(struct lcp_header))) {
-		if (sp->pp_flags & PP_DEBUG)
-			printk (KERN_WARNING "%s: invalid lcp packet length: %d bytes\n",
-				dev->name, len);
-		return;
-	}
-	h = (struct lcp_header *)skb->data;
-	skb_pull(skb,sizeof(struct lcp_header *));
-	
-	if (sp->pp_flags & PP_DEBUG) 
-	{
-		char state = '?';
-		switch (sp->lcp.state) {
-		case LCP_STATE_CLOSED:   state = 'C'; break;
-		case LCP_STATE_ACK_RCVD: state = 'R'; break;
-		case LCP_STATE_ACK_SENT: state = 'S'; break;
-		case LCP_STATE_OPENED:   state = 'O'; break;
-		}
-		printk (KERN_WARNING "%s: lcp input(%c): %d bytes <%s id=%xh len=%xh",
-			dev->name, state, len,
-			sppp_lcp_type_name (h->type), h->ident, ntohs (h->len));
-		if (len > 4)
-			sppp_print_bytes ((u8*) (h+1), len-4);
-		printk (">\n");
-	}
-	if (len > ntohs (h->len))
-		len = ntohs (h->len);
-	switch (h->type) {
-	default:
-		/* Unknown packet type -- send Code-Reject packet. */
-		sppp_cp_send (sp, PPP_LCP, LCP_CODE_REJ, ++sp->pp_seq,
-			skb->len, h);
-		break;
-	case LCP_CONF_REQ:
-		if (len < 4) {
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_DEBUG"%s: invalid lcp configure request packet length: %d bytes\n",
-					dev->name, len);
-			break;
-		}
-		if (len>4 && !sppp_lcp_conf_parse_options (sp, h, len, &rmagic))
-			goto badreq;
-		if (rmagic == sp->lcp.magic) {
-			/* Local and remote magics equal -- loopback? */
-			if (sp->pp_loopcnt >= MAXALIVECNT*5) {
-				printk (KERN_WARNING "%s: loopback\n",
-					dev->name);
-				sp->pp_loopcnt = 0;
-				if (dev->flags & IFF_UP) {
-					if_down (dev);
-				}
-			} else if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_DEBUG "%s: conf req: magic glitch\n",
-					dev->name);
-			++sp->pp_loopcnt;
-
-			/* MUST send Conf-Nack packet. */
-			rmagic = ~sp->lcp.magic;
-			opt[0] = LCP_OPT_MAGIC;
-			opt[1] = sizeof (opt);
-			opt[2] = rmagic >> 24;
-			opt[3] = rmagic >> 16;
-			opt[4] = rmagic >> 8;
-			opt[5] = rmagic;
-			sppp_cp_send (sp, PPP_LCP, LCP_CONF_NAK,
-				h->ident, sizeof (opt), &opt);
-badreq:
-			switch (sp->lcp.state) {
-			case LCP_STATE_OPENED:
-				/* Initiate renegotiation. */
-				sppp_lcp_open (sp);
-				/* fall through... */
-			case LCP_STATE_ACK_SENT:
-				/* Go to closed state. */
-				sp->lcp.state = LCP_STATE_CLOSED;
-				sp->ipcp.state = IPCP_STATE_CLOSED;
-			}
-			break;
-		}
-		/* Send Configure-Ack packet. */
-		sp->pp_loopcnt = 0;
-		if (sp->lcp.state != LCP_STATE_OPENED) {
-			sppp_cp_send (sp, PPP_LCP, LCP_CONF_ACK,
-					h->ident, len-4, h+1);
-		}
-		/* Change the state. */
-		switch (sp->lcp.state) {
-		case LCP_STATE_CLOSED:
-			sp->lcp.state = LCP_STATE_ACK_SENT;
-			break;
-		case LCP_STATE_ACK_RCVD:
-			sp->lcp.state = LCP_STATE_OPENED;
-			sppp_ipcp_open (sp);
-			break;
-		case LCP_STATE_OPENED:
-			/* Remote magic changed -- close session. */
-			sp->lcp.state = LCP_STATE_CLOSED;
-			sp->ipcp.state = IPCP_STATE_CLOSED;
-			/* Initiate renegotiation. */
-			sppp_lcp_open (sp);
-			/* Send ACK after our REQ in attempt to break loop */
-			sppp_cp_send (sp, PPP_LCP, LCP_CONF_ACK,
-					h->ident, len-4, h+1);
-			sp->lcp.state = LCP_STATE_ACK_SENT;
-			break;
-		}
-		break;
-	case LCP_CONF_ACK:
-		if (h->ident != sp->lcp.confid)
-			break;
-		sppp_clear_timeout (sp);
-		if ((sp->pp_link_state != SPPP_LINK_UP) &&
-		    (dev->flags & IFF_UP)) {
-			/* Coming out of loopback mode. */
-			sp->pp_link_state=SPPP_LINK_UP;
-			printk (KERN_INFO "%s: protocol up\n", dev->name);
-		}
-		switch (sp->lcp.state) {
-		case LCP_STATE_CLOSED:
-			sp->lcp.state = LCP_STATE_ACK_RCVD;
-			sppp_set_timeout (sp, 5);
-			break;
-		case LCP_STATE_ACK_SENT:
-			sp->lcp.state = LCP_STATE_OPENED;
-			sppp_ipcp_open (sp);
-			break;
-		}
-		break;
-	case LCP_CONF_NAK:
-		if (h->ident != sp->lcp.confid)
-			break;
-		p = (u8*) (h+1);
-		if (len>=10 && p[0] == LCP_OPT_MAGIC && p[1] >= 4) {
-			rmagic = (u32)p[2] << 24 |
-				(u32)p[3] << 16 | p[4] << 8 | p[5];
-			if (rmagic == ~sp->lcp.magic) {
-				int newmagic;
-				if (sp->pp_flags & PP_DEBUG)
-					printk (KERN_DEBUG "%s: conf nak: magic glitch\n",
-						dev->name);
-				get_random_bytes(&newmagic, sizeof(newmagic));
-				sp->lcp.magic += newmagic;
-			} else
-				sp->lcp.magic = rmagic;
-			}
-		if (sp->lcp.state != LCP_STATE_ACK_SENT) {
-			/* Go to closed state. */
-			sp->lcp.state = LCP_STATE_CLOSED;
-			sp->ipcp.state = IPCP_STATE_CLOSED;
-		}
-		/* The link will be renegotiated after timeout,
-		 * to avoid endless req-nack loop. */
-		sppp_clear_timeout (sp);
-		sppp_set_timeout (sp, 2);
-		break;
-	case LCP_CONF_REJ:
-		if (h->ident != sp->lcp.confid)
-			break;
-		sppp_clear_timeout (sp);
-		/* Initiate renegotiation. */
-		sppp_lcp_open (sp);
-		if (sp->lcp.state != LCP_STATE_ACK_SENT) {
-			/* Go to closed state. */
-			sp->lcp.state = LCP_STATE_CLOSED;
-			sp->ipcp.state = IPCP_STATE_CLOSED;
-		}
-		break;
-	case LCP_TERM_REQ:
-		sppp_clear_timeout (sp);
-		/* Send Terminate-Ack packet. */
-		sppp_cp_send (sp, PPP_LCP, LCP_TERM_ACK, h->ident, 0, NULL);
-		/* Go to closed state. */
-		sp->lcp.state = LCP_STATE_CLOSED;
-		sp->ipcp.state = IPCP_STATE_CLOSED;
-		/* Initiate renegotiation. */
-		sppp_lcp_open (sp);
-		break;
-	case LCP_TERM_ACK:
-	case LCP_CODE_REJ:
-	case LCP_PROTO_REJ:
-		/* Ignore for now. */
-		break;
-	case LCP_DISC_REQ:
-		/* Discard the packet. */
-		break;
-	case LCP_ECHO_REQ:
-		if (sp->lcp.state != LCP_STATE_OPENED)
-			break;
-		if (len < 8) {
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_WARNING "%s: invalid lcp echo request packet length: %d bytes\n",
-					dev->name, len);
-			break;
-		}
-		if (ntohl (*(__be32*)(h+1)) == sp->lcp.magic) {
-			/* Line loopback mode detected. */
-			printk (KERN_WARNING "%s: loopback\n", dev->name);
-			if_down (dev);
-
-			/* Shut down the PPP link. */
-			sp->lcp.state = LCP_STATE_CLOSED;
-			sp->ipcp.state = IPCP_STATE_CLOSED;
-			sppp_clear_timeout (sp);
-			/* Initiate negotiation. */
-			sppp_lcp_open (sp);
-			break;
-		}
-		*(__be32 *)(h+1) = htonl (sp->lcp.magic);
-		sppp_cp_send (sp, PPP_LCP, LCP_ECHO_REPLY, h->ident, len-4, h+1);
-		break;
-	case LCP_ECHO_REPLY:
-		if (h->ident != sp->lcp.echoid)
-			break;
-		if (len < 8) {
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_WARNING "%s: invalid lcp echo reply packet length: %d bytes\n",
-					dev->name, len);
-			break;
-		}
-		if (ntohl(*(__be32 *)(h+1)) != sp->lcp.magic)
-		sp->pp_alivecnt = 0;
-		break;
-	}
-}
-
-/*
- * Handle incoming Cisco keepalive protocol packets.
- */
-
-static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb)
-{
-	struct cisco_packet *h;
-	struct net_device *dev = sp->pp_if;
-
-	if (!pskb_may_pull(skb, sizeof(struct cisco_packet))
-	    || (skb->len != CISCO_PACKET_LEN
-		&& skb->len != CISCO_BIG_PACKET_LEN)) {
-		if (sp->pp_flags & PP_DEBUG)
-			printk (KERN_WARNING "%s: invalid cisco packet length: %d bytes\n",
-				dev->name,  skb->len);
-		return;
-	}
-	h = (struct cisco_packet *)skb->data;
-	skb_pull(skb, sizeof(struct cisco_packet*));
-	if (sp->pp_flags & PP_DEBUG)
-		printk (KERN_WARNING "%s: cisco input: %d bytes <%xh %xh %xh %xh %xh-%xh>\n",
-			dev->name,  skb->len,
-			ntohl (h->type), h->par1, h->par2, h->rel,
-			h->time0, h->time1);
-	switch (ntohl (h->type)) {
-	default:
-		if (sp->pp_flags & PP_DEBUG)
-			printk (KERN_WARNING "%s: unknown cisco packet type: 0x%x\n",
-				dev->name,  ntohl (h->type));
-		break;
-	case CISCO_ADDR_REPLY:
-		/* Reply on address request, ignore */
-		break;
-	case CISCO_KEEPALIVE_REQ:
-		sp->pp_alivecnt = 0;
-		sp->pp_rseq = ntohl (h->par1);
-		if (sp->pp_seq == sp->pp_rseq) {
-			/* Local and remote sequence numbers are equal.
-			 * Probably, the line is in loopback mode. */
-			int newseq;
-			if (sp->pp_loopcnt >= MAXALIVECNT) {
-				printk (KERN_WARNING "%s: loopback\n",
-					dev->name);
-				sp->pp_loopcnt = 0;
-				if (dev->flags & IFF_UP) {
-					if_down (dev);
-				}
-			}
-			++sp->pp_loopcnt;
-
-			/* Generate new local sequence number */
-			get_random_bytes(&newseq, sizeof(newseq));
-			sp->pp_seq ^= newseq;
-			break;
-		}
-		sp->pp_loopcnt = 0;
-		if (sp->pp_link_state==SPPP_LINK_DOWN &&
-		    (dev->flags & IFF_UP)) {
-			sp->pp_link_state=SPPP_LINK_UP;
-			printk (KERN_INFO "%s: protocol up\n", dev->name);
-		}
-		break;
-	case CISCO_ADDR_REQ:
-		/* Stolen from net/ipv4/devinet.c -- SIOCGIFADDR ioctl */
-		{
-		__be32 addr = 0, mask = htonl(~0U); /* FIXME: is the mask correct? */
-#ifdef CONFIG_INET
-		struct in_device *in_dev;
-		struct in_ifaddr *ifa;
-
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)) != NULL)
-		{
-			for (ifa=in_dev->ifa_list; ifa != NULL;
-				ifa=ifa->ifa_next) {
-				if (strcmp(dev->name, ifa->ifa_label) == 0) 
-				{
-					addr = ifa->ifa_local;
-					mask = ifa->ifa_mask;
-					break;
-				}
-			}
-		}
-		rcu_read_unlock();
-#endif		
-		sppp_cisco_send (sp, CISCO_ADDR_REPLY, ntohl(addr), ntohl(mask));
-		break;
-		}
-	}
-}
-
-
-/*
- * Send PPP LCP packet.
- */
-
-static void sppp_cp_send (struct sppp *sp, u16 proto, u8 type,
-	u8 ident, u16 len, void *data)
-{
-	struct ppp_header *h;
-	struct lcp_header *lh;
-	struct sk_buff *skb;
-	struct net_device *dev = sp->pp_if;
-
-	skb=alloc_skb(dev->hard_header_len+PPP_HEADER_LEN+LCP_HEADER_LEN+len,
-		GFP_ATOMIC);
-	if (skb==NULL)
-		return;
-
-	skb_reserve(skb,dev->hard_header_len);
-	
-	h = (struct ppp_header *)skb_put(skb, sizeof(struct ppp_header));
-	h->address = PPP_ALLSTATIONS;        /* broadcast address */
-	h->control = PPP_UI;                 /* Unnumbered Info */
-	h->protocol = htons (proto);         /* Link Control Protocol */
-
-	lh = (struct lcp_header *)skb_put(skb, sizeof(struct lcp_header));
-	lh->type = type;
-	lh->ident = ident;
-	lh->len = htons (LCP_HEADER_LEN + len);
-
-	if (len)
-		memcpy(skb_put(skb,len),data, len);
-
-	if (sp->pp_flags & PP_DEBUG) {
-		printk (KERN_WARNING "%s: %s output <%s id=%xh len=%xh",
-			dev->name, 
-			proto==PPP_LCP ? "lcp" : "ipcp",
-			proto==PPP_LCP ? sppp_lcp_type_name (lh->type) :
-			sppp_ipcp_type_name (lh->type), lh->ident,
-			ntohs (lh->len));
-		if (len)
-			sppp_print_bytes ((u8*) (lh+1), len);
-		printk (">\n");
-	}
-	/* Control is high priority so it doesn't get queued behind data */
-	skb->priority=TC_PRIO_CONTROL;
-	skb->dev = dev;
-	skb_queue_tail(&tx_queue, skb);
-}
-
-/*
- * Send Cisco keepalive packet.
- */
-
-static void sppp_cisco_send (struct sppp *sp, int type, u32 par1, u32 par2)
-{
-	struct ppp_header *h;
-	struct cisco_packet *ch;
-	struct sk_buff *skb;
-	struct net_device *dev = sp->pp_if;
-	u32 t = jiffies * 1000/HZ;
-
-	skb=alloc_skb(dev->hard_header_len+PPP_HEADER_LEN+CISCO_PACKET_LEN,
-		GFP_ATOMIC);
-
-	if(skb==NULL)
-		return;
-		
-	skb_reserve(skb, dev->hard_header_len);
-	h = (struct ppp_header *)skb_put (skb, sizeof(struct ppp_header));
-	h->address = CISCO_MULTICAST;
-	h->control = 0;
-	h->protocol = htons (CISCO_KEEPALIVE);
-
-	ch = (struct cisco_packet*)skb_put(skb, CISCO_PACKET_LEN);
-	ch->type = htonl (type);
-	ch->par1 = htonl (par1);
-	ch->par2 = htonl (par2);
-	ch->rel = htons(0xffff);
-	ch->time0 = htons ((u16) (t >> 16));
-	ch->time1 = htons ((u16) t);
-
-	if (sp->pp_flags & PP_DEBUG)
-		printk (KERN_WARNING "%s: cisco output: <%xh %xh %xh %xh %xh-%xh>\n",
-			dev->name,  ntohl (ch->type), ch->par1,
-			ch->par2, ch->rel, ch->time0, ch->time1);
-	skb->priority=TC_PRIO_CONTROL;
-	skb->dev = dev;
-	skb_queue_tail(&tx_queue, skb);
-}
-
-/**
- *	sppp_close - close down a synchronous PPP or Cisco HDLC link
- *	@dev: The network device to drop the link of
- *
- *	This drops the logical interface to the channel. It is not
- *	done politely as we assume we will also be dropping DTR. Any
- *	timeouts are killed.
- */
-
-int sppp_close (struct net_device *dev)
-{
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-	unsigned long flags;
-
-	spin_lock_irqsave(&sp->lock, flags);
-	sp->pp_link_state = SPPP_LINK_DOWN;
-	sp->lcp.state = LCP_STATE_CLOSED;
-	sp->ipcp.state = IPCP_STATE_CLOSED;
-	sppp_clear_timeout (sp);
-	spin_unlock_irqrestore(&sp->lock, flags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(sppp_close);
-
-/**
- *	sppp_open - open a synchronous PPP or Cisco HDLC link
- *	@dev:	Network device to activate
- *	
- *	Close down any existing synchronous session and commence
- *	from scratch. In the PPP case this means negotiating LCP/IPCP
- *	and friends, while for Cisco HDLC we simply need to start sending
- *	keepalives
- */
-
-int sppp_open (struct net_device *dev)
-{
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-	unsigned long flags;
-
-	sppp_close(dev);
-
-	spin_lock_irqsave(&sp->lock, flags);
-	if (!(sp->pp_flags & PP_CISCO)) {
-		sppp_lcp_open (sp);
-	}
-	sp->pp_link_state = SPPP_LINK_DOWN;
-	spin_unlock_irqrestore(&sp->lock, flags);
-	sppp_flush_xmit();
-
-	return 0;
-}
-
-EXPORT_SYMBOL(sppp_open);
-
-/**
- *	sppp_reopen - notify of physical link loss
- *	@dev: Device that lost the link
- *
- *	This function informs the synchronous protocol code that
- *	the underlying link died (for example a carrier drop on X.21)
- *
- *	We increment the magic numbers to ensure that if the other end
- *	failed to notice we will correctly start a new session. It happens
- *	do to the nature of telco circuits is that you can lose carrier on
- *	one endonly.
- *
- *	Having done this we go back to negotiating. This function may
- *	be called from an interrupt context.
- */
- 
-int sppp_reopen (struct net_device *dev)
-{
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-	unsigned long flags;
-
-	sppp_close(dev);
-
-	spin_lock_irqsave(&sp->lock, flags);
-	if (!(sp->pp_flags & PP_CISCO))
-	{
-		sp->lcp.magic = jiffies;
-		++sp->pp_seq;
-		sp->lcp.state = LCP_STATE_CLOSED;
-		sp->ipcp.state = IPCP_STATE_CLOSED;
-		/* Give it a moment for the line to settle then go */
-		sppp_set_timeout (sp, 1);
-	} 
-	sp->pp_link_state=SPPP_LINK_DOWN;
-	spin_unlock_irqrestore(&sp->lock, flags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(sppp_reopen);
-
-/**
- *	sppp_change_mtu - Change the link MTU
- *	@dev:	Device to change MTU on
- *	@new_mtu: New MTU
- *
- *	Change the MTU on the link. This can only be called with
- *	the link down. It returns an error if the link is up or
- *	the mtu is out of range.
- */
- 
-static int sppp_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if(new_mtu<128||new_mtu>PPP_MTU||(dev->flags&IFF_UP))
-		return -EINVAL;
-	dev->mtu=new_mtu;
-	return 0;
-}
-
-/**
- *	sppp_do_ioctl - Ioctl handler for ppp/hdlc
- *	@dev: Device subject to ioctl
- *	@ifr: Interface request block from the user
- *	@cmd: Command that is being issued
- *	
- *	This function handles the ioctls that may be issued by the user
- *	to control the settings of a PPP/HDLC link. It does both busy
- *	and security checks. This function is intended to be wrapped by
- *	callers who wish to add additional ioctl calls of their own.
- */
- 
-int sppp_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	struct sppp *sp = (struct sppp *)sppp_of(dev);
-
-	if(dev->flags&IFF_UP)
-		return -EBUSY;
-		
-	if(!capable(CAP_NET_ADMIN))
-		return -EPERM;
-	
-	switch(cmd)
-	{
-		case SPPPIOCCISCO:
-			sp->pp_flags|=PP_CISCO;
-			dev->type = ARPHRD_HDLC;
-			break;
-		case SPPPIOCPPP:
-			sp->pp_flags&=~PP_CISCO;
-			dev->type = ARPHRD_PPP;
-			break;
-		case SPPPIOCDEBUG:
-			sp->pp_flags&=~PP_DEBUG;
-			if(ifr->ifr_flags)
-				sp->pp_flags|=PP_DEBUG;
-			break;
-		case SPPPIOCGFLAGS:
-			if(copy_to_user(ifr->ifr_data, &sp->pp_flags, sizeof(sp->pp_flags)))
-				return -EFAULT;
-			break;
-		case SPPPIOCSFLAGS:
-			if(copy_from_user(&sp->pp_flags, ifr->ifr_data, sizeof(sp->pp_flags)))
-				return -EFAULT;
-			break;
-		default:
-			return -EINVAL;
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL(sppp_do_ioctl);
-
-/**
- *	sppp_attach - attach synchronous PPP/HDLC to a device
- *	@pd:	PPP device to initialise
- *
- *	This initialises the PPP/HDLC support on an interface. At the
- *	time of calling the dev element must point to the network device
- *	that this interface is attached to. The interface should not yet
- *	be registered. 
- */
- 
-void sppp_attach(struct ppp_device *pd)
-{
-	struct net_device *dev = pd->dev;
-	struct sppp *sp = &pd->sppp;
-	unsigned long flags;
-
-	/* Make sure embedding is safe for sppp_of */
-	BUG_ON(sppp_of(dev) != sp);
-
-	spin_lock_irqsave(&spppq_lock, flags);
-	/* Initialize keepalive handler. */
-	if (! spppq)
-	{
-		init_timer(&sppp_keepalive_timer);
-		sppp_keepalive_timer.expires=jiffies+10*HZ;
-		sppp_keepalive_timer.function=sppp_keepalive;
-		add_timer(&sppp_keepalive_timer);
-	}
-	/* Insert new entry into the keepalive list. */
-	sp->pp_next = spppq;
-	spppq = sp;
-	spin_unlock_irqrestore(&spppq_lock, flags);
-
-	sp->pp_loopcnt = 0;
-	sp->pp_alivecnt = 0;
-	sp->pp_seq = 0;
-	sp->pp_rseq = 0;
-	sp->pp_flags = PP_KEEPALIVE|PP_CISCO|debug;/*PP_DEBUG;*/
-	sp->lcp.magic = 0;
-	sp->lcp.state = LCP_STATE_CLOSED;
-	sp->ipcp.state = IPCP_STATE_CLOSED;
-	sp->pp_if = dev;
-	spin_lock_init(&sp->lock);
-	
-	/* 
-	 *	Device specific setup. All but interrupt handler and
-	 *	hard_start_xmit.
-	 */
-	 
-	dev->header_ops = &sppp_header_ops;
-
-	dev->tx_queue_len = 10;
-	dev->type = ARPHRD_HDLC;
-	dev->addr_len = 0;
-	dev->hard_header_len = sizeof(struct ppp_header);
-	dev->mtu = PPP_MTU;
-	/*
-	 *	These 4 are callers but MUST also call sppp_ functions
-	 */
-	dev->do_ioctl = sppp_do_ioctl;
-#if 0
-	dev->get_stats = NULL;		/* Let the driver override these */
-	dev->open = sppp_open;
-	dev->stop = sppp_close;
-#endif	
-	dev->change_mtu = sppp_change_mtu;
-	dev->flags = IFF_MULTICAST|IFF_POINTOPOINT|IFF_NOARP;
-}
-
-EXPORT_SYMBOL(sppp_attach);
-
-/**
- *	sppp_detach - release PPP resources from a device
- *	@dev:	Network device to release
- *
- *	Stop and free up any PPP/HDLC resources used by this
- *	interface. This must be called before the device is
- *	freed.
- */
- 
-void sppp_detach (struct net_device *dev)
-{
-	struct sppp **q, *p, *sp = (struct sppp *)sppp_of(dev);
-	unsigned long flags;
-
-	spin_lock_irqsave(&spppq_lock, flags);
-	/* Remove the entry from the keepalive list. */
-	for (q = &spppq; (p = *q); q = &p->pp_next)
-		if (p == sp) {
-			*q = p->pp_next;
-			break;
-		}
-
-	/* Stop keepalive handler. */
-	if (! spppq)
-		del_timer(&sppp_keepalive_timer);
-	sppp_clear_timeout (sp);
-	spin_unlock_irqrestore(&spppq_lock, flags);
-}
-
-EXPORT_SYMBOL(sppp_detach);
-
-/*
- * Analyze the LCP Configure-Request options list
- * for the presence of unknown options.
- * If the request contains unknown options, build and
- * send Configure-reject packet, containing only unknown options.
- */
-static int
-sppp_lcp_conf_parse_options (struct sppp *sp, struct lcp_header *h,
-	int len, u32 *magic)
-{
-	u8 *buf, *r, *p;
-	int rlen;
-
-	len -= 4;
-	buf = r = kmalloc (len, GFP_ATOMIC);
-	if (! buf)
-		return (0);
-
-	p = (void*) (h+1);
-	for (rlen=0; len>1 && p[1]; len-=p[1], p+=p[1]) {
-		switch (*p) {
-		case LCP_OPT_MAGIC:
-			/* Magic number -- extract. */
-			if (len >= 6 && p[1] == 6) {
-				*magic = (u32)p[2] << 24 |
-					(u32)p[3] << 16 | p[4] << 8 | p[5];
-				continue;
-			}
-			break;
-		case LCP_OPT_ASYNC_MAP:
-			/* Async control character map -- check to be zero. */
-			if (len >= 6 && p[1] == 6 && ! p[2] && ! p[3] &&
-			    ! p[4] && ! p[5])
-				continue;
-			break;
-		case LCP_OPT_MRU:
-			/* Maximum receive unit -- always OK. */
-			continue;
-		default:
-			/* Others not supported. */
-			break;
-		}
-		/* Add the option to rejected list. */
-		memcpy(r, p, p[1]);
-		r += p[1];
-		rlen += p[1];
-	}
-	if (rlen)
-		sppp_cp_send (sp, PPP_LCP, LCP_CONF_REJ, h->ident, rlen, buf);
-	kfree(buf);
-	return (rlen == 0);
-}
-
-static void sppp_ipcp_input (struct sppp *sp, struct sk_buff *skb)
-{
-	struct lcp_header *h;
-	struct net_device *dev = sp->pp_if;
-	int len = skb->len;
-
-	if (!pskb_may_pull(skb, sizeof(struct lcp_header))) {
-		if (sp->pp_flags & PP_DEBUG)
-			printk (KERN_WARNING "%s: invalid ipcp packet length: %d bytes\n",
-				dev->name,  len);
-		return;
-	}
-	h = (struct lcp_header *)skb->data;
-	skb_pull(skb,sizeof(struct lcp_header));
-	if (sp->pp_flags & PP_DEBUG) {
-		printk (KERN_WARNING "%s: ipcp input: %d bytes <%s id=%xh len=%xh",
-			dev->name,  len,
-			sppp_ipcp_type_name (h->type), h->ident, ntohs (h->len));
-		if (len > 4)
-			sppp_print_bytes ((u8*) (h+1), len-4);
-		printk (">\n");
-	}
-	if (len > ntohs (h->len))
-		len = ntohs (h->len);
-	switch (h->type) {
-	default:
-		/* Unknown packet type -- send Code-Reject packet. */
-		sppp_cp_send (sp, PPP_IPCP, IPCP_CODE_REJ, ++sp->pp_seq, len, h);
-		break;
-	case IPCP_CONF_REQ:
-		if (len < 4) {
-			if (sp->pp_flags & PP_DEBUG)
-				printk (KERN_WARNING "%s: invalid ipcp configure request packet length: %d bytes\n",
-					dev->name, len);
-			return;
-		}
-		if (len > 4) {
-			sppp_cp_send (sp, PPP_IPCP, LCP_CONF_REJ, h->ident,
-				len-4, h+1);
-
-			switch (sp->ipcp.state) {
-			case IPCP_STATE_OPENED:
-				/* Initiate renegotiation. */
-				sppp_ipcp_open (sp);
-				/* fall through... */
-			case IPCP_STATE_ACK_SENT:
-				/* Go to closed state. */
-				sp->ipcp.state = IPCP_STATE_CLOSED;
-			}
-		} else {
-			/* Send Configure-Ack packet. */
-			sppp_cp_send (sp, PPP_IPCP, IPCP_CONF_ACK, h->ident,
-				0, NULL);
-			/* Change the state. */
-			if (sp->ipcp.state == IPCP_STATE_ACK_RCVD)
-				sp->ipcp.state = IPCP_STATE_OPENED;
-			else
-				sp->ipcp.state = IPCP_STATE_ACK_SENT;
-		}
-		break;
-	case IPCP_CONF_ACK:
-		if (h->ident != sp->ipcp.confid)
-			break;
-		sppp_clear_timeout (sp);
-		switch (sp->ipcp.state) {
-		case IPCP_STATE_CLOSED:
-			sp->ipcp.state = IPCP_STATE_ACK_RCVD;
-			sppp_set_timeout (sp, 5);
-			break;
-		case IPCP_STATE_ACK_SENT:
-			sp->ipcp.state = IPCP_STATE_OPENED;
-			break;
-		}
-		break;
-	case IPCP_CONF_NAK:
-	case IPCP_CONF_REJ:
-		if (h->ident != sp->ipcp.confid)
-			break;
-		sppp_clear_timeout (sp);
-			/* Initiate renegotiation. */
-		sppp_ipcp_open (sp);
-		if (sp->ipcp.state != IPCP_STATE_ACK_SENT)
-			/* Go to closed state. */
-			sp->ipcp.state = IPCP_STATE_CLOSED;
-		break;
-	case IPCP_TERM_REQ:
-		/* Send Terminate-Ack packet. */
-		sppp_cp_send (sp, PPP_IPCP, IPCP_TERM_ACK, h->ident, 0, NULL);
-		/* Go to closed state. */
-		sp->ipcp.state = IPCP_STATE_CLOSED;
-		/* Initiate renegotiation. */
-		sppp_ipcp_open (sp);
-		break;
-	case IPCP_TERM_ACK:
-		/* Ignore for now. */
-	case IPCP_CODE_REJ:
-		/* Ignore for now. */
-		break;
-	}
-}
-
-static void sppp_lcp_open (struct sppp *sp)
-{
-	char opt[6];
-
-	if (! sp->lcp.magic)
-		sp->lcp.magic = jiffies;
-	opt[0] = LCP_OPT_MAGIC;
-	opt[1] = sizeof (opt);
-	opt[2] = sp->lcp.magic >> 24;
-	opt[3] = sp->lcp.magic >> 16;
-	opt[4] = sp->lcp.magic >> 8;
-	opt[5] = sp->lcp.magic;
-	sp->lcp.confid = ++sp->pp_seq;
-	sppp_cp_send (sp, PPP_LCP, LCP_CONF_REQ, sp->lcp.confid,
-		sizeof (opt), &opt);
-	sppp_set_timeout (sp, 2);
-}
-
-static void sppp_ipcp_open (struct sppp *sp)
-{
-	sp->ipcp.confid = ++sp->pp_seq;
-	sppp_cp_send (sp, PPP_IPCP, IPCP_CONF_REQ, sp->ipcp.confid, 0, NULL);
-	sppp_set_timeout (sp, 2);
-}
-
-/*
- * Process PPP control protocol timeouts.
- */
- 
-static void sppp_cp_timeout (unsigned long arg)
-{
-	struct sppp *sp = (struct sppp*) arg;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sp->lock, flags);
-
-	sp->pp_flags &= ~PP_TIMO;
-	if (! (sp->pp_if->flags & IFF_UP) || (sp->pp_flags & PP_CISCO)) {
-		spin_unlock_irqrestore(&sp->lock, flags);
-		return;
-	}
-	switch (sp->lcp.state) {
-	case LCP_STATE_CLOSED:
-		/* No ACK for Configure-Request, retry. */
-		sppp_lcp_open (sp);
-		break;
-	case LCP_STATE_ACK_RCVD:
-		/* ACK got, but no Configure-Request for peer, retry. */
-		sppp_lcp_open (sp);
-		sp->lcp.state = LCP_STATE_CLOSED;
-		break;
-	case LCP_STATE_ACK_SENT:
-		/* ACK sent but no ACK for Configure-Request, retry. */
-		sppp_lcp_open (sp);
-		break;
-	case LCP_STATE_OPENED:
-		/* LCP is already OK, try IPCP. */
-		switch (sp->ipcp.state) {
-		case IPCP_STATE_CLOSED:
-			/* No ACK for Configure-Request, retry. */
-			sppp_ipcp_open (sp);
-			break;
-		case IPCP_STATE_ACK_RCVD:
-			/* ACK got, but no Configure-Request for peer, retry. */
-			sppp_ipcp_open (sp);
-			sp->ipcp.state = IPCP_STATE_CLOSED;
-			break;
-		case IPCP_STATE_ACK_SENT:
-			/* ACK sent but no ACK for Configure-Request, retry. */
-			sppp_ipcp_open (sp);
-			break;
-		case IPCP_STATE_OPENED:
-			/* IPCP is OK. */
-			break;
-		}
-		break;
-	}
-	spin_unlock_irqrestore(&sp->lock, flags);
-	sppp_flush_xmit();
-}
-
-static char *sppp_lcp_type_name (u8 type)
-{
-	static char buf [8];
-	switch (type) {
-	case LCP_CONF_REQ:   return ("conf-req");
-	case LCP_CONF_ACK:   return ("conf-ack");
-	case LCP_CONF_NAK:   return ("conf-nack");
-	case LCP_CONF_REJ:   return ("conf-rej");
-	case LCP_TERM_REQ:   return ("term-req");
-	case LCP_TERM_ACK:   return ("term-ack");
-	case LCP_CODE_REJ:   return ("code-rej");
-	case LCP_PROTO_REJ:  return ("proto-rej");
-	case LCP_ECHO_REQ:   return ("echo-req");
-	case LCP_ECHO_REPLY: return ("echo-reply");
-	case LCP_DISC_REQ:   return ("discard-req");
-	}
-	sprintf (buf, "%xh", type);
-	return (buf);
-}
-
-static char *sppp_ipcp_type_name (u8 type)
-{
-	static char buf [8];
-	switch (type) {
-	case IPCP_CONF_REQ:   return ("conf-req");
-	case IPCP_CONF_ACK:   return ("conf-ack");
-	case IPCP_CONF_NAK:   return ("conf-nack");
-	case IPCP_CONF_REJ:   return ("conf-rej");
-	case IPCP_TERM_REQ:   return ("term-req");
-	case IPCP_TERM_ACK:   return ("term-ack");
-	case IPCP_CODE_REJ:   return ("code-rej");
-	}
-	sprintf (buf, "%xh", type);
-	return (buf);
-}
-
-static void sppp_print_bytes (u_char *p, u16 len)
-{
-	printk (" %x", *p++);
-	while (--len > 0)
-		printk ("-%x", *p++);
-}
-
-/**
- *	sppp_rcv -	receive and process a WAN PPP frame
- *	@skb:	The buffer to process
- *	@dev:	The device it arrived on
- *	@p: Unused
- *	@orig_dev: Unused
- *
- *	Protocol glue. This drives the deferred processing mode the poorer
- *	cards use. This can be called directly by cards that do not have
- *	timing constraints but is normally called from the network layer
- *	after interrupt servicing to process frames queued via netif_rx.
- */
-
-static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev)
-{
-	if (dev_net(dev) != &init_net) {
-		kfree_skb(skb);
-		return 0;
-	}
-
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
-		return NET_RX_DROP;
-	sppp_input(dev,skb);
-	return 0;
-}
-
-static struct packet_type sppp_packet_type = {
-	.type	= __constant_htons(ETH_P_WAN_PPP),
-	.func	= sppp_rcv,
-};
-
-static char banner[] __initdata = 
-	KERN_INFO "Cronyx Ltd, Synchronous PPP and CISCO HDLC (c) 1994\n"
-	KERN_INFO "Linux port (c) 1998 Building Number Three Ltd & "
-		  "Jan \"Yenya\" Kasprzak.\n";
-
-static int __init sync_ppp_init(void)
-{
-	if(debug)
-		debug=PP_DEBUG;
-	printk(banner);
-	skb_queue_head_init(&tx_queue);
-	dev_add_pack(&sppp_packet_type);
-	return 0;
-}
-
-
-static void __exit sync_ppp_cleanup(void)
-{
-	dev_remove_pack(&sppp_packet_type);
-}
-
-module_init(sync_ppp_init);
-module_exit(sync_ppp_cleanup);
-module_param(debug, int, 0);
-MODULE_LICENSE("GPL");
-
diff --git a/include/net/syncppp.h b/include/net/syncppp.h
deleted file mode 100644
index 9e306f7f579a..000000000000
--- a/include/net/syncppp.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Defines for synchronous PPP/Cisco link level subroutines.
- *
- * Copyright (C) 1994 Cronyx Ltd.
- * Author: Serge Vakulenko, <vak@zebub.msk.su>
- *
- * This software is distributed with NO WARRANTIES, not even the implied
- * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Authors grant any other persons or organizations permission to use
- * or modify this software as long as this message is kept with the software,
- * all derivative works or modified versions.
- *
- * Version 1.7, Wed Jun  7 22:12:02 MSD 1995
- *
- *
- *
- */
-
-#ifndef _SYNCPPP_H_
-#define _SYNCPPP_H_ 1
-
-#ifdef __KERNEL__
-struct slcp {
-	u16	state;          /* state machine */
-	u32	magic;          /* local magic number */
-	u_char	echoid;         /* id of last keepalive echo request */
-	u_char	confid;         /* id of last configuration request */
-};
-
-struct sipcp {
-	u16	state;          /* state machine */
-	u_char  confid;         /* id of last configuration request */
-};
-
-struct sppp 
-{
-	struct sppp *	pp_next;	/* next interface in keepalive list */
-	u32		pp_flags;	/* use Cisco protocol instead of PPP */
-	u16		pp_alivecnt;	/* keepalive packets counter */
-	u16		pp_loopcnt;	/* loopback detection counter */
-	u32		pp_seq;		/* local sequence number */
-	u32		pp_rseq;	/* remote sequence number */
-	struct slcp	lcp;		/* LCP params */
-	struct sipcp	ipcp;		/* IPCP params */
-	struct timer_list	pp_timer;
-	struct net_device	*pp_if;
-	char		pp_link_state;	/* Link status */
-	spinlock_t      lock;
-};
-
-struct ppp_device
-{	
-	struct net_device *dev;	/* Network device pointer */
-	struct sppp sppp;	/* Synchronous PPP */
-};
-
-static inline struct sppp *sppp_of(struct net_device *dev) 
-{
-	struct ppp_device **ppp = dev->ml_priv;
-	BUG_ON((*ppp)->dev != dev);
-	return &(*ppp)->sppp;
-}
-
-#define PP_KEEPALIVE    0x01    /* use keepalive protocol */
-#define PP_CISCO        0x02    /* use Cisco protocol instead of PPP */
-#define PP_TIMO         0x04    /* cp_timeout routine active */
-#define PP_DEBUG	0x08
-
-#define PPP_MTU          1500    /* max. transmit unit */
-
-#define LCP_STATE_CLOSED        0       /* LCP state: closed (conf-req sent) */
-#define LCP_STATE_ACK_RCVD      1       /* LCP state: conf-ack received */
-#define LCP_STATE_ACK_SENT      2       /* LCP state: conf-ack sent */
-#define LCP_STATE_OPENED        3       /* LCP state: opened */
-
-#define IPCP_STATE_CLOSED       0       /* IPCP state: closed (conf-req sent) */
-#define IPCP_STATE_ACK_RCVD     1       /* IPCP state: conf-ack received */
-#define IPCP_STATE_ACK_SENT     2       /* IPCP state: conf-ack sent */
-#define IPCP_STATE_OPENED       3       /* IPCP state: opened */
-
-#define SPPP_LINK_DOWN		0	/* link down - no keepalive */
-#define SPPP_LINK_UP		1	/* link is up - keepalive ok */
-
-void sppp_attach (struct ppp_device *pd);
-void sppp_detach (struct net_device *dev);
-int sppp_do_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd);
-struct sk_buff *sppp_dequeue (struct net_device *dev);
-int sppp_isempty (struct net_device *dev);
-void sppp_flush (struct net_device *dev);
-int sppp_open (struct net_device *dev);
-int sppp_reopen (struct net_device *dev);
-int sppp_close (struct net_device *dev);
-#endif
-
-#define SPPPIOCCISCO	(SIOCDEVPRIVATE)
-#define SPPPIOCPPP	(SIOCDEVPRIVATE+1)
-#define SPPPIOCDEBUG	(SIOCDEVPRIVATE+2)
-#define SPPPIOCSFLAGS	(SIOCDEVPRIVATE+3)
-#define SPPPIOCGFLAGS	(SIOCDEVPRIVATE+4)
-
-#endif /* _SYNCPPP_H_ */
-- 
cgit v1.2.3


From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 23 Nov 2008 06:22:56 +0100
Subject: tracing/function-return-tracer: store return stack into task_struct
 and allocate it dynamically

Impact: use deeper function tracing depth safely

Some tests showed that function return tracing needed a more deeper depth
of function calls. But it could be unsafe to store these return addresses
to the stack.

So these arrays will now be allocated dynamically into task_struct of current
only when the tracer is activated.

Typical scheme when tracer is activated:
- allocate a return stack for each task in global list.
- fork: allocate the return stack for the newly created task
- exit: free return stack of current
- idle init: same as fork

I chose a default depth of 50. I don't have overruns anymore.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ftrace.h      |  1 -
 arch/x86/include/asm/thread_info.h | 29 ------------
 arch/x86/kernel/ftrace.c           | 29 ++++++------
 include/linux/ftrace.h             |  5 ++
 include/linux/sched.h              | 23 +++++----
 kernel/exit.c                      |  5 +-
 kernel/fork.c                      |  4 ++
 kernel/sched.c                     |  3 ++
 kernel/trace/ftrace.c              | 96 +++++++++++++++++++++++++++++++++++++-
 9 files changed, 137 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 2bb43b433e07..754a3e082f94 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -29,7 +29,6 @@ struct dyn_arch_ftrace {
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
-#define FTRACE_RET_STACK_SIZE 20
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e90e81ef6ab9..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,36 +40,8 @@ struct thread_info {
 						*/
 	__u8			supervisor_stack[0];
 #endif
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-	/* Index of current stored adress in ret_stack */
-	int		curr_ret_stack;
-	/* Stack of return addresses for return function tracing */
-	struct ftrace_ret_stack	ret_stack[FTRACE_RET_STACK_SIZE];
-	/*
-	 * Number of functions that haven't been traced
-	 * because of depth overrun.
-	 */
-	atomic_t	trace_overrun;
-#endif
 };
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task		= &tsk,			\
-	.exec_domain	= &default_exec_domain,	\
-	.flags		= 0,			\
-	.cpu		= 0,			\
-	.preempt_count	= 1,			\
-	.addr_limit	= KERNEL_DS,		\
-	.restart_block = {			\
-		.fn = do_no_restart_syscall,	\
-	},					\
-	.curr_ret_stack = -1,\
-	.trace_overrun	= ATOMIC_INIT(0)	\
-}
-#else
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
@@ -82,7 +54,6 @@ struct thread_info {
 		.fn = do_no_restart_syscall,	\
 	},					\
 }
-#endif
 
 #define init_thread_info	(init_thread_union.thread_info)
 #define init_stack		(init_thread_union.stack)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 356bb1eb6e9a..bb137f7297ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
 				unsigned long func)
 {
 	int index;
-	struct thread_info *ti = current_thread_info();
+
+	if (!current->ret_stack)
+		return -EBUSY;
 
 	/* The return trace stack is full */
-	if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) {
-		atomic_inc(&ti->trace_overrun);
+	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+		atomic_inc(&current->trace_overrun);
 		return -EBUSY;
 	}
 
-	index = ++ti->curr_ret_stack;
+	index = ++current->curr_ret_stack;
 	barrier();
-	ti->ret_stack[index].ret = ret;
-	ti->ret_stack[index].func = func;
-	ti->ret_stack[index].calltime = time;
+	current->ret_stack[index].ret = ret;
+	current->ret_stack[index].func = func;
+	current->ret_stack[index].calltime = time;
 
 	return 0;
 }
@@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
 {
 	int index;
 
-	struct thread_info *ti = current_thread_info();
-	index = ti->curr_ret_stack;
-	*ret = ti->ret_stack[index].ret;
-	*func = ti->ret_stack[index].func;
-	*time = ti->ret_stack[index].calltime;
-	*overrun = atomic_read(&ti->trace_overrun);
-	ti->curr_ret_stack--;
+	index = current->curr_ret_stack;
+	*ret = current->ret_stack[index].ret;
+	*func = current->ret_stack[index].func;
+	*time = current->ret_stack[index].calltime;
+	*overrun = atomic_read(&current->trace_overrun);
+	current->curr_ret_stack--;
 }
 
 /*
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f7ba4ea5e128..2ba259b2defa 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -323,6 +323,8 @@ struct ftrace_retfunc {
 };
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
+#define FTRACE_RETFUNC_DEPTH 50
+#define FTRACE_RETSTACK_ALLOC_SIZE 32
 /* Type of a callback handler of tracing return function */
 typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
 
@@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func);
 /* The current handler in use */
 extern trace_function_return_t ftrace_function_return;
 extern void unregister_ftrace_return(void);
+
+extern void ftrace_retfunc_init_task(struct task_struct *t);
+extern void ftrace_retfunc_exit_task(struct task_struct *t);
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c8e0db464206..bee1e93c95ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1352,6 +1352,17 @@ struct task_struct {
 	unsigned long default_timer_slack_ns;
 
 	struct list_head	*scm_work_list;
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	/* Index of current stored adress in ret_stack */
+	int curr_ret_stack;
+	/* Stack of return addresses for return function tracing */
+	struct ftrace_ret_stack	*ret_stack;
+	/*
+	 * Number of functions that haven't been traced
+	 * because of depth overrun.
+	 */
+	atomic_t trace_overrun;
+#endif
 };
 
 /*
@@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
 {
 	*task_thread_info(p) = *task_thread_info(org);
 	task_thread_info(p)->task = p;
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-	/*
-	 * When fork() creates a child process, this function is called.
-	 * But the child task may not inherit the return adresses traced
-	 * by the return function tracer because it will directly execute
-	 * in userspace and will not return to kernel functions its parent
-	 * used.
-	 */
-	task_thread_info(p)->curr_ret_stack = -1;
-	atomic_set(&task_thread_info(p)->trace_overrun, 0);
-#endif
 }
 
 static inline unsigned long *end_of_stack(struct task_struct *p)
diff --git a/kernel/exit.c b/kernel/exit.c
index 35c8ec2ba03a..b9d446329da1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
 #include <trace/sched.h>
+#include <linux/ftrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1127,7 +1128,9 @@ NORET_TYPE void do_exit(long code)
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
-
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	ftrace_retfunc_exit_task(tsk);
+#endif
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return".  */
diff --git a/kernel/fork.c b/kernel/fork.c
index ac62f43ee430..d1eb30e69ccc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -47,6 +47,7 @@
 #include <linux/mount.h>
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -1269,6 +1270,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	ftrace_retfunc_init_task(p);
+#endif
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	return p;
diff --git a/kernel/sched.c b/kernel/sched.c
index 4de56108c86f..fb17205950de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5901,6 +5901,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 * The idle tasks have their own, simple scheduling class:
 	 */
 	idle->sched_class = &idle_sched_class;
+#ifdef CONFIG_FUNCTION_RET_TRACER
+	ftrace_retfunc_init_task(idle);
+#endif
 }
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f212da486689..90d99fb02ae4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
 
+static atomic_t ftrace_retfunc_active;
+
 /* The callback that hooks the return of a function */
 trace_function_return_t ftrace_function_return =
 			(trace_function_return_t)ftrace_stub;
 
+
+/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
+static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+{
+	int i;
+	int ret = 0;
+	unsigned long flags;
+	int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
+	struct task_struct *g, *t;
+
+	for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
+		ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
+					* sizeof(struct ftrace_ret_stack),
+					GFP_KERNEL);
+		if (!ret_stack_list[i]) {
+			start = 0;
+			end = i;
+			ret = -ENOMEM;
+			goto free;
+		}
+	}
+
+	read_lock_irqsave(&tasklist_lock, flags);
+	do_each_thread(g, t) {
+		if (start == end) {
+			ret = -EAGAIN;
+			goto unlock;
+		}
+
+		if (t->ret_stack == NULL) {
+			t->ret_stack = ret_stack_list[start++];
+			t->curr_ret_stack = -1;
+			atomic_set(&t->trace_overrun, 0);
+		}
+	} while_each_thread(g, t);
+
+unlock:
+	read_unlock_irqrestore(&tasklist_lock, flags);
+free:
+	for (i = start; i < end; i++)
+		kfree(ret_stack_list[i]);
+	return ret;
+}
+
+/* Allocate a return stack for each task */
+static int start_return_tracing(void)
+{
+	struct ftrace_ret_stack **ret_stack_list;
+	int ret;
+
+	ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
+				sizeof(struct ftrace_ret_stack *),
+				GFP_KERNEL);
+
+	if (!ret_stack_list)
+		return -ENOMEM;
+
+	do {
+		ret = alloc_retstack_tasklist(ret_stack_list);
+	} while (ret == -EAGAIN);
+
+	kfree(ret_stack_list);
+	return ret;
+}
+
 int register_ftrace_return(trace_function_return_t func)
 {
 	int ret = 0;
@@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func)
 		ret = -EBUSY;
 		goto out;
 	}
-
+	atomic_inc(&ftrace_retfunc_active);
+	ret = start_return_tracing();
+	if (ret) {
+		atomic_dec(&ftrace_retfunc_active);
+		goto out;
+	}
 	ftrace_tracing_type = FTRACE_TYPE_RETURN;
 	ftrace_function_return = func;
 	ftrace_startup();
@@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void)
 {
 	mutex_lock(&ftrace_sysctl_lock);
 
+	atomic_dec(&ftrace_retfunc_active);
 	ftrace_function_return = (trace_function_return_t)ftrace_stub;
 	ftrace_shutdown();
 	/* Restore normal tracing type */
@@ -1537,6 +1610,27 @@ void unregister_ftrace_return(void)
 
 	mutex_unlock(&ftrace_sysctl_lock);
 }
+
+/* Allocate a return stack for newly created task */
+void ftrace_retfunc_init_task(struct task_struct *t)
+{
+	if (atomic_read(&ftrace_retfunc_active)) {
+		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+				* sizeof(struct ftrace_ret_stack),
+				GFP_KERNEL);
+		if (!t->ret_stack)
+			return;
+		t->curr_ret_stack = -1;
+		atomic_set(&t->trace_overrun, 0);
+	} else
+		t->ret_stack = NULL;
+}
+
+void ftrace_retfunc_exit_task(struct task_struct *t)
+{
+	kfree(t->ret_stack);
+	t->ret_stack = NULL;
+}
 #endif
 
 
-- 
cgit v1.2.3


From 82f60f0bc854aada696f27d863c03bef91f1509d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 23 Nov 2008 09:18:56 +0100
Subject: tracing/function-return-tracer: clean up task start/exit callbacks

Impact: cleanup

Eliminate #ifdefs in core code by using empty inline functions.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 3 +++
 kernel/exit.c          | 2 --
 kernel/fork.c          | 2 --
 kernel/sched.c         | 2 --
 4 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2ba259b2defa..938ca1942641 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -335,6 +335,9 @@ extern void unregister_ftrace_return(void);
 
 extern void ftrace_retfunc_init_task(struct task_struct *t);
 extern void ftrace_retfunc_exit_task(struct task_struct *t);
+#else
+static inline void ftrace_retfunc_init_task(struct task_struct *t) { }
+static inline void ftrace_retfunc_exit_task(struct task_struct *t) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index b9d446329da1..ef04d03b3286 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1128,9 +1128,7 @@ NORET_TYPE void do_exit(long code)
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
-#ifdef CONFIG_FUNCTION_RET_TRACER
 	ftrace_retfunc_exit_task(tsk);
-#endif
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return".  */
diff --git a/kernel/fork.c b/kernel/fork.c
index d1eb30e69ccc..fbf4a4c0a628 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1270,9 +1270,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
-#ifdef CONFIG_FUNCTION_RET_TRACER
 	ftrace_retfunc_init_task(p);
-#endif
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	return p;
diff --git a/kernel/sched.c b/kernel/sched.c
index fb17205950de..388d9db044ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5901,9 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 * The idle tasks have their own, simple scheduling class:
 	 */
 	idle->sched_class = &idle_sched_class;
-#ifdef CONFIG_FUNCTION_RET_TRACER
 	ftrace_retfunc_init_task(idle);
-#endif
 }
 
 /*
-- 
cgit v1.2.3


From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001
From: Török Edwin <edwintorok@gmail.com>
Date: Sat, 22 Nov 2008 13:28:47 +0200
Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: add new (default-off) tracing visualization feature

Usage example:

 mount -t debugfs nodev /sys/kernel/debug
 cd /sys/kernel/debug/tracing
 echo userstacktrace >iter_ctrl
 echo sched_switch >current_tracer
 echo 1 >tracing_enabled
 .... run application ...
 echo 0 >tracing_enabled

Then read one of 'trace','latency_trace','trace_pipe'.

To get the best output you can compile your userspace programs with
frame pointers (at least glibc + the app you are tracing).

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/ftrace.txt     |  5 ++-
 arch/x86/kernel/stacktrace.c | 57 +++++++++++++++++++++++++++
 include/linux/stacktrace.h   |  8 ++++
 kernel/trace/trace.c         | 93 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         |  9 +++++
 5 files changed, 171 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 753f4de4b175..79a80f79c062 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -324,7 +324,7 @@ output. To see what is available, simply cat the file:
 
   cat /debug/tracing/trace_options
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
- noblock nostacktrace nosched-tree
+ noblock nostacktrace nosched-tree nouserstacktrace
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -378,6 +378,9 @@ Here are the available options:
 		When a trace is recorded, so is the stack of functions.
 		This allows for back traces of trace sites.
 
+  userstacktrace - This option changes the trace.
+		   It records a stacktrace of the current userspace thread.
+
   sched-tree - TBD (any users??)
 
 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..b15153060417 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
 static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+	const void __user	*next_fp;
+	unsigned long		return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	int ret;
+
+	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+		return 0;
+
+	ret = 1;
+	pagefault_disable();
+	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+		ret = 0;
+	pagefault_enable();
+
+	return ret;
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+	/*
+	 * Trace user stack if we are not a kernel thread
+	 */
+	if (current->mm) {
+		const struct pt_regs *regs = task_pt_regs(current);
+		const void __user *fp = (const void __user *)regs->bp;
+
+		if (trace->nr_entries < trace->max_entries)
+			trace->entries[trace->nr_entries++] = regs->ip;
+
+		while (trace->nr_entries < trace->max_entries) {
+			struct stack_frame frame;
+			frame.next_fp = NULL;
+			frame.return_address = 0;
+			if (!copy_stack_frame(fp, &frame))
+				break;
+			if ((unsigned long)fp < regs->sp)
+				break;
+			if (frame.return_address)
+				trace->entries[trace->nr_entries++] =
+					frame.return_address;
+			if (fp == frame.next_fp)
+				break;
+			fp = frame.next_fp;
+		}
+	}
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index b106fd8e0d5c..68de51468f5d 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
+
+#ifdef CONFIG_X86
+extern void save_stack_trace_user(struct stack_trace *trace);
+#else
+# define save_stack_trace_user(trace)              do { } while (0)
+#endif
+
 #else
 # define save_stack_trace(trace)			do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)		do { } while (0)
+# define save_stack_trace_user(trace)              do { } while (0)
 # define print_stack_trace(trace, spaces)		do { } while (0)
 #endif
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4ee6f0375222..ced8b4fa9f51 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -275,6 +275,7 @@ static const char *trace_options[] = {
 	"ftrace_preempt",
 	"branch",
 	"annotate",
+	"userstacktrace",
 	NULL
 };
 
@@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr,
 	ftrace_trace_stack(tr, data, flags, skip, preempt_count());
 }
 
+static void ftrace_trace_userstack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags, int pc)
+{
+	struct userstack_entry *entry;
+	struct stack_trace trace;
+	struct ring_buffer_event *event;
+	unsigned long irq_flags;
+
+	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type		= TRACE_USER_STACK;
+
+	memset(&entry->caller, 0, sizeof(entry->caller));
+
+	trace.nr_entries	= 0;
+	trace.max_entries	= FTRACE_STACK_ENTRIES;
+	trace.skip		= 0;
+	trace.entries		= entry->caller;
+
+	save_stack_trace_user(&trace);
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+}
+
+void __trace_userstack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags)
+{
+	ftrace_trace_userstack(tr, data, flags, preempt_count());
+}
+
 static void
 ftrace_trace_special(void *__tr, void *__data,
 		     unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data,
 	entry->arg3			= arg3;
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, irq_flags, 4, pc);
+	ftrace_trace_userstack(tr, data, irq_flags, pc);
 
 	trace_wake_up();
 }
@@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 	entry->next_cpu	= task_cpu(next);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, flags, 5, pc);
+	ftrace_trace_userstack(tr, data, flags, pc);
 }
 
 void
@@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_cpu			= task_cpu(wakee);
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 	ftrace_trace_stack(tr, data, flags, 6, pc);
+	ftrace_trace_userstack(tr, data, flags, pc);
 
 	trace_wake_up();
 }
@@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 	return ret;
 }
 
+static int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+		unsigned long sym_flags)
+{
+	int ret = 1;
+	unsigned i;
+
+	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+		unsigned long ip = entry->caller[i];
+
+		if (ip == ULONG_MAX || !ret)
+			break;
+		if (i)
+			ret = trace_seq_puts(s, " <- ");
+		if (!ip) {
+			ret = trace_seq_puts(s, "??");
+			continue;
+		}
+		if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/)
+			ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+	}
+
+	return ret;
+}
+
 static void print_lat_help_header(struct seq_file *m)
 {
 	seq_puts(m, "#                  _------=> CPU#            \n");
@@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 				 field->line);
 		break;
 	}
+	case TRACE_USER_STACK: {
+		struct userstack_entry *field;
+
+		trace_assign_type(field, entry);
+
+		seq_print_userip_objs(field, s, sym_flags);
+		if (entry->flags & TRACE_FLAG_CONT)
+			trace_seq_print_cont(s, iter);
+		break;
+	}
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
@@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 				 field->line);
 		break;
 	}
+	case TRACE_USER_STACK: {
+		struct userstack_entry *field;
+
+		trace_assign_type(field, entry);
+
+		ret = seq_print_userip_objs(field, s, sym_flags);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		ret = trace_seq_putc(s, '\n');
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		break;
+	}
 	}
 	return TRACE_TYPE_HANDLED;
 }
@@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
@@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
@@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_SPECIAL:
+	case TRACE_USER_STACK:
 	case TRACE_STACK: {
 		struct special_entry *field;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cb12fd98f6b..17bb4c830b01 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -26,6 +26,7 @@ enum trace_type {
 	TRACE_BOOT_CALL,
 	TRACE_BOOT_RET,
 	TRACE_FN_RET,
+	TRACE_USER_STACK,
 
 	__TRACE_LAST_TYPE
 };
@@ -42,6 +43,7 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
+	int			tgid;
 };
 
 /*
@@ -99,6 +101,11 @@ struct stack_entry {
 	unsigned long		caller[FTRACE_STACK_ENTRIES];
 };
 
+struct userstack_entry {
+	struct trace_entry	ent;
+	unsigned long		caller[FTRACE_STACK_ENTRIES];
+};
+
 /*
  * ftrace_printk entry:
  */
@@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
 		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
+		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
@@ -500,6 +508,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_PREEMPTONLY		= 0x800,
 	TRACE_ITER_BRANCH		= 0x1000,
 	TRACE_ITER_ANNOTATE		= 0x2000,
+	TRACE_ITER_USERSTACKTRACE       = 0x4000
 };
 
 /*
-- 
cgit v1.2.3


From 74e2f334f4440cbcb63e9ebbcdcea430d41bdfa3 Mon Sep 17 00:00:00 2001
From: Török Edwin <edwintorok@gmail.com>
Date: Sat, 22 Nov 2008 13:28:48 +0200
Subject: vfs, seqfile: make mangle_path() global
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: expose new VFS API

make mangle_path() available, as per the suggestions of Christoph Hellwig
and Al Viro:

  http://lkml.org/lkml/2008/11/4/338

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/seq_file.c            | 14 +++++++++++++-
 include/linux/seq_file.h |  1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b8..f5b61cc1cc1a 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
 }
 EXPORT_SYMBOL(seq_printf);
 
-static char *mangle_path(char *s, char *p, char *esc)
+/**
+ * 	mangle_path - mangle and copy path to buffer beginning
+ * 	@s - buffer start
+ * 	@p - beginning of path in above buffer
+ *      @esc - set of characters that need escaping
+ *
+ *      Copy the path from @p to @s, replacing each occurrence of character from
+ *      @esc with usual octal escape.
+ *      Returns pointer past last written character in @s, or NULL in case of
+ *      failure.
+ */
+char *mangle_path(char *s, char *p, char *esc)
 {
 	while (s <= p) {
 		char c = *p++;
@@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc)
 	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(mangle_path);
 
 /*
  * return the absolute path of 'dentry' residing in mount 'mnt'.
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dc50bcc282a8..b3dfa72f13b9 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -34,6 +34,7 @@ struct seq_operations {
 
 #define SEQ_SKIP 1
 
+char *mangle_path(char *s, char *p, char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
 loff_t seq_lseek(struct file *, loff_t, int);
-- 
cgit v1.2.3


From 42f565e116e0408b5ddc21a33c4a4d41fd572420 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 20 Nov 2008 23:57:47 -0500
Subject: trace: remove extra assign in branch check

Impact: clean up of branch check

The unlikely/likely profiler does an extra assign of the f.line.
This is not needed since it is already calculated at compile time.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/compiler.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c7d804a7a4d6..c25e525121f0 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -87,7 +87,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 				.file = __FILE__,			\
 				.line = __LINE__,			\
 			};						\
-			______f.line = __LINE__;			\
 			______r = likely_notrace(x);			\
 			ftrace_likely_update(&______f, ______r, 1);	\
 			______r;					\
@@ -102,7 +101,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 				.file = __FILE__,			\
 				.line = __LINE__,			\
 			};						\
-			______f.line = __LINE__;			\
 			______r = unlikely_notrace(x);			\
 			ftrace_likely_update(&______f, ______r, 0);	\
 			______r;					\
-- 
cgit v1.2.3


From 45b797492a0758e64dff74e9db70e1f65e0603a5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 21 Nov 2008 00:40:40 -0500
Subject: trace: consolidate unlikely and likely profiler

Impact: clean up to make one profiler of like and unlikely tracer

The likely and unlikely profiler prints out the file and line numbers
of the annotated branches that it is profiling. It shows the number
of times it was correct or incorrect in its guess. Having two
different files or sections for that matter to tell us if it was a
likely or unlikely is pretty pointless. We really only care if
it was correct or not.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h |  9 +++------
 include/linux/compiler.h          | 24 +++++-------------------
 kernel/trace/Kconfig              |  3 +--
 kernel/trace/trace_branch.c       | 39 +++++++++++++--------------------------
 4 files changed, 22 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3b46ae464933..8bccb49981e5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -46,12 +46,9 @@
 #endif
 
 #ifdef CONFIG_TRACE_BRANCH_PROFILING
-#define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_likely_profile) = .;   \
-				*(_ftrace_likely)			      \
-				VMLINUX_SYMBOL(__stop_likely_profile) = .;    \
-				VMLINUX_SYMBOL(__start_unlikely_profile) = .; \
-				*(_ftrace_unlikely)			      \
-				VMLINUX_SYMBOL(__stop_unlikely_profile) = .;
+#define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
+				*(_ftrace_annotated_branch)			      \
+				VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .;
 #else
 #define LIKELY_PROFILE()
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c25e525121f0..0628a2013fae 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -77,32 +77,18 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
 #define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
 
-#define likely_check(x) ({						\
+#define __branch_check__(x, expect) ({					\
 			int ______r;					\
 			static struct ftrace_branch_data		\
 				__attribute__((__aligned__(4)))		\
-				__attribute__((section("_ftrace_likely"))) \
+				__attribute__((section("_ftrace_annotated_branch"))) \
 				______f = {				\
 				.func = __func__,			\
 				.file = __FILE__,			\
 				.line = __LINE__,			\
 			};						\
 			______r = likely_notrace(x);			\
-			ftrace_likely_update(&______f, ______r, 1);	\
-			______r;					\
-		})
-#define unlikely_check(x) ({						\
-			int ______r;					\
-			static struct ftrace_branch_data		\
-				__attribute__((__aligned__(4)))		\
-				__attribute__((section("_ftrace_unlikely"))) \
-				______f = {				\
-				.func = __func__,			\
-				.file = __FILE__,			\
-				.line = __LINE__,			\
-			};						\
-			______r = unlikely_notrace(x);			\
-			ftrace_likely_update(&______f, ______r, 0);	\
+			ftrace_likely_update(&______f, ______r, expect); \
 			______r;					\
 		})
 
@@ -112,10 +98,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
  * written by Daniel Walker.
  */
 # ifndef likely
-#  define likely(x)	(__builtin_constant_p(x) ? !!(x) : likely_check(x))
+#  define likely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
 # endif
 # ifndef unlikely
-#  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : unlikely_check(x))
+#  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
 # endif
 #else
 # define likely(x)	__builtin_expect(!!(x), 1)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b8378fad29a3..7e3548705708 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -166,8 +166,7 @@ config TRACE_BRANCH_PROFILING
 	  This tracer profiles all the the likely and unlikely macros
 	  in the kernel. It will display the results in:
 
-	  /debugfs/tracing/profile_likely
-	  /debugfs/tracing/profile_unlikely
+	  /debugfs/tracing/profile_annotated_branch
 
 	  Note: this will add a significant overhead, only turn this
 	  on if you need to profile the system's use of these macros.
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 23f9b02ce967..21dedc8b50a4 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -261,7 +261,7 @@ static struct seq_operations tracing_likely_seq_ops = {
 	.show		= t_show,
 };
 
-static int tracing_likely_open(struct inode *inode, struct file *file)
+static int tracing_branch_open(struct inode *inode, struct file *file)
 {
 	int ret;
 
@@ -274,25 +274,18 @@ static int tracing_likely_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-static struct file_operations tracing_likely_fops = {
-	.open		= tracing_likely_open,
+static const struct file_operations tracing_branch_fops = {
+	.open		= tracing_branch_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 };
 
-extern unsigned long __start_likely_profile[];
-extern unsigned long __stop_likely_profile[];
-extern unsigned long __start_unlikely_profile[];
-extern unsigned long __stop_unlikely_profile[];
+extern unsigned long __start_annotated_branch_profile[];
+extern unsigned long __stop_annotated_branch_profile[];
 
-static struct ftrace_pointer ftrace_likely_pos = {
-	.start			= __start_likely_profile,
-	.stop			= __stop_likely_profile,
-};
-
-static struct ftrace_pointer ftrace_unlikely_pos = {
-	.start			= __start_unlikely_profile,
-	.stop			= __stop_unlikely_profile,
+static const struct ftrace_pointer ftrace_annotated_branch_pos = {
+	.start			= __start_annotated_branch_profile,
+	.stop			= __stop_annotated_branch_profile,
 };
 
 static __init int ftrace_branch_init(void)
@@ -302,18 +295,12 @@ static __init int ftrace_branch_init(void)
 
 	d_tracer = tracing_init_dentry();
 
-	entry = debugfs_create_file("profile_likely", 0444, d_tracer,
-				    &ftrace_likely_pos,
-				    &tracing_likely_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs 'profile_likely' entry\n");
-
-	entry = debugfs_create_file("profile_unlikely", 0444, d_tracer,
-				    &ftrace_unlikely_pos,
-				    &tracing_likely_fops);
+	entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
+				    &ftrace_annotated_branch_pos,
+				    &tracing_branch_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs"
-			   " 'profile_unlikely' entry\n");
+		pr_warning("Could not create debugfs "
+			   "'profile_annotatet_branch' entry\n");
 
 	return 0;
 }
-- 
cgit v1.2.3


From 2bcd521a684cc94befbe2ce7d5b613c841b0d304 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 21 Nov 2008 01:30:54 -0500
Subject: trace: profile all if conditionals

Impact: feature to profile if statements

This patch adds a branch profiler for all if () statements.
The results will be found in:

  /debugfs/tracing/profile_branch

For example:

   miss      hit    %        Function                  File              Line
 ------- ---------  -        --------                  ----              ----
       0        1 100 x86_64_start_reservations      head64.c             127
       0        1 100 copy_bootdata                  head64.c             69
       1        0   0 x86_64_start_kernel            head64.c             111
      32        0   0 set_intr_gate                  desc.h               319
       1        0   0 reserve_ebda_region            head.c               51
       1        0   0 reserve_ebda_region            head.c               47
       0        1 100 reserve_ebda_region            head.c               42
       0        0   X maxcpus                        main.c               165

Miss means the branch was not taken. Hit means the branch was taken.
The percent is the percentage the branch was taken.

This adds a significant amount of overhead and should only be used
by those analyzing their system.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h | 11 ++++++++++-
 include/linux/compiler.h          | 38 ++++++++++++++++++++++++++++++++++++--
 kernel/trace/Kconfig              | 16 ++++++++++++++++
 kernel/trace/trace_branch.c       | 33 +++++++++++++++++++++++++++++++--
 4 files changed, 93 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8bccb49981e5..eba835a2c2cd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -53,6 +53,14 @@
 #define LIKELY_PROFILE()
 #endif
 
+#ifdef CONFIG_PROFILE_ALL_BRANCHES
+#define BRANCH_PROFILE()	VMLINUX_SYMBOL(__start_branch_profile) = .;   \
+				*(_ftrace_branch)			      \
+				VMLINUX_SYMBOL(__stop_branch_profile) = .;
+#else
+#define BRANCH_PROFILE()
+#endif
+
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
@@ -72,7 +80,8 @@
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	*(__tracepoints)						\
 	VMLINUX_SYMBOL(__stop___tracepoints) = .;			\
-	LIKELY_PROFILE()
+	LIKELY_PROFILE()		       				\
+	BRANCH_PROFILE()
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 0628a2013fae..ea7c6be354b7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -63,8 +63,16 @@ struct ftrace_branch_data {
 	const char *func;
 	const char *file;
 	unsigned line;
-	unsigned long correct;
-	unsigned long incorrect;
+	union {
+		struct {
+			unsigned long correct;
+			unsigned long incorrect;
+		};
+		struct {
+			unsigned long miss;
+			unsigned long hit;
+		};
+	};
 };
 
 /*
@@ -103,6 +111,32 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # ifndef unlikely
 #  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
 # endif
+
+#ifdef CONFIG_PROFILE_ALL_BRANCHES
+/*
+ * "Define 'is'", Bill Clinton
+ * "Define 'if'", Steven Rostedt
+ */
+#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) :		\
+	({								\
+		int ______r;						\
+		static struct ftrace_branch_data			\
+			__attribute__((__aligned__(4)))			\
+			__attribute__((section("_ftrace_branch")))	\
+			______f = {					\
+				.func = __func__,			\
+				.file = __FILE__,			\
+				.line = __LINE__,			\
+			};						\
+		______r = !!(cond);					\
+		if (______r)						\
+			______f.hit++;					\
+		else							\
+			______f.miss++;					\
+		______r;						\
+	}))
+#endif /* CONFIG_PROFILE_ALL_BRANCHES */
+
 #else
 # define likely(x)	__builtin_expect(!!(x), 1)
 # define unlikely(x)	__builtin_expect(!!(x), 0)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 7e3548705708..61e8cca6ff45 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -173,6 +173,22 @@ config TRACE_BRANCH_PROFILING
 
 	  Say N if unsure.
 
+config PROFILE_ALL_BRANCHES
+	bool "Profile all if conditionals"
+	depends on TRACE_BRANCH_PROFILING
+	help
+	  This tracer profiles all branch conditions. Every if ()
+	  taken in the kernel is recorded whether it hit or miss.
+	  The results will be displayed in:
+
+	  /debugfs/tracing/profile_branch
+
+	  This configuration, when enabled, will impose a great overhead
+	  on the system. This should only be enabled when the system
+	  is to be analyzed
+
+	  Say N if unsure.
+
 config TRACING_BRANCHES
 	bool
 	help
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 142acb3b4e00..85792aec64d2 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -185,6 +185,7 @@ EXPORT_SYMBOL(ftrace_likely_update);
 struct ftrace_pointer {
 	void		*start;
 	void		*stop;
+	int		hit;
 };
 
 static void *
@@ -223,13 +224,17 @@ static void t_stop(struct seq_file *m, void *p)
 
 static int t_show(struct seq_file *m, void *v)
 {
+	struct ftrace_pointer *fp = m->private;
 	struct ftrace_branch_data *p = v;
 	const char *f;
 	long percent;
 
 	if (v == (void *)1) {
-		seq_printf(m, " correct incorrect  %% "
-			      "       Function                "
+		if (fp->hit)
+			seq_printf(m, "   miss      hit    %% ");
+		else
+			seq_printf(m, " correct incorrect  %% ");
+		seq_printf(m, "       Function                "
 			      "  File              Line\n"
 			      " ------- ---------  - "
 			      "       --------                "
@@ -243,6 +248,9 @@ static int t_show(struct seq_file *m, void *v)
 		f--;
 	f++;
 
+	/*
+	 * The miss is overlayed on correct, and hit on incorrect.
+	 */
 	if (p->correct) {
 		percent = p->incorrect * 100;
 		percent /= p->correct + p->incorrect;
@@ -284,6 +292,18 @@ static const struct file_operations tracing_branch_fops = {
 	.llseek		= seq_lseek,
 };
 
+#ifdef CONFIG_PROFILE_ALL_BRANCHES
+extern unsigned long __start_branch_profile[];
+extern unsigned long __stop_branch_profile[];
+
+static struct ftrace_pointer ftrace_branch_pos = {
+	.start			= __start_branch_profile,
+	.stop			= __stop_branch_profile,
+	.hit			= 1,
+};
+
+#endif /* CONFIG_PROFILE_ALL_BRANCHES */
+
 extern unsigned long __start_annotated_branch_profile[];
 extern unsigned long __stop_annotated_branch_profile[];
 
@@ -306,6 +326,15 @@ static __init int ftrace_branch_init(void)
 		pr_warning("Could not create debugfs "
 			   "'profile_annotatet_branch' entry\n");
 
+#ifdef CONFIG_PROFILE_ALL_BRANCHES
+	entry = debugfs_create_file("profile_branch", 0444, d_tracer,
+				    &ftrace_branch_pos,
+				    &tracing_branch_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs"
+			   " 'profile_branch' entry\n");
+#endif
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 033601a32b2012b6948e80e739cca40bff4de4a0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 21 Nov 2008 12:41:55 -0500
Subject: ring-buffer: add tracing_off_permanent

Impact: feature to permanently disable ring buffer

This patch adds a API to the ring buffer code that will permanently
disable the ring buffer from ever recording. This should only be
called when some serious anomaly is detected, and the system
may be in an unstable state. When that happens, shutting down the
recording to the ring buffers may be appropriate.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 79 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 67 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..3bb87a753fa3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
 
 void tracing_on(void);
 void tracing_off(void);
+void tracing_off_permanent(void);
 
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 85ced143c2c4..e206951603c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
 
 #include "trace.h"
 
-/* Global flag to disable all recording to ring buffers */
-static int ring_buffers_off __read_mostly;
+/*
+ * A fast way to enable or disable all ring buffers is to
+ * call tracing_on or tracing_off. Turning off the ring buffers
+ * prevents all ring buffers from being recorded to.
+ * Turning this switch on, makes it OK to write to the
+ * ring buffer, if the ring buffer is enabled itself.
+ *
+ * There's three layers that must be on in order to write
+ * to the ring buffer.
+ *
+ * 1) This global flag must be set.
+ * 2) The ring buffer must be enabled for recording.
+ * 3) The per cpu buffer must be enabled for recording.
+ *
+ * In case of an anomaly, this global flag has a bit set that
+ * will permantly disable all ring buffers.
+ */
+
+/*
+ * Global flag to disable all recording to ring buffers
+ *  This has two bits: ON, DISABLED
+ *
+ *  ON   DISABLED
+ * ---- ----------
+ *   0      0        : ring buffers are off
+ *   1      0        : ring buffers are on
+ *   X      1        : ring buffers are permanently disabled
+ */
+
+enum {
+	RB_BUFFERS_ON_BIT	= 0,
+	RB_BUFFERS_DISABLED_BIT	= 1,
+};
+
+enum {
+	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
+	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
+};
+
+static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
 
 /**
  * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
  */
 void tracing_on(void)
 {
-	ring_buffers_off = 0;
+	set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
 }
 
 /**
@@ -42,7 +80,18 @@ void tracing_on(void)
  */
 void tracing_off(void)
 {
-	ring_buffers_off = 1;
+	clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
+}
+
+/**
+ * tracing_off_permanent - permanently disable ring buffers
+ *
+ * This function, once called, will disable all ring buffers
+ * permanenty.
+ */
+void tracing_off_permanent(void)
+{
+	set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
 }
 
 #include "trace.h"
@@ -1185,7 +1234,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 	int cpu, resched;
 
-	if (ring_buffers_off)
+	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return NULL;
 
 	if (atomic_read(&buffer->record_disabled))
@@ -1297,7 +1346,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 	int ret = -EBUSY;
 	int cpu, resched;
 
-	if (ring_buffers_off)
+	if (ring_buffer_flags != RB_BUFFERS_ON)
 		return -EBUSY;
 
 	if (atomic_read(&buffer->record_disabled))
@@ -2178,12 +2227,14 @@ static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
 	       size_t cnt, loff_t *ppos)
 {
-	int *p = filp->private_data;
+	long *p = filp->private_data;
 	char buf[64];
 	int r;
 
-	/* !ring_buffers_off == tracing_on */
-	r = sprintf(buf, "%d\n", !*p);
+	if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
+		r = sprintf(buf, "permanently disabled\n");
+	else
+		r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
 
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
@@ -2192,7 +2243,7 @@ static ssize_t
 rb_simple_write(struct file *filp, const char __user *ubuf,
 		size_t cnt, loff_t *ppos)
 {
-	int *p = filp->private_data;
+	long *p = filp->private_data;
 	char buf[64];
 	long val;
 	int ret;
@@ -2209,8 +2260,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
 	if (ret < 0)
 		return ret;
 
-	/* !ring_buffers_off == tracing_on */
-	*p = !val;
+	if (val)
+		set_bit(RB_BUFFERS_ON_BIT, p);
+	else
+		clear_bit(RB_BUFFERS_ON_BIT, p);
 
 	(*ppos)++;
 
@@ -2232,7 +2285,7 @@ static __init int rb_init_debugfs(void)
 	d_tracer = tracing_init_dentry();
 
 	entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-				    &ring_buffers_off, &rb_simple_fops);
+				    &ring_buffer_flags, &rb_simple_fops);
 	if (!entry)
 		pr_warning("Could not create debugfs 'tracing_on' entry\n");
 
-- 
cgit v1.2.3


From 69bb54ec05f57da7f6fac2cec0820cbc970df20f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 21 Nov 2008 12:59:38 -0500
Subject: ftrace: add ftrace_off_permanent

Impact: add new API to disable all of ftrace on anomalies

It case of a serious anomaly being detected (like something caught by
lockdep) it is a good idea to disable all tracing immediately, without
grabing any locks.

This patch adds ftrace_off_permanent that disables the tracers, function
tracing and ring buffers without a way to enable them again. This should
only be used when something serious has been detected.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  2 ++
 kernel/trace/trace.c   | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f7ba4ea5e128..13e9cfc09928 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops;
 
 extern void tracing_start(void);
 extern void tracing_stop(void);
+extern void ftrace_off_permanent(void);
 
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
@@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
 
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
+static inline void ftrace_off_permanent(void) { }
 static inline int
 ftrace_printk(const char *fmt, ...)
 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4ee6f0375222..0dbfb23ced97 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -660,6 +660,21 @@ static void trace_init_cmdlines(void)
 static int trace_stop_count;
 static DEFINE_SPINLOCK(tracing_start_lock);
 
+/**
+ * ftrace_off_permanent - disable all ftrace code permanently
+ *
+ * This should only be called when a serious anomally has
+ * been detected.  This will turn off the function tracing,
+ * ring buffers, and other tracing utilites. It takes no
+ * locks and can be called from any context.
+ */
+void ftrace_off_permanent(void)
+{
+	tracing_disabled = 1;
+	ftrace_stop();
+	tracing_off_permanent();
+}
+
 /**
  * tracing_start - quick start of the tracer
  *
-- 
cgit v1.2.3


From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001
From: Török Edwin <edwintorok@gmail.com>
Date: Sun, 23 Nov 2008 12:39:06 +0200
Subject: tracing/stack-tracer: fix style issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: cleanup

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/stacktrace.c | 51 +++++++++++++++++++++++++-------------------
 include/linux/stacktrace.h   |  2 +-
 kernel/trace/trace.c         |  7 +++---
 3 files changed, 33 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b15153060417..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 struct stack_frame {
 	const void __user	*next_fp;
-	unsigned long		return_address;
+	unsigned long		ret_addr;
 };
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
@@ -108,33 +108,40 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 	return ret;
 }
 
+static inline void __save_stack_trace_user(struct stack_trace *trace)
+{
+	const struct pt_regs *regs = task_pt_regs(current);
+	const void __user *fp = (const void __user *)regs->bp;
+
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = regs->ip;
+
+	while (trace->nr_entries < trace->max_entries) {
+		struct stack_frame frame;
+
+		frame.next_fp = NULL;
+		frame.ret_addr = 0;
+		if (!copy_stack_frame(fp, &frame))
+			break;
+		if ((unsigned long)fp < regs->sp)
+			break;
+		if (frame.ret_addr) {
+			trace->entries[trace->nr_entries++] =
+				frame.ret_addr;
+		}
+		if (fp == frame.next_fp)
+			break;
+		fp = frame.next_fp;
+	}
+}
+
 void save_stack_trace_user(struct stack_trace *trace)
 {
 	/*
 	 * Trace user stack if we are not a kernel thread
 	 */
 	if (current->mm) {
-		const struct pt_regs *regs = task_pt_regs(current);
-		const void __user *fp = (const void __user *)regs->bp;
-
-		if (trace->nr_entries < trace->max_entries)
-			trace->entries[trace->nr_entries++] = regs->ip;
-
-		while (trace->nr_entries < trace->max_entries) {
-			struct stack_frame frame;
-			frame.next_fp = NULL;
-			frame.return_address = 0;
-			if (!copy_stack_frame(fp, &frame))
-				break;
-			if ((unsigned long)fp < regs->sp)
-				break;
-			if (frame.return_address)
-				trace->entries[trace->nr_entries++] =
-					frame.return_address;
-			if (fp == frame.next_fp)
-				break;
-			fp = frame.next_fp;
-		}
+		__save_stack_trace_user(trace);
 	}
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 68de51468f5d..fd42d6851109 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -25,7 +25,7 @@ extern void save_stack_trace_user(struct stack_trace *trace);
 #else
 # define save_stack_trace(trace)			do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)		do { } while (0)
-# define save_stack_trace_user(trace)              do { } while (0)
+# define save_stack_trace_user(trace)			do { } while (0)
 # define print_stack_trace(trace, spaces)		do { } while (0)
 #endif
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 62776b71b1c5..dedf35f36971 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -948,9 +948,9 @@ static void ftrace_trace_userstack(struct trace_array *tr,
 		   struct trace_array_cpu *data,
 		   unsigned long flags, int pc)
 {
+	struct ring_buffer_event *event;
 	struct userstack_entry *entry;
 	struct stack_trace trace;
-	struct ring_buffer_event *event;
 	unsigned long irq_flags;
 
 	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
@@ -1471,8 +1471,7 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
 	if (file) {
 		ret = trace_seq_path(s, &file->f_path);
 		if (ret)
-			ret = trace_seq_printf(s, "[+0x%lx]",
-					ip - vmstart);
+			ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
 	}
 	if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
 		ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
@@ -1485,7 +1484,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 {
 	struct mm_struct *mm = NULL;
 	int ret = 1;
-	unsigned i;
+	unsigned int i;
 
 	if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
 		struct task_struct *task;
-- 
cgit v1.2.3


From 8d26487fd4ddda7a0237da418fb8669fb06ae557 Mon Sep 17 00:00:00 2001
From: Török Edwin <edwintorok@gmail.com>
Date: Sun, 23 Nov 2008 12:39:08 +0200
Subject: tracing/stack-tracer: introduce CONFIG_USER_STACKTRACE_SUPPORT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: cleanup

User stack tracing is just implemented for x86, but it is not x86 specific.

Introduce a generic config flag, that is currently enabled only for x86.
When other arches implement it, they will have to
SELECT USER_STACKTRACE_SUPPORT.

Signed-off-by: Török Edwin <edwintorok@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig           | 1 +
 include/linux/stacktrace.h | 2 +-
 kernel/trace/Kconfig       | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7a146baaa990..e49a4fd718fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select USER_STACKTRACE_SUPPORT
 
 config ARCH_DEFCONFIG
 	string
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index fd42d6851109..1a8cecc4f38c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -16,7 +16,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
 
-#ifdef CONFIG_X86
+#ifdef CONFIG_USER_STACKTRACE_SUPPORT
 extern void save_stack_trace_user(struct stack_trace *trace);
 #else
 # define save_stack_trace_user(trace)              do { } while (0)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b8378fad29a3..87fc34a1bb91 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,6 +3,9 @@
 #  select HAVE_FUNCTION_TRACER:
 #
 
+config USER_STACKTRACE_SUPPORT
+	bool
+
 config NOP_TRACER
 	bool
 
-- 
cgit v1.2.3


From e262a7ba31f0cd4dae225e6d2e9037e5ac6108e8 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Sun, 23 Nov 2008 14:34:43 +0000
Subject: irq.h: remove padding from irq_desc on 64bits

Impact: reduce struct irq_desc size

struct irq_desc: reorder to remove padding on 64bits

shrinks irq_desc to 128 bytes which saves data space & cache lines

On a generic x86_64/SMP build this reduces the reported data size by
64k.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index d058c57be02d..838a977885e1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -142,8 +142,8 @@ struct irq_chip {
  * @depth:		disable-depth, for nested irq_disable() calls
  * @wake_depth:		enable depth, for multiple set_irq_wake() callers
  * @irq_count:		stats field to detect stalled irqs
- * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @last_unhandled:	aging timer for unhandled count
+ * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @lock:		locking for SMP
  * @affinity:		IRQ affinity on SMP
  * @cpu:		cpu index useful for balancing
@@ -165,8 +165,8 @@ struct irq_desc {
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
 	unsigned int		irq_count;	/* For detecting broken IRQs */
-	unsigned int		irqs_unhandled;
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
+	unsigned int		irqs_unhandled;
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_t		affinity;
-- 
cgit v1.2.3


From b20a9c24d5c5d466d7e4a25c6f1bedbd2d16ad4f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 23 Nov 2008 16:02:31 -0800
Subject: dccp: Set per-connection CCIDs via socket options

With this patch, TX/RX CCIDs can now be changed on a per-connection
basis, which overrides the defaults set by the global sysctl variables
for TX/RX CCIDs.

To make full use of this facility, the remaining patches of this patch
set are needed, which track dependencies and activate negotiated
feature values.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt | 14 ++++++++++++++
 include/linux/dccp.h              |  5 +++++
 net/dccp/ackvec.c                 |  9 ++++-----
 net/dccp/ackvec.h                 |  5 ++---
 net/dccp/feat.h                   |  2 ++
 net/dccp/proto.c                  | 34 ++++++++++++++++++++++++++++++++++
 6 files changed, 61 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 43df4487379b..610083ff73f6 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -61,6 +61,20 @@ DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
 supported by the endpoint (see include/linux/dccp.h for symbolic constants).
 The caller needs to provide a sufficiently large (> 2) array of type uint8_t.
 
+DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
+time, combining the operation of the next two socket options. This option is
+preferrable over the latter two, since often applications will use the same
+type of CCID for both directions; and mixed use of CCIDs is not currently well
+understood. This socket option takes as argument at least one uint8_t value, or
+an array of uint8_t values, which must match available CCIDS (see above). CCIDs
+must be registered on the socket before calling connect() or listen().
+
+DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
+the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
+Please note that the getsockopt argument type here is `int', not uint8_t.
+
+DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
+
 DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
 timewait state when closing the connection (RFC 4340, 8.3). The usual case is
 that the closing server sends a CloseReq, whereupon the client holds timewait
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eda389ce04f4..6a72ff52a8a4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -168,6 +168,8 @@ enum {
 	DCCPO_MIN_CCID_SPECIFIC = 128,
 	DCCPO_MAX_CCID_SPECIFIC = 255,
 };
+/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */
+#define DCCP_SINGLE_OPT_MAXLEN	253
 
 /* DCCP CCIDS */
 enum {
@@ -203,6 +205,9 @@ enum dccp_feature_numbers {
 #define DCCP_SOCKOPT_SEND_CSCOV		10
 #define DCCP_SOCKOPT_RECV_CSCOV		11
 #define DCCP_SOCKOPT_AVAILABLE_CCIDS	12
+#define DCCP_SOCKOPT_CCID		13
+#define DCCP_SOCKOPT_TX_CCID		14
+#define DCCP_SOCKOPT_RX_CCID		15
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 1e8be246ad15..01e4d39fa232 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -12,7 +12,6 @@
 #include "ackvec.h"
 #include "dccp.h"
 
-#include <linux/dccp.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
 	/* Figure out how many options do we need to represent the ackvec */
-	const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN);
+	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
 	u16 len = av->av_vec_len + 2 * nr_opts, i;
 	u32 elapsed_time;
 	const unsigned char *tail, *from;
@@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	for (i = 0; i < nr_opts; ++i) {
 		int copylen = len;
 
-		if (len > DCCP_MAX_ACKVEC_OPT_LEN)
-			copylen = DCCP_MAX_ACKVEC_OPT_LEN;
+		if (len > DCCP_SINGLE_OPT_MAXLEN)
+			copylen = DCCP_SINGLE_OPT_MAXLEN;
 
 		*to++ = DCCPO_ACK_VECTOR_0;
 		*to++ = copylen + 2;
@@ -432,7 +431,7 @@ found:
 int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
 {
-	if (len > DCCP_MAX_ACKVEC_OPT_LEN)
+	if (len > DCCP_SINGLE_OPT_MAXLEN)
 		return -1;
 
 	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index bcb64fb4acef..4ccee030524e 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -11,15 +11,14 @@
  *	published by the Free Software Foundation.
  */
 
+#include <linux/dccp.h>
 #include <linux/compiler.h>
 #include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/* Read about the ECN nonce to see why it is 253 */
-#define DCCP_MAX_ACKVEC_OPT_LEN 253
 /* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
+#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
 
 #define DCCP_ACKVEC_STATE_RECEIVED	0
 #define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 4d172822df17..093af1610d11 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -22,6 +22,8 @@
 /* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
 #define DCCPF_SEQ_WMIN		32
 #define DCCPF_SEQ_WMAX		0x3FFFFFFFFFFFull
+/* Maximum number of SP values that fit in a single (Confirm) option */
+#define DCCP_FEAT_MAX_SP_VALS	(DCCP_SINGLE_OPT_MAXLEN - 2)
 
 enum dccp_feat_type {
 	FEAT_AT_RX   = 1,	/* located at RX side of half-connection  */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8b63394ec24c..445884cf1c29 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -501,6 +501,36 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
 	return rc;
 }
 
+static int dccp_setsockopt_ccid(struct sock *sk, int type,
+				char __user *optval, int optlen)
+{
+	u8 *val;
+	int rc = 0;
+
+	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
+		return -EINVAL;
+
+	val = kmalloc(optlen, GFP_KERNEL);
+	if (val == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(val, optval, optlen)) {
+		kfree(val);
+		return -EFAULT;
+	}
+
+	lock_sock(sk);
+	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
+		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
+
+	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
+		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
+	release_sock(sk);
+
+	kfree(val);
+	return rc;
+}
+
 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 		char __user *optval, int optlen)
 {
@@ -515,6 +545,10 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_CHANGE_R:
 		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
 		return 0;
+	case DCCP_SOCKOPT_CCID:
+	case DCCP_SOCKOPT_RX_CCID:
+	case DCCP_SOCKOPT_TX_CCID:
+		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
 	}
 
 	if (optlen < (int)sizeof(int))
-- 
cgit v1.2.3


From c25eb3bfb97294d0543a81230fbc237046b4b84c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 23 Nov 2008 17:22:55 -0800
Subject: net: Convert TCP/DCCP listening hash tables to use RCU

This is the last step to be able to perform full RCU lookups
in __inet_lookup() : After established/timewait tables, we
add RCU lookups to listening hash table.

The only trick here is that a socket of a given type (TCP ipv4,
TCP ipv6, ...) can now flight between two different tables
(established and listening) during a RCU grace period, so we
must use different 'nulls' end-of-chain values for two tables.

We define a large value :

#define LISTENING_NULLS_BASE (1U << 29)

So that slots in listening table are guaranteed to have different
end-of-chain values than slots in established table. A reader can
still detect it finished its lookup in the right chain.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h |   9 ++-
 net/ipv4/inet_diag.c          |   4 +-
 net/ipv4/inet_hashtables.c    | 148 +++++++++++++++++++++---------------------
 net/ipv4/tcp_ipv4.c           |   8 +--
 net/ipv6/inet6_hashtables.c   |  94 +++++++++++++++++----------
 5 files changed, 147 insertions(+), 116 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ec7ee2e46d8c..f44bb5c77a70 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -99,9 +99,16 @@ struct inet_bind_hashbucket {
 	struct hlist_head	chain;
 };
 
+/*
+ * Sockets can be hashed in established or listening table
+ * We must use different 'nulls' end-of-chain value for listening
+ * hash table, or we might find a socket that was closed and
+ * reallocated/inserted into established hash table
+ */
+#define LISTENING_NULLS_BASE (1U << 29)
 struct inet_listen_hashbucket {
 	spinlock_t		lock;
-	struct hlist_head	head;
+	struct hlist_nulls_head	head;
 };
 
 /* This is for listening sockets, thus all sockets which possess wildcards. */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 998a78f169ff..588a7796e3e3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -720,13 +720,13 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
 			struct sock *sk;
-			struct hlist_node *node;
+			struct hlist_nulls_node *node;
 			struct inet_listen_hashbucket *ilb;
 
 			num = 0;
 			ilb = &hashinfo->listening_hash[i];
 			spin_lock_bh(&ilb->lock);
-			sk_for_each(sk, node, &ilb->head) {
+			sk_nulls_for_each(sk, node, &ilb->head) {
 				struct inet_sock *inet = inet_sk(sk);
 
 				if (num < s_num) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 4c273a9981a6..11fcb87a1fdd 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -110,78 +110,79 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
 
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
+static inline int compute_score(struct sock *sk, struct net *net,
+				const unsigned short hnum, const __be32 daddr,
+				const int dif)
+{
+	int score = -1;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (net_eq(sock_net(sk), net) && inet->num == hnum &&
+			!ipv6_only_sock(sk)) {
+		__be32 rcv_saddr = inet->rcv_saddr;
+		score = sk->sk_family == PF_INET ? 1 : 0;
+		if (rcv_saddr) {
+			if (rcv_saddr != daddr)
+				return -1;
+			score += 2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score += 2;
+		}
+	}
+	return score;
+}
+
 /*
  * Don't inline this cruft. Here are some nice properties to exploit here. The
  * BSD API does not allow a listening sock to specify the remote port nor the
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
-static struct sock *inet_lookup_listener_slow(struct net *net,
-					      const struct hlist_head *head,
-					      const __be32 daddr,
-					      const unsigned short hnum,
-					      const int dif)
-{
-	struct sock *result = NULL, *sk;
-	const struct hlist_node *node;
-	int hiscore = -1;
-
-	sk_for_each(sk, node, head) {
-		const struct inet_sock *inet = inet_sk(sk);
-
-		if (net_eq(sock_net(sk), net) && inet->num == hnum &&
-				!ipv6_only_sock(sk)) {
-			const __be32 rcv_saddr = inet->rcv_saddr;
-			int score = sk->sk_family == PF_INET ? 1 : 0;
-
-			if (rcv_saddr) {
-				if (rcv_saddr != daddr)
-					continue;
-				score += 2;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score += 2;
-			}
-			if (score == 5)
-				return sk;
-			if (score > hiscore) {
-				hiscore	= score;
-				result	= sk;
-			}
-		}
-	}
-	return result;
-}
 
-/* Optimize the common listener case. */
+
 struct sock *__inet_lookup_listener(struct net *net,
 				    struct inet_hashinfo *hashinfo,
 				    const __be32 daddr, const unsigned short hnum,
 				    const int dif)
 {
-	struct sock *sk = NULL;
-	struct inet_listen_hashbucket *ilb;
+	struct sock *sk, *result;
+	struct hlist_nulls_node *node;
+	unsigned int hash = inet_lhashfn(net, hnum);
+	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+	int score, hiscore;
 
-	ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
-	spin_lock(&ilb->lock);
-	if (!hlist_empty(&ilb->head)) {
-		const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head)));
-
-		if (inet->num == hnum && !sk->sk_node.next &&
-		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-		    !sk->sk_bound_dev_if && net_eq(sock_net(sk), net))
-			goto sherry_cache;
-		sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif);
+	rcu_read_lock();
+begin:
+	result = NULL;
+	hiscore = -1;
+	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
+		score = compute_score(sk, net, hnum, daddr, dif);
+		if (score > hiscore) {
+			result = sk;
+			hiscore = score;
+		}
 	}
-	if (sk) {
-sherry_cache:
-		sock_hold(sk);
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
+		goto begin;
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, hnum, daddr,
+				  dif) < hiscore)) {
+			sock_put(result);
+			goto begin;
+		}
 	}
-	spin_unlock(&ilb->lock);
-	return sk;
+	rcu_read_unlock();
+	return result;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
@@ -370,7 +371,7 @@ static void __inet_hash(struct sock *sk)
 	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 
 	spin_lock(&ilb->lock);
-	__sk_add_node(sk, &ilb->head);
+	__sk_nulls_add_node_rcu(sk, &ilb->head);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	spin_unlock(&ilb->lock);
 }
@@ -388,26 +389,22 @@ EXPORT_SYMBOL_GPL(inet_hash);
 void inet_unhash(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+	spinlock_t *lock;
+	int done;
 
 	if (sk_unhashed(sk))
 		return;
 
-	if (sk->sk_state == TCP_LISTEN) {
-		struct inet_listen_hashbucket *ilb;
+	if (sk->sk_state == TCP_LISTEN)
+		lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
+	else
+		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
-		spin_lock_bh(&ilb->lock);
-		if (__sk_del_node_init(sk))
-			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-		spin_unlock_bh(&ilb->lock);
-	} else {
-		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-
-		spin_lock_bh(lock);
-		if (__sk_nulls_del_node_init_rcu(sk))
-			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-		spin_unlock_bh(lock);
-	}
+	spin_lock_bh(lock);
+	done =__sk_nulls_del_node_init_rcu(sk);
+	spin_unlock_bh(lock);
+	if (done)
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
@@ -526,8 +523,11 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 {
 	int i;
 
-	for (i = 0; i < INET_LHTABLE_SIZE; i++)
+	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
 		spin_lock_init(&h->listening_hash[i].lock);
+		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
+				      i + LISTENING_NULLS_BASE);
+		}
 }
 
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a81caa1be0cf..cab2458f86fd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1868,7 +1868,7 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
 	struct inet_connection_sock *icsk;
-	struct hlist_node *node;
+	struct hlist_nulls_node *node;
 	struct sock *sk = cur;
 	struct inet_listen_hashbucket *ilb;
 	struct tcp_iter_state *st = seq->private;
@@ -1878,7 +1878,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 		st->bucket = 0;
 		ilb = &tcp_hashinfo.listening_hash[0];
 		spin_lock_bh(&ilb->lock);
-		sk = sk_head(&ilb->head);
+		sk = sk_nulls_head(&ilb->head);
 		goto get_sk;
 	}
 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
@@ -1914,7 +1914,7 @@ get_req:
 		sk = sk_next(sk);
 	}
 get_sk:
-	sk_for_each_from(sk, node) {
+	sk_nulls_for_each_from(sk, node) {
 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
 			cur = sk;
 			goto out;
@@ -1935,7 +1935,7 @@ start_req:
 	if (++st->bucket < INET_LHTABLE_SIZE) {
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
-		sk = sk_head(&ilb->head);
+		sk = sk_nulls_head(&ilb->head);
 		goto get_sk;
 	}
 	cur = NULL;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index e0fd68187f83..8fe267feb81e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -33,7 +33,7 @@ void __inet6_hash(struct sock *sk)
 
 		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 		spin_lock(&ilb->lock);
-		__sk_add_node(sk, &ilb->head);
+		__sk_nulls_add_node_rcu(sk, &ilb->head);
 		spin_unlock(&ilb->lock);
 	} else {
 		unsigned int hash;
@@ -118,47 +118,71 @@ out:
 }
 EXPORT_SYMBOL(__inet6_lookup_established);
 
+static int inline compute_score(struct sock *sk, struct net *net,
+				const unsigned short hnum,
+				const struct in6_addr *daddr,
+				const int dif)
+{
+	int score = -1;
+
+	if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
+	    sk->sk_family == PF_INET6) {
+		const struct ipv6_pinfo *np = inet6_sk(sk);
+
+		score = 1;
+		if (!ipv6_addr_any(&np->rcv_saddr)) {
+			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+				return -1;
+			score++;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				return -1;
+			score++;
+		}
+	}
+	return score;
+}
+
 struct sock *inet6_lookup_listener(struct net *net,
 		struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
 		const unsigned short hnum, const int dif)
 {
 	struct sock *sk;
-	const struct hlist_node *node;
-	struct sock *result = NULL;
-	int score, hiscore = 0;
-	struct inet_listen_hashbucket *ilb;
-
-	ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
-	spin_lock(&ilb->lock);
-	sk_for_each(sk, node, &ilb->head) {
-		if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
-				sk->sk_family == PF_INET6) {
-			const struct ipv6_pinfo *np = inet6_sk(sk);
-
-			score = 1;
-			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
-					continue;
-				score++;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score++;
-			}
-			if (score == 3) {
-				result = sk;
-				break;
-			}
-			if (score > hiscore) {
-				hiscore = score;
-				result = sk;
-			}
+	const struct hlist_nulls_node *node;
+	struct sock *result;
+	int score, hiscore;
+	unsigned int hash = inet_lhashfn(net, hnum);
+	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+
+	rcu_read_lock();
+begin:
+	result = NULL;
+	hiscore = -1;
+	sk_nulls_for_each(sk, node, &ilb->head) {
+		score = compute_score(sk, net, hnum, daddr, dif);
+		if (score > hiscore) {
+			hiscore = score;
+			result = sk;
 		}
 	}
-	if (result)
-		sock_hold(result);
-	spin_unlock(&ilb->lock);
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
+		goto begin;
+	if (result) {
+		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+			result = NULL;
+		else if (unlikely(compute_score(result, net, hnum, daddr,
+				  dif) < hiscore)) {
+			sock_put(result);
+			goto begin;
+		}
+	}
+	rcu_read_unlock();
 	return result;
 }
 
-- 
cgit v1.2.3


From 1f87e235e6fb92c2968b52b9191de04f1aff8e77 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 23 Nov 2008 23:24:32 -0800
Subject: eth: Declare an optimized compare_ether_addr_64bits() function

Linus mentioned we could try to perform long word operations, even
on potentially unaligned addresses, on x86 at least. David mentioned
the HAVE_EFFICIENT_UNALIGNED_ACCESS test to handle this on all
arches that have efficient unailgned accesses.

I tried this idea and got nice assembly on 32 bits:

158:   33 82 38 01 00 00       xor    0x138(%edx),%eax
15e:   33 8a 34 01 00 00       xor    0x134(%edx),%ecx
164:   c1 e0 10                shl    $0x10,%eax
167:   09 c1                   or     %eax,%ecx
169:   74 0b                   je     176 <eth_type_trans+0x87>

And very nice assembly on 64 bits of course (one xor, one shl)

Nice oprofile improvement in eth_type_trans(), 0.17 % instead of 0.41 %,
expected since we remove 8 instructions on a fast path.

This patch implements a compare_ether_addr_64bits() function, that
uses the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ifdef to efficiently
perform the 6 bytes comparison on all capable arches.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 net/ethernet/eth.c          |  6 +++---
 2 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 0e5e97060034..1cb0f0b90926 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -27,6 +27,7 @@
 #include <linux/if_ether.h>
 #include <linux/netdevice.h>
 #include <linux/random.h>
+#include <asm/unaligned.h>
 
 #ifdef __KERNEL__
 extern __be16		eth_type_trans(struct sk_buff *skb, struct net_device *dev);
@@ -140,6 +141,47 @@ static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2)
 	BUILD_BUG_ON(ETH_ALEN != 6);
 	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
 }
+
+static inline unsigned long zap_last_2bytes(unsigned long value)
+{
+#ifdef __BIG_ENDIAN
+	return value >> 16;
+#else
+	return value << 16;
+#endif
+}
+
+/**
+ * compare_ether_addr_64bits - Compare two Ethernet addresses
+ * @addr1: Pointer to an array of 8 bytes
+ * @addr2: Pointer to an other array of 8 bytes
+ *
+ * Compare two ethernet addresses, returns 0 if equal.
+ * Same result than "memcmp(addr1, addr2, ETH_ALEN)" but without conditional
+ * branches, and possibly long word memory accesses on CPU allowing cheap
+ * unaligned memory reads.
+ * arrays = { byte1, byte2, byte3, byte4, byte6, byte7, pad1, pad2}
+ *
+ * Please note that alignment of addr1 & addr2 is only guaranted to be 16 bits.
+ */
+
+static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2],
+						 const u8 addr2[6+2])
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	unsigned long fold = ((*(unsigned long *)addr1) ^
+			      (*(unsigned long *)addr2));
+
+	if (sizeof(fold) == 8)
+		return zap_last_2bytes(fold) != 0;
+
+	fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^
+				(*(unsigned long *)(addr2 + 4)));
+	return fold != 0;
+#else
+	return compare_ether_addr(addr1, addr2);
+#endif
+}
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_ETHERDEVICE_H */
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a87a171d9914..280352aba403 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -165,8 +165,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
-	if (is_multicast_ether_addr(eth->h_dest)) {
-		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
+	if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
+		if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
 			skb->pkt_type = PACKET_MULTICAST;
@@ -181,7 +181,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 
 	else if (1 /*dev->flags&IFF_PROMISC */ ) {
-		if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
+		if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr)))
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
 
-- 
cgit v1.2.3


From 3ba9e10a6d3b6abf5f5952572cff8f8d5a35ae54 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 24 Nov 2008 18:01:05 +0000
Subject: ASoC: Remove DAI type information

DAI type information is only ever used within ASoC in order to special
case AC97 and for diagnostic purposes. Since modern CPUs and codecs
support multi function DAIs which can be configured for several modes
it is more trouble than it's worth to maintain anything other than a
flag identifying AC97 DAIs so remove the type field and replace it with
an ac97_control flag.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h            |  2 +-
 include/sound/soc.h                |  8 --------
 sound/soc/atmel/atmel_ssc_dai.c    |  3 ---
 sound/soc/au1x/psc-ac97.c          |  2 +-
 sound/soc/au1x/psc-i2s.c           |  1 -
 sound/soc/blackfin/bf5xx-ac97.c    |  2 +-
 sound/soc/blackfin/bf5xx-i2s.c     |  1 -
 sound/soc/codecs/ac97.c            |  2 +-
 sound/soc/codecs/pcm3008.c         |  1 -
 sound/soc/codecs/wm9712.c          |  2 +-
 sound/soc/codecs/wm9713.c          |  2 +-
 sound/soc/davinci/davinci-i2s.c    |  1 -
 sound/soc/fsl/mpc5200_psc_i2s.c    |  1 -
 sound/soc/omap/omap-mcbsp.c        |  1 -
 sound/soc/pxa/pxa-ssp.c            |  4 ----
 sound/soc/pxa/pxa2xx-ac97.c        |  6 +++---
 sound/soc/pxa/pxa2xx-i2s.c         |  1 -
 sound/soc/s3c24xx/neo1973_wm8753.c |  1 -
 sound/soc/s3c24xx/s3c2412-i2s.c    |  1 -
 sound/soc/s3c24xx/s3c2443-ac97.c   |  4 ++--
 sound/soc/s3c24xx/s3c24xx-i2s.c    |  1 -
 sound/soc/sh/hac.c                 |  4 ++--
 sound/soc/sh/ssi.c                 |  2 --
 sound/soc/soc-core.c               | 31 ++++++++-----------------------
 24 files changed, 21 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index f51cb55902f7..a01a24b10196 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -184,7 +184,7 @@ struct snd_soc_dai {
 	/* DAI description */
 	char *name;
 	unsigned int id;
-	unsigned char type;
+	int ac97_control;
 
 	/* DAI callbacks */
 	int (*probe)(struct platform_device *pdev,
diff --git a/include/sound/soc.h b/include/sound/soc.h
index e4465f73aa46..444f9c211379 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -143,14 +143,6 @@ enum snd_soc_bias_level {
 	SND_SOC_BIAS_OFF,
 };
 
-/*
- * Digital Audio Interface (DAI) types
- */
-#define SND_SOC_DAI_AC97	0x1
-#define SND_SOC_DAI_I2S		0x2
-#define SND_SOC_DAI_PCM		0x4
-#define SND_SOC_DAI_AC97_BUS	0x8	/* for custom i.e. non ac97_codec.c */
-
 struct snd_soc_device;
 struct snd_soc_pcm_stream;
 struct snd_soc_ops;
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index 916f73b9a18f..0bb18dfa9495 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -702,7 +702,6 @@ static int atmel_ssc_resume(struct platform_device *pdev,
 struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = {
 	{	.name = "atmel-ssc0",
 		.id = 0,
-		.type = SND_SOC_DAI_PCM,
 		.suspend = atmel_ssc_suspend,
 		.resume = atmel_ssc_resume,
 		.playback = {
@@ -727,7 +726,6 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = {
 #if NUM_SSC_DEVICES == 3
 	{	.name = "atmel-ssc1",
 		.id = 1,
-		.type = SND_SOC_DAI_PCM,
 		.suspend = atmel_ssc_suspend,
 		.resume = atmel_ssc_resume,
 		.playback = {
@@ -751,7 +749,6 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = {
 	},
 	{	.name = "atmel-ssc2",
 		.id = 2,
-		.type = SND_SOC_DAI_PCM,
 		.suspend = atmel_ssc_suspend,
 		.resume = atmel_ssc_resume,
 		.playback = {
diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c
index ad60a6042cad..a0bcfeaf5f86 100644
--- a/sound/soc/au1x/psc-ac97.c
+++ b/sound/soc/au1x/psc-ac97.c
@@ -346,7 +346,7 @@ static int au1xpsc_ac97_resume(struct platform_device *pdev,
 
 struct snd_soc_dai au1xpsc_ac97_dai = {
 	.name			= "au1xpsc_ac97",
-	.type			= SND_SOC_DAI_AC97,
+	.ac97_control		= 1,
 	.probe			= au1xpsc_ac97_probe,
 	.remove			= au1xpsc_ac97_remove,
 	.suspend		= au1xpsc_ac97_suspend,
diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c
index 05a5acbb16ae..f4217e70a787 100644
--- a/sound/soc/au1x/psc-i2s.c
+++ b/sound/soc/au1x/psc-i2s.c
@@ -371,7 +371,6 @@ static int au1xpsc_i2s_resume(struct platform_device *pdev,
 
 struct snd_soc_dai au1xpsc_i2s_dai = {
 	.name			= "au1xpsc_i2s",
-	.type			= SND_SOC_DAI_I2S,
 	.probe			= au1xpsc_i2s_probe,
 	.remove			= au1xpsc_i2s_remove,
 	.suspend		= au1xpsc_i2s_suspend,
diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index 5dcd3f665ab1..709bdf08e398 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -409,7 +409,7 @@ static void bf5xx_ac97_remove(struct platform_device *pdev,
 struct snd_soc_dai bfin_ac97_dai = {
 	.name = "bf5xx-ac97",
 	.id = 0,
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.probe = bf5xx_ac97_probe,
 	.remove = bf5xx_ac97_remove,
 	.suspend = bf5xx_ac97_suspend,
diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c
index 4e675b55b182..6e5036bf9245 100644
--- a/sound/soc/blackfin/bf5xx-i2s.c
+++ b/sound/soc/blackfin/bf5xx-i2s.c
@@ -292,7 +292,6 @@ static int bf5xx_i2s_resume(struct platform_device *pdev,
 struct snd_soc_dai bf5xx_i2s_dai = {
 	.name = "bf5xx-i2s",
 	.id = 0,
-	.type = SND_SOC_DAI_I2S,
 	.probe = bf5xx_i2s_probe,
 	.remove = bf5xx_i2s_remove,
 	.suspend = bf5xx_i2s_suspend,
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index 8a93aff359d0..c4208c8210c8 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -43,7 +43,7 @@ static int ac97_prepare(struct snd_pcm_substream *substream,
 
 struct snd_soc_dai ac97_dai = {
 	.name = "AC97 HiFi",
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.playback = {
 		.stream_name = "AC97 Playback",
 		.channels_min = 1,
diff --git a/sound/soc/codecs/pcm3008.c b/sound/soc/codecs/pcm3008.c
index 2b26e1d80c8d..651a15eb8c2c 100644
--- a/sound/soc/codecs/pcm3008.c
+++ b/sound/soc/codecs/pcm3008.c
@@ -33,7 +33,6 @@
 
 struct snd_soc_dai pcm3008_dai = {
 	.name = "PCM3008 HiFi",
-	.type = SND_SOC_DAI_I2S,
 	.playback = {
 		.stream_name = "PCM3008 Playback",
 		.channels_min = 1,
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 6e3e0f340179..40f14061fb72 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -535,7 +535,7 @@ static int ac97_aux_prepare(struct snd_pcm_substream *substream,
 struct snd_soc_dai wm9712_dai[] = {
 {
 	.name = "AC97 HiFi",
-	.type = SND_SOC_DAI_AC97_BUS,
+	.ac97_control = 1,
 	.playback = {
 		.stream_name = "HiFi Playback",
 		.channels_min = 1,
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index a502667fca7a..9dad0bffcb05 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -1024,7 +1024,7 @@ static int ac97_aux_prepare(struct snd_pcm_substream *substream,
 struct snd_soc_dai wm9713_dai[] = {
 {
 	.name = "AC97 HiFi",
-	.type = SND_SOC_DAI_AC97_BUS,
+	.ac97_control = 1,
 	.playback = {
 		.stream_name = "HiFi Playback",
 		.channels_min = 1,
diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c
index 7a17cd0ecf64..cf31b3bb96cf 100644
--- a/sound/soc/davinci/davinci-i2s.c
+++ b/sound/soc/davinci/davinci-i2s.c
@@ -460,7 +460,6 @@ static void davinci_i2s_remove(struct platform_device *pdev,
 struct snd_soc_dai davinci_i2s_dai = {
 	.name = "davinci-i2s",
 	.id = 0,
-	.type = SND_SOC_DAI_I2S,
 	.probe = davinci_i2s_probe,
 	.remove = davinci_i2s_remove,
 	.playback = {
diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c
index e2c172f38979..9ad8f9a2d8e9 100644
--- a/sound/soc/fsl/mpc5200_psc_i2s.c
+++ b/sound/soc/fsl/mpc5200_psc_i2s.c
@@ -469,7 +469,6 @@ static int psc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int format)
  * psc_i2s_dai_template: template CPU Digital Audio Interface
  */
 static struct snd_soc_dai psc_i2s_dai_template = {
-	.type = SND_SOC_DAI_I2S,
 	.playback = {
 		.channels_min = 2,
 		.channels_max = 2,
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 2eeb135c1e4b..252bc7ebb194 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -456,7 +456,6 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 {								\
 	.name = "omap-mcbsp-dai-"#link_id,			\
 	.id = (link_id),					\
-	.type = SND_SOC_DAI_I2S,				\
 	.playback = {						\
 		.channels_min = 2,				\
 		.channels_max = 2,				\
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index d0dd6245a20a..402fc5ba65e7 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -788,7 +788,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 	{
 		.name = "pxa2xx-ssp1",
 		.id = 0,
-		.type = SND_SOC_DAI_PCM,
 		.probe = pxa_ssp_probe,
 		.remove = pxa_ssp_remove,
 		.suspend = pxa_ssp_suspend,
@@ -820,7 +819,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 	},
 	{	.name = "pxa2xx-ssp2",
 		.id = 1,
-		.type = SND_SOC_DAI_PCM,
 		.probe = pxa_ssp_probe,
 		.remove = pxa_ssp_remove,
 		.suspend = pxa_ssp_suspend,
@@ -853,7 +851,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 	{
 		.name = "pxa2xx-ssp3",
 		.id = 2,
-		.type = SND_SOC_DAI_PCM,
 		.probe = pxa_ssp_probe,
 		.remove = pxa_ssp_remove,
 		.suspend = pxa_ssp_suspend,
@@ -886,7 +883,6 @@ struct snd_soc_dai pxa_ssp_dai[] = {
 	{
 		.name = "pxa2xx-ssp4",
 		.id = 3,
-		.type = SND_SOC_DAI_PCM,
 		.probe = pxa_ssp_probe,
 		.remove = pxa_ssp_remove,
 		.suspend = pxa_ssp_suspend,
diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c
index 86667d2f1b75..bffbe288634c 100644
--- a/sound/soc/pxa/pxa2xx-ac97.c
+++ b/sound/soc/pxa/pxa2xx-ac97.c
@@ -173,7 +173,7 @@ struct snd_soc_dai pxa_ac97_dai[] = {
 {
 	.name = "pxa2xx-ac97",
 	.id = 0,
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.probe = pxa2xx_ac97_probe,
 	.remove = pxa2xx_ac97_remove,
 	.suspend = pxa2xx_ac97_suspend,
@@ -196,7 +196,7 @@ struct snd_soc_dai pxa_ac97_dai[] = {
 {
 	.name = "pxa2xx-ac97-aux",
 	.id = 1,
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.playback = {
 		.stream_name = "AC97 Aux Playback",
 		.channels_min = 1,
@@ -215,7 +215,7 @@ struct snd_soc_dai pxa_ac97_dai[] = {
 {
 	.name = "pxa2xx-ac97-mic",
 	.id = 2,
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.capture = {
 		.stream_name = "AC97 Mic Capture",
 		.channels_min = 1,
diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index 9a3e55b48129..f9a9e2ebafa1 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -340,7 +340,6 @@ static int pxa2xx_i2s_resume(struct platform_device *pdev,
 struct snd_soc_dai pxa_i2s_dai = {
 	.name = "pxa2xx-i2s",
 	.id = 0,
-	.type = SND_SOC_DAI_I2S,
 	.suspend = pxa2xx_i2s_suspend,
 	.resume = pxa2xx_i2s_resume,
 	.playback = {
diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c
index 528fc3f1b45b..3df2224a6723 100644
--- a/sound/soc/s3c24xx/neo1973_wm8753.c
+++ b/sound/soc/s3c24xx/neo1973_wm8753.c
@@ -548,7 +548,6 @@ static int neo1973_wm8753_init(struct snd_soc_codec *codec)
 static struct snd_soc_dai bt_dai = {
 	.name = "Bluetooth",
 	.id = 0,
-	.type = SND_SOC_DAI_PCM,
 	.playback = {
 		.channels_min = 1,
 		.channels_max = 1,
diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c
index 360cc2a49d9d..1c741047ae35 100644
--- a/sound/soc/s3c24xx/s3c2412-i2s.c
+++ b/sound/soc/s3c24xx/s3c2412-i2s.c
@@ -713,7 +713,6 @@ static int s3c2412_i2s_resume(struct platform_device *pdev,
 struct snd_soc_dai s3c2412_i2s_dai = {
 	.name	= "s3c2412-i2s",
 	.id	= 0,
-	.type	= SND_SOC_DAI_I2S,
 	.probe	= s3c2412_i2s_probe,
 	.suspend = s3c2412_i2s_suspend,
 	.resume = s3c2412_i2s_resume,
diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c
index 31377821b2c5..41bde6a3883b 100644
--- a/sound/soc/s3c24xx/s3c2443-ac97.c
+++ b/sound/soc/s3c24xx/s3c2443-ac97.c
@@ -358,7 +358,7 @@ struct snd_soc_dai s3c2443_ac97_dai[] = {
 {
 	.name = "s3c2443-ac97",
 	.id = 0,
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.probe = s3c2443_ac97_probe,
 	.remove = s3c2443_ac97_remove,
 	.playback = {
@@ -380,7 +380,7 @@ struct snd_soc_dai s3c2443_ac97_dai[] = {
 {
 	.name = "pxa2xx-ac97-mic",
 	.id = 1,
-	.type = SND_SOC_DAI_AC97,
+	.ac97_control = 1,
 	.capture = {
 		.stream_name = "AC97 Mic Capture",
 		.channels_min = 1,
diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c
index 1bac9dd3dbd4..8d9135f41bc9 100644
--- a/sound/soc/s3c24xx/s3c24xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c24xx-i2s.c
@@ -461,7 +461,6 @@ static int s3c24xx_i2s_resume(struct platform_device *pdev,
 struct snd_soc_dai s3c24xx_i2s_dai = {
 	.name = "s3c24xx-i2s",
 	.id = 0,
-	.type = SND_SOC_DAI_I2S,
 	.probe = s3c24xx_i2s_probe,
 	.suspend = s3c24xx_i2s_suspend,
 	.resume = s3c24xx_i2s_resume,
diff --git a/sound/soc/sh/hac.c b/sound/soc/sh/hac.c
index 3318071dc80f..c435913c60eb 100644
--- a/sound/soc/sh/hac.c
+++ b/sound/soc/sh/hac.c
@@ -271,7 +271,7 @@ struct snd_soc_dai sh4_hac_dai[] = {
 {
 	.name			= "HAC0",
 	.id			= 0,
-	.type			= SND_SOC_DAI_AC97,
+	.ac97_control		= 1,
 	.playback = {
 		.rates		= AC97_RATES,
 		.formats	= AC97_FMTS,
@@ -291,8 +291,8 @@ struct snd_soc_dai sh4_hac_dai[] = {
 #ifdef CONFIG_CPU_SUBTYPE_SH7760
 {
 	.name			= "HAC1",
+	.ac97_control		= 1,
 	.id			= 1,
-	.type			= SND_SOC_DAI_AC97,
 	.playback = {
 		.rates		= AC97_RATES,
 		.formats	= AC97_FMTS,
diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c
index 52a233840d27..fed544a3deff 100644
--- a/sound/soc/sh/ssi.c
+++ b/sound/soc/sh/ssi.c
@@ -340,7 +340,6 @@ struct snd_soc_dai sh4_ssi_dai[] = {
 {
 	.name			= "SSI0",
 	.id			= 0,
-	.type			= SND_SOC_DAI_I2S,
 	.playback = {
 		.rates		= SSI_RATES,
 		.formats	= SSI_FMTS,
@@ -367,7 +366,6 @@ struct snd_soc_dai sh4_ssi_dai[] = {
 {
 	.name			= "SSI1",
 	.id			= 1,
-	.type			= SND_SOC_DAI_I2S,
 	.playback = {
 		.rates		= SSI_RATES,
 		.formats	= SSI_FMTS,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 43f4060dbe75..0d47696ccd07 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -100,20 +100,6 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec)
 }
 #endif
 
-static inline const char *get_dai_name(int type)
-{
-	switch (type) {
-	case SND_SOC_DAI_AC97_BUS:
-	case SND_SOC_DAI_AC97:
-		return "AC97";
-	case SND_SOC_DAI_I2S:
-		return "I2S";
-	case SND_SOC_DAI_PCM:
-		return "PCM";
-	}
-	return NULL;
-}
-
 /*
  * Called by ALSA when a PCM substream is opened, the runtime->hw record is
  * then initialized and any private data can be allocated. This also calls
@@ -652,7 +638,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai  *cpu_dai = card->dai_link[i].cpu_dai;
-		if (cpu_dai->suspend && cpu_dai->type != SND_SOC_DAI_AC97)
+		if (cpu_dai->suspend && !cpu_dai->ac97_control)
 			cpu_dai->suspend(pdev, cpu_dai);
 		if (platform->suspend)
 			platform->suspend(pdev, cpu_dai);
@@ -678,7 +664,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
-		if (cpu_dai->suspend && cpu_dai->type == SND_SOC_DAI_AC97)
+		if (cpu_dai->suspend && cpu_dai->ac97_control)
 			cpu_dai->suspend(pdev, cpu_dai);
 	}
 
@@ -714,7 +700,7 @@ static void soc_resume_deferred(struct work_struct *work)
 
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
-		if (cpu_dai->resume && cpu_dai->type == SND_SOC_DAI_AC97)
+		if (cpu_dai->resume && cpu_dai->ac97_control)
 			cpu_dai->resume(pdev, cpu_dai);
 	}
 
@@ -741,7 +727,7 @@ static void soc_resume_deferred(struct work_struct *work)
 
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
-		if (cpu_dai->resume && cpu_dai->type != SND_SOC_DAI_AC97)
+		if (cpu_dai->resume && !cpu_dai->ac97_control)
 			cpu_dai->resume(pdev, cpu_dai);
 		if (platform->resume)
 			platform->resume(pdev, cpu_dai);
@@ -898,8 +884,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev,
 	codec_dai->codec = socdev->codec;
 
 	/* check client and interface hw capabilities */
-	sprintf(new_name, "%s %s-%s-%d", dai_link->stream_name, codec_dai->name,
-		get_dai_name(cpu_dai->type), num);
+	sprintf(new_name, "%s %s-%d", dai_link->stream_name, codec_dai->name,
+		num);
 
 	if (codec_dai->playback.channels_min)
 		playback = 1;
@@ -1270,8 +1256,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev)
 				continue;
 			}
 		}
-		if (card->dai_link[i].codec_dai->type ==
-			SND_SOC_DAI_AC97_BUS)
+		if (card->dai_link[i].codec_dai->ac97_control)
 			ac97 = 1;
 	}
 	snprintf(codec->card->shortname, sizeof(codec->card->shortname),
@@ -1335,7 +1320,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev)
 #ifdef CONFIG_SND_SOC_AC97_BUS
 	for (i = 0; i < codec->num_dai; i++) {
 		codec_dai = &codec->dai[i];
-		if (codec_dai->type == SND_SOC_DAI_AC97_BUS && codec->ac97) {
+		if (codec_dai->ac97_control && codec->ac97) {
 			soc_ac97_dev_unregister(codec);
 			goto free_card;
 		}
-- 
cgit v1.2.3


From 1acdac104668a0834cfa267de9946fac7764d486 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 20 Nov 2008 10:02:53 -0800
Subject: futex: make clock selectable for FUTEX_WAIT_BITSET

FUTEX_WAIT_BITSET could be used instead of FUTEX_WAIT by setting the
bit set to FUTEX_BITSET_MATCH_ANY, but FUTEX_WAIT uses CLOCK_REALTIME
while FUTEX_WAIT_BITSET uses CLOCK_MONOTONIC.

Add a flag to select CLOCK_REALTIME for FUTEX_WAIT_BITSET so glibc can
replace the FUTEX_WAIT logic which needs to do gettimeofday() calls
before and after the syscall to convert the absolute timeout to a
relative timeout for FUTEX_WAIT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@redhat.com>
---
 include/linux/futex.h |  3 ++-
 kernel/futex.c        | 24 +++++++++++++++++-------
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 8f627b9ae2b1..3bf5bb5a34f9 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -25,7 +25,8 @@ union ktime;
 #define FUTEX_WAKE_BITSET	10
 
 #define FUTEX_PRIVATE_FLAG	128
-#define FUTEX_CMD_MASK		~FUTEX_PRIVATE_FLAG
+#define FUTEX_CLOCK_REALTIME	256
+#define FUTEX_CMD_MASK		~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
 
 #define FUTEX_WAIT_PRIVATE	(FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_PRIVATE	(FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
diff --git a/kernel/futex.c b/kernel/futex.c
index e10c5c8786a6..ba0d3b83c091 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1142,12 +1142,13 @@ handle_fault:
  * In case we must use restart_block to restart a futex_wait,
  * we encode in the 'flags' shared capability
  */
-#define FLAGS_SHARED  1
+#define FLAGS_SHARED		0x01
+#define FLAGS_CLOCKRT		0x02
 
 static long futex_wait_restart(struct restart_block *restart);
 
 static int futex_wait(u32 __user *uaddr, int fshared,
-		      u32 val, ktime_t *abs_time, u32 bitset)
+		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
 	struct task_struct *curr = current;
 	DECLARE_WAITQUEUE(wait, curr);
@@ -1233,8 +1234,10 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 			slack = current->timer_slack_ns;
 			if (rt_task(current))
 				slack = 0;
-			hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
-						HRTIMER_MODE_ABS);
+			hrtimer_init_on_stack(&t.timer,
+					      clockrt ? CLOCK_REALTIME :
+					      CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 			hrtimer_init_sleeper(&t, current);
 			hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
 
@@ -1289,6 +1292,8 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 
 		if (fshared)
 			restart->futex.flags |= FLAGS_SHARED;
+		if (clockrt)
+			restart->futex.flags |= FLAGS_CLOCKRT;
 		return -ERESTART_RESTARTBLOCK;
 	}
 
@@ -1312,7 +1317,8 @@ static long futex_wait_restart(struct restart_block *restart)
 	if (restart->futex.flags & FLAGS_SHARED)
 		fshared = 1;
 	return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
-				restart->futex.bitset);
+				restart->futex.bitset,
+				restart->futex.flags & FLAGS_CLOCKRT);
 }
 
 
@@ -1905,18 +1911,22 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		u32 __user *uaddr2, u32 val2, u32 val3)
 {
-	int ret = -ENOSYS;
+	int clockrt, ret = -ENOSYS;
 	int cmd = op & FUTEX_CMD_MASK;
 	int fshared = 0;
 
 	if (!(op & FUTEX_PRIVATE_FLAG))
 		fshared = 1;
 
+	clockrt = op & FUTEX_CLOCK_REALTIME;
+	if (clockrt && cmd != FUTEX_WAIT_BITSET)
+		return -ENOSYS;
+
 	switch (cmd) {
 	case FUTEX_WAIT:
 		val3 = FUTEX_BITSET_MATCH_ANY;
 	case FUTEX_WAIT_BITSET:
-		ret = futex_wait(uaddr, fshared, val, timeout, val3);
+		ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
 		break;
 	case FUTEX_WAKE:
 		val3 = FUTEX_BITSET_MATCH_ANY;
-- 
cgit v1.2.3


From 2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 24 Nov 2008 15:52:46 -0800
Subject: net: avoid a pair of dst_hold()/dst_release() in ip_append_data()

We can reduce pressure on dst entry refcount that slowdown UDP transmit
path on SMP machines. This pressure is visible on RTP servers when
delivering content to mediagateways, especially big ones, handling
thousand of streams. Several cpus send UDP frames to the same
destination, hence use the same dst entry.

This patch makes ip_append_data() eventually steal the refcount its
callers had to take on the dst entry.

This doesnt avoid all refcounting, but still gives speedups on SMP,
on UDP/RAW transmit path

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h     |  2 +-
 net/ipv4/icmp.c      |  8 ++++----
 net/ipv4/ip_output.c | 11 ++++++++---
 net/ipv4/raw.c       |  2 +-
 net/ipv4/udp.c       |  2 +-
 5 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index bc026ecb513f..ddef10c22e3a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -110,7 +110,7 @@ extern int		ip_append_data(struct sock *sk,
 						   int odd, struct sk_buff *skb),
 				void *from, int len, int protolen,
 				struct ipcm_cookie *ipc,
-				struct rtable *rt,
+				struct rtable **rt,
 				unsigned int flags);
 extern int		ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb);
 extern ssize_t		ip_append_page(struct sock *sk, struct page *page,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 21e497efbd7f..7b88be9803b1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
 }
 
 static void icmp_push_reply(struct icmp_bxm *icmp_param,
-			    struct ipcm_cookie *ipc, struct rtable *rt)
+			    struct ipcm_cookie *ipc, struct rtable **rt)
 {
 	struct sock *sk;
 	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net(rt->u.dst.dev));
+	sk = icmp_sk(dev_net((*rt)->u.dst.dev));
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	}
 	if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
 			       icmp_param->data.icmph.code))
-		icmp_push_reply(icmp_param, &ipc, rt);
+		icmp_push_reply(icmp_param, &ipc, &rt);
 	ip_rt_put(rt);
 out_unlock:
 	icmp_xmit_unlock(sk);
@@ -635,7 +635,7 @@ route_done:
 		icmp_param.data_len = room;
 	icmp_param.head_len = sizeof(struct icmphdr);
 
-	icmp_push_reply(&icmp_param, &ipc, rt);
+	icmp_push_reply(&icmp_param, &ipc, &rt);
 ende:
 	ip_rt_put(rt);
 out_unlock:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 46d7be233eac..5516825a0751 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk,
 		   int getfrag(void *from, char *to, int offset, int len,
 			       int odd, struct sk_buff *skb),
 		   void *from, int length, int transhdrlen,
-		   struct ipcm_cookie *ipc, struct rtable *rt,
+		   struct ipcm_cookie *ipc, struct rtable **rtp,
 		   unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk,
 	int offset = 0;
 	unsigned int maxfraglen, fragheaderlen;
 	int csummode = CHECKSUM_NONE;
+	struct rtable *rt;
 
 	if (flags&MSG_PROBE)
 		return 0;
@@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk,
 			inet->cork.flags |= IPCORK_OPT;
 			inet->cork.addr = ipc->addr;
 		}
-		dst_hold(&rt->u.dst);
+		rt = *rtp;
+		/*
+		 * We steal reference to this route, caller should not release it
+		 */
+		*rtp = NULL;
 		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
 					    rt->u.dst.dev->mtu :
 					    dst_mtu(rt->u.dst.path);
@@ -1391,7 +1396,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
 	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
-		       &ipc, rt, MSG_DONTWAIT);
+		       &ipc, &rt, MSG_DONTWAIT);
 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		if (arg->csumoffset >= 0)
 			*((__sum16 *)skb_transport_header(skb) +
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 998fcffc9e15..dff8bc4e0fac 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -572,7 +572,7 @@ back_from_confirm:
 			ipc.addr = rt->rt_dst;
 		lock_sock(sk);
 		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
-					&ipc, rt, msg->msg_flags);
+					&ipc, &rt, msg->msg_flags);
 		if (err)
 			ip_flush_pending_frames(sk);
 		else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index da869ce041d9..549114472db3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -719,7 +719,7 @@ do_append_data:
 	up->len += ulen;
 	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
 	err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
-			sizeof(struct udphdr), &ipc, rt,
+			sizeof(struct udphdr), &ipc, &rt,
 			corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
 		udp_flush_pending_frames(sk);
-- 
cgit v1.2.3


From 18b6e0414e42d95183f07d8177e3ff0241abd825 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serue@us.ibm.com>
Date: Wed, 15 Oct 2008 16:38:45 -0500
Subject: User namespaces: set of cleanups (v2)

The user_ns is moved from nsproxy to user_struct, so that a struct
cred by itself is sufficient to determine access (which it otherwise
would not be).  Corresponding ecryptfs fixes (by David Howells) are
here as well.

Fix refcounting.  The following rules now apply:
        1. The task pins the user struct.
        2. The user struct pins its user namespace.
        3. The user namespace pins the struct user which created it.

User namespaces are cloned during copy_creds().  Unsharing a new user_ns
is no longer possible.  (We could re-add that, but it'll cause code
duplication and doesn't seem useful if PAM doesn't need to clone user
namespaces).

When a user namespace is created, its first user (uid 0) gets empty
keyrings and a clean group_info.

This incorporates a previous patch by David Howells.  Here
is his original patch description:

>I suggest adding the attached incremental patch.  It makes the following
>changes:
>
> (1) Provides a current_user_ns() macro to wrap accesses to current's user
>     namespace.
>
> (2) Fixes eCryptFS.
>
> (3) Renames create_new_userns() to create_user_ns() to be more consistent
>     with the other associated functions and because the 'new' in the name is
>     superfluous.
>
> (4) Moves the argument and permission checks made for CLONE_NEWUSER to the
>     beginning of do_fork() so that they're done prior to making any attempts
>     at allocation.
>
> (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds
>     to fill in rather than have it return the new root user.  I don't imagine
>     the new root user being used for anything other than filling in a cred
>     struct.
>
>     This also permits me to get rid of a get_uid() and a free_uid(), as the
>     reference the creds were holding on the old user_struct can just be
>     transferred to the new namespace's creator pointer.
>
> (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under
>     preparation rather than doing it in copy_creds().
>
>David

>Signed-off-by: David Howells <dhowells@redhat.com>

Changelog:
	Oct 20: integrate dhowells comments
		1. leave thread_keyring alone
		2. use current_user_ns() in set_user()

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
---
 fs/ecryptfs/messaging.c        | 13 ++++----
 fs/ecryptfs/miscdev.c          | 19 ++++-------
 include/linux/cred.h           |  2 ++
 include/linux/init_task.h      |  1 -
 include/linux/nsproxy.h        |  1 -
 include/linux/sched.h          |  1 +
 include/linux/user_namespace.h | 13 +++-----
 kernel/cred.c                  | 15 +++++++--
 kernel/fork.c                  | 19 +++++++++--
 kernel/nsproxy.c               | 15 ++-------
 kernel/sys.c                   |  4 +--
 kernel/user.c                  | 47 ++++++++------------------
 kernel/user_namespace.c        | 75 +++++++++++++++++-------------------------
 13 files changed, 96 insertions(+), 129 deletions(-)

(limited to 'include')

diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index e0b0a4e28b9b..6913f727624d 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -360,7 +360,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
 	struct ecryptfs_msg_ctx *msg_ctx;
 	size_t msg_size;
 	struct nsproxy *nsproxy;
-	struct user_namespace *current_user_ns;
+	struct user_namespace *tsk_user_ns;
 	uid_t ctx_euid;
 	int rc;
 
@@ -385,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
 		mutex_unlock(&ecryptfs_daemon_hash_mux);
 		goto wake_up;
 	}
-	current_user_ns = nsproxy->user_ns;
+	tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns;
 	ctx_euid = task_euid(msg_ctx->task);
-	rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, current_user_ns);
+	rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns);
 	rcu_read_unlock();
 	mutex_unlock(&ecryptfs_daemon_hash_mux);
 	if (rc) {
@@ -405,11 +405,11 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
 		       euid, ctx_euid);
 		goto unlock;
 	}
-	if (current_user_ns != user_ns) {
+	if (tsk_user_ns != user_ns) {
 		rc = -EBADMSG;
 		printk(KERN_WARNING "%s: Received message from user_ns "
 		       "[0x%p]; expected message from user_ns [0x%p]\n",
-		       __func__, user_ns, nsproxy->user_ns);
+		       __func__, user_ns, tsk_user_ns);
 		goto unlock;
 	}
 	if (daemon->pid != pid) {
@@ -468,8 +468,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
 	uid_t euid = current_euid();
 	int rc;
 
-	rc = ecryptfs_find_daemon_by_euid(&daemon, euid,
-					  current->nsproxy->user_ns);
+	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
 	if (rc || !daemon) {
 		rc = -ENOTCONN;
 		printk(KERN_ERR "%s: User [%d] does not have a daemon "
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 047ac609695b..efd95a0ed1ea 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -47,8 +47,7 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
 
 	mutex_lock(&ecryptfs_daemon_hash_mux);
 	/* TODO: Just use file->private_data? */
-	rc = ecryptfs_find_daemon_by_euid(&daemon, euid,
-					  current->nsproxy->user_ns);
+	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
 	BUG_ON(rc || !daemon);
 	mutex_lock(&daemon->mux);
 	mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -95,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
 		       "count; rc = [%d]\n", __func__, rc);
 		goto out_unlock_daemon_list;
 	}
-	rc = ecryptfs_find_daemon_by_euid(&daemon, euid,
-					  current->nsproxy->user_ns);
+	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
 	if (rc || !daemon) {
-		rc = ecryptfs_spawn_daemon(&daemon, euid,
-					   current->nsproxy->user_ns,
+		rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(),
 					   task_pid(current));
 		if (rc) {
 			printk(KERN_ERR "%s: Error attempting to spawn daemon; "
@@ -153,8 +150,7 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
 	int rc;
 
 	mutex_lock(&ecryptfs_daemon_hash_mux);
-	rc = ecryptfs_find_daemon_by_euid(&daemon, euid,
-					  current->nsproxy->user_ns);
+	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
 	BUG_ON(rc || !daemon);
 	mutex_lock(&daemon->mux);
 	BUG_ON(daemon->pid != task_pid(current));
@@ -254,8 +250,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
 
 	mutex_lock(&ecryptfs_daemon_hash_mux);
 	/* TODO: Just use file->private_data? */
-	rc = ecryptfs_find_daemon_by_euid(&daemon, euid,
-					  current->nsproxy->user_ns);
+	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
 	BUG_ON(rc || !daemon);
 	mutex_lock(&daemon->mux);
 	if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -295,7 +290,7 @@ check_list:
 		goto check_list;
 	}
 	BUG_ON(euid != daemon->euid);
-	BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
+	BUG_ON(current_user_ns() != daemon->user_ns);
 	BUG_ON(task_pid(current) != daemon->pid);
 	msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
 				   struct ecryptfs_msg_ctx, daemon_out_list);
@@ -468,7 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
 			goto out_free;
 		}
 		rc = ecryptfs_miscdev_response(&data[i], packet_size,
-					       euid, current->nsproxy->user_ns,
+					       euid, current_user_ns(),
 					       task_pid(current), seq);
 		if (rc)
 			printk(KERN_WARNING "%s: Failed to deliver miscdev "
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 26c1ab179946..3282ee4318e7 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -60,6 +60,7 @@ do {							\
 } while (0)
 
 extern struct group_info *groups_alloc(int);
+extern struct group_info init_groups;
 extern void groups_free(struct group_info *);
 extern int set_current_groups(struct group_info *);
 extern int set_groups(struct cred *, struct group_info *);
@@ -315,6 +316,7 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid() 	(current_cred_xxx(fsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
+#define current_user_ns()	(current_cred_xxx(user)->user_ns)
 #define current_security()	(current_cred_xxx(security))
 
 #define current_uid_gid(_uid, _gid)		\
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 2597858035cd..959f5522d10a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy;
 	.mnt_ns		= NULL,						\
 	INIT_NET_NS(net_ns)                                             \
 	INIT_IPC_NS(ipc_ns)						\
-	.user_ns	= &init_user_ns,				\
 }
 
 #define INIT_SIGHAND(sighand) {						\
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index c8a768e59640..afad7dec1b36 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -27,7 +27,6 @@ struct nsproxy {
 	struct ipc_namespace *ipc_ns;
 	struct mnt_namespace *mnt_ns;
 	struct pid_namespace *pid_ns;
-	struct user_namespace *user_ns;
 	struct net 	     *net_ns;
 };
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2036e9f26020..7f8015a3082e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -638,6 +638,7 @@ struct user_struct {
 	/* Hash table maintenance information */
 	struct hlist_node uidhash_node;
 	uid_t uid;
+	struct user_namespace *user_ns;
 
 #ifdef CONFIG_USER_SCHED
 	struct task_group *tg;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b5f41d4c2eec..315bcd375224 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -12,7 +12,7 @@
 struct user_namespace {
 	struct kref		kref;
 	struct hlist_head	uidhash_table[UIDHASH_SZ];
-	struct user_struct	*root_user;
+	struct user_struct	*creator;
 };
 
 extern struct user_namespace init_user_ns;
@@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 	return ns;
 }
 
-extern struct user_namespace *copy_user_ns(int flags,
-					   struct user_namespace *old_ns);
+extern int create_user_ns(struct cred *new);
 extern void free_user_ns(struct kref *kref);
 
 static inline void put_user_ns(struct user_namespace *ns)
@@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 	return &init_user_ns;
 }
 
-static inline struct user_namespace *copy_user_ns(int flags,
-						  struct user_namespace *old_ns)
+static inline int create_user_ns(struct cred *new)
 {
-	if (flags & CLONE_NEWUSER)
-		return ERR_PTR(-EINVAL);
-
-	return old_ns;
+	return -EINVAL;
 }
 
 static inline void put_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 13697ca2bb38..ff7bc071991c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -274,6 +274,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct thread_group_cred *tgcred;
 #endif
 	struct cred *new;
+	int ret;
 
 	mutex_init(&p->cred_exec_mutex);
 
@@ -293,6 +294,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	if (!new)
 		return -ENOMEM;
 
+	if (clone_flags & CLONE_NEWUSER) {
+		ret = create_user_ns(new);
+		if (ret < 0)
+			goto error_put;
+	}
+
 #ifdef CONFIG_KEYS
 	/* new threads get their own thread keyrings if their parent already
 	 * had one */
@@ -309,8 +316,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	if (!(clone_flags & CLONE_THREAD)) {
 		tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
 		if (!tgcred) {
-			put_cred(new);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto error_put;
 		}
 		atomic_set(&tgcred->usage, 1);
 		spin_lock_init(&tgcred->lock);
@@ -325,6 +332,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	atomic_inc(&new->user->processes);
 	p->cred = p->real_cred = get_cred(new);
 	return 0;
+
+error_put:
+	put_cred(new);
+	return ret;
 }
 
 /**
diff --git a/kernel/fork.c b/kernel/fork.c
index 29c18c14812d..1dd89451fae4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -976,7 +976,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (atomic_read(&p->real_cred->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
-		    p->real_cred->user != current->nsproxy->user_ns->root_user)
+		    p->real_cred->user != INIT_USER)
 			goto bad_fork_free;
 	}
 
@@ -1334,6 +1334,20 @@ long do_fork(unsigned long clone_flags,
 	int trace = 0;
 	long nr;
 
+	/*
+	 * Do some preliminary argument and permissions checking before we
+	 * actually start allocating stuff
+	 */
+	if (clone_flags & CLONE_NEWUSER) {
+		if (clone_flags & CLONE_THREAD)
+			return -EINVAL;
+		/* hopefully this check will go away when userns support is
+		 * complete
+		 */
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
 	/*
 	 * We hope to recycle these flags after 2.6.26
 	 */
@@ -1581,8 +1595,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
-				CLONE_NEWNET))
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
 		goto bad_unshare_out;
 
 	/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 1d3ef29a2583..63598dca2d0c 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_pid;
 	}
 
-	new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
-	if (IS_ERR(new_nsp->user_ns)) {
-		err = PTR_ERR(new_nsp->user_ns);
-		goto out_user;
-	}
-
 	new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
 	if (IS_ERR(new_nsp->net_ns)) {
 		err = PTR_ERR(new_nsp->net_ns);
@@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	return new_nsp;
 
 out_net:
-	if (new_nsp->user_ns)
-		put_user_ns(new_nsp->user_ns);
-out_user:
 	if (new_nsp->pid_ns)
 		put_pid_ns(new_nsp->pid_ns);
 out_pid:
@@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 	get_nsproxy(old_ns);
 
 	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-				CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
+				CLONE_NEWPID | CLONE_NEWNET)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns)
 		put_ipc_ns(ns->ipc_ns);
 	if (ns->pid_ns)
 		put_pid_ns(ns->pid_ns);
-	if (ns->user_ns)
-		put_user_ns(ns->user_ns);
 	put_net(ns->net_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
 }
@@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWUSER | CLONE_NEWNET)))
+			       CLONE_NEWNET)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/sys.c b/kernel/sys.c
index ab735040468a..ebe65c2c9873 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -565,13 +565,13 @@ static int set_user(struct cred *new)
 {
 	struct user_struct *new_user;
 
-	new_user = alloc_uid(current->nsproxy->user_ns, new->uid);
+	new_user = alloc_uid(current_user_ns(), new->uid);
 	if (!new_user)
 		return -EAGAIN;
 
 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
-			new_user != current->nsproxy->user_ns->root_user) {
+			new_user != INIT_USER) {
 		free_uid(new_user);
 		return -EAGAIN;
 	}
diff --git a/kernel/user.c b/kernel/user.c
index d476307dd4b0..c0ef3a464438 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -20,9 +20,9 @@
 
 struct user_namespace init_user_ns = {
 	.kref = {
-		.refcount	= ATOMIC_INIT(2),
+		.refcount	= ATOMIC_INIT(1),
 	},
-	.root_user = &root_user,
+	.creator = &root_user,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
@@ -48,12 +48,14 @@ static struct kmem_cache *uid_cachep;
  */
 static DEFINE_SPINLOCK(uidhash_lock);
 
+/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */
 struct user_struct root_user = {
-	.__count	= ATOMIC_INIT(1),
+	.__count	= ATOMIC_INIT(2),
 	.processes	= ATOMIC_INIT(1),
 	.files		= ATOMIC_INIT(0),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
+	.user_ns	= &init_user_ns,
 #ifdef CONFIG_USER_SCHED
 	.tg		= &init_task_group,
 #endif
@@ -314,12 +316,13 @@ done:
  * IRQ state (as stored in flags) is restored and uidhash_lock released
  * upon function exit.
  */
-static inline void free_user(struct user_struct *up, unsigned long flags)
+static void free_user(struct user_struct *up, unsigned long flags)
 {
 	/* restore back the count */
 	atomic_inc(&up->__count);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 
+	put_user_ns(up->user_ns);
 	INIT_WORK(&up->work, remove_user_sysfs_dir);
 	schedule_work(&up->work);
 }
@@ -335,13 +338,14 @@ static inline void uids_mutex_unlock(void) { }
  * IRQ state (as stored in flags) is restored and uidhash_lock released
  * upon function exit.
  */
-static inline void free_user(struct user_struct *up, unsigned long flags)
+static void free_user(struct user_struct *up, unsigned long flags)
 {
 	uid_hash_remove(up);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 	sched_destroy_user(up);
 	key_put(up->uid_keyring);
 	key_put(up->session_keyring);
+	put_user_ns(up->user_ns);
 	kmem_cache_free(uid_cachep, up);
 }
 
@@ -357,7 +361,7 @@ struct user_struct *find_user(uid_t uid)
 {
 	struct user_struct *ret;
 	unsigned long flags;
-	struct user_namespace *ns = current->nsproxy->user_ns;
+	struct user_namespace *ns = current_user()->user_ns;
 
 	spin_lock_irqsave(&uidhash_lock, flags);
 	ret = uid_hash_find(uid, uidhashentry(ns, uid));
@@ -404,6 +408,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 		if (sched_create_user(new) < 0)
 			goto out_free_user;
 
+		new->user_ns = get_user_ns(ns);
+
 		if (uids_user_create(new))
 			goto out_destoy_sched;
 
@@ -427,7 +433,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 			up = new;
 		}
 		spin_unlock_irq(&uidhash_lock);
-
 	}
 
 	uids_mutex_unlock();
@@ -436,6 +441,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 
 out_destoy_sched:
 	sched_destroy_user(new);
+	put_user_ns(new->user_ns);
 out_free_user:
 	kmem_cache_free(uid_cachep, new);
 out_unlock:
@@ -443,33 +449,6 @@ out_unlock:
 	return NULL;
 }
 
-#ifdef CONFIG_USER_NS
-void release_uids(struct user_namespace *ns)
-{
-	int i;
-	unsigned long flags;
-	struct hlist_head *head;
-	struct hlist_node *nd;
-
-	spin_lock_irqsave(&uidhash_lock, flags);
-	/*
-	 * collapse the chains so that the user_struct-s will
-	 * be still alive, but not in hashes. subsequent free_uid()
-	 * will free them.
-	 */
-	for (i = 0; i < UIDHASH_SZ; i++) {
-		head = ns->uidhash_table + i;
-		while (!hlist_empty(head)) {
-			nd = head->first;
-			hlist_del_init(nd);
-		}
-	}
-	spin_unlock_irqrestore(&uidhash_lock, flags);
-
-	free_uid(ns->root_user);
-}
-#endif
-
 static int __init uid_cache_init(void)
 {
 	int n;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0d9c51d67333..79084311ee57 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,70 +9,55 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/cred.h>
 
 /*
- * Clone a new ns copying an original user ns, setting refcount to 1
- * @old_ns: namespace to clone
- * Return NULL on error (failure to kmalloc), new ns otherwise
+ * Create a new user namespace, deriving the creator from the user in the
+ * passed credentials, and replacing that user with the new root user for the
+ * new namespace.
+ *
+ * This is called by copy_creds(), which will finish setting the target task's
+ * credentials.
  */
-static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
+int create_user_ns(struct cred *new)
 {
 	struct user_namespace *ns;
-	struct user_struct *new_user;
-	struct cred *new;
+	struct user_struct *root_user;
 	int n;
 
 	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
 	if (!ns)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	kref_init(&ns->kref);
 
 	for (n = 0; n < UIDHASH_SZ; ++n)
 		INIT_HLIST_HEAD(ns->uidhash_table + n);
 
-	/* Insert new root user.  */
-	ns->root_user = alloc_uid(ns, 0);
-	if (!ns->root_user) {
+	/* Alloc new root user.  */
+	root_user = alloc_uid(ns, 0);
+	if (!root_user) {
 		kfree(ns);
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	}
 
-	/* Reset current->user with a new one */
-	new_user = alloc_uid(ns, current_uid());
-	if (!new_user) {
-		free_uid(ns->root_user);
-		kfree(ns);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	/* Install the new user */
-	new = prepare_creds();
-	if (!new) {
-		free_uid(new_user);
-		free_uid(ns->root_user);
-		kfree(ns);
-	}
-	free_uid(new->user);
-	new->user = new_user;
-	commit_creds(new);
-	return ns;
-}
-
-struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
-{
-	struct user_namespace *new_ns;
-
-	BUG_ON(!old_ns);
-	get_user_ns(old_ns);
-
-	if (!(flags & CLONE_NEWUSER))
-		return old_ns;
+	/* set the new root user in the credentials under preparation */
+	ns->creator = new->user;
+	new->user = root_user;
+	new->uid = new->euid = new->suid = new->fsuid = 0;
+	new->gid = new->egid = new->sgid = new->fsgid = 0;
+	put_group_info(new->group_info);
+	new->group_info = get_group_info(&init_groups);
+#ifdef CONFIG_KEYS
+	key_put(new->request_key_auth);
+	new->request_key_auth = NULL;
+#endif
+	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
 
-	new_ns = clone_user_ns(old_ns);
+	/* alloc_uid() incremented the userns refcount.  Just set it to 1 */
+	kref_set(&ns->kref, 1);
 
-	put_user_ns(old_ns);
-	return new_ns;
+	return 0;
 }
 
 void free_user_ns(struct kref *kref)
@@ -80,7 +65,7 @@ void free_user_ns(struct kref *kref)
 	struct user_namespace *ns;
 
 	ns = container_of(kref, struct user_namespace, kref);
-	release_uids(ns);
+	free_uid(ns->creator);
 	kfree(ns);
 }
 EXPORT_SYMBOL(free_user_ns);
-- 
cgit v1.2.3


From e1aa680fa40e7492260a09cb57d94002245cc8fe Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 24 Nov 2008 21:11:55 -0800
Subject: tcp: move tcp_simple_retransmit to tcp_input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 --
 net/ipv4/tcp_input.c  | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_output.c | 50 ------------------------------------------------
 3 files changed, 52 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8f26b28fb407..90b4c3b4c336 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -472,8 +472,6 @@ extern void tcp_send_delayed_ack(struct sock *sk);
 
 /* tcp_input.c */
 extern void tcp_cwnd_application_limited(struct sock *sk);
-extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
-					    struct sk_buff *skb);
 
 /* tcp_timer.c */
 extern void tcp_init_xmit_timers(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 097294b7da3e..8085704863fb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1002,7 +1002,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
 	}
 }
 
-void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
+					    struct sk_buff *skb)
 {
 	tcp_verify_retransmit_hint(tp, skb);
 
@@ -2559,6 +2560,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 }
 
+/* Do a simple retransmit without using the backoff mechanisms in
+ * tcp_timer. This is used for path mtu discovery.
+ * The socket is already locked here.
+ */
+void tcp_simple_retransmit(struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	unsigned int mss = tcp_current_mss(sk, 0);
+	u32 prior_lost = tp->lost_out;
+
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+		if (skb->len > mss &&
+		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
+				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+				tp->retrans_out -= tcp_skb_pcount(skb);
+			}
+			tcp_skb_mark_lost_uncond_verify(tp, skb);
+		}
+	}
+
+	tcp_clear_retrans_hints_partial(tp);
+
+	if (prior_lost == tp->lost_out)
+		return;
+
+	if (tcp_is_reno(tp))
+		tcp_limit_reno_sacked(tp);
+
+	tcp_verify_left_out(tp);
+
+	/* Don't muck with the congestion window here.
+	 * Reason is that we do not increase amount of _data_
+	 * in network, but units changed and effective
+	 * cwnd/ssthresh really reduced now.
+	 */
+	if (icsk->icsk_ca_state != TCP_CA_Loss) {
+		tp->high_seq = tp->snd_nxt;
+		tp->snd_ssthresh = tcp_current_ssthresh(sk);
+		tp->prior_ssthresh = 0;
+		tp->undo_marker = 0;
+		tcp_set_ca_state(sk, TCP_CA_Loss);
+	}
+	tcp_xmit_retransmit_queue(sk);
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 86ef98975e94..c069ecb81ea5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1879,56 +1879,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 	}
 }
 
-/* Do a simple retransmit without using the backoff mechanisms in
- * tcp_timer. This is used for path mtu discovery.
- * The socket is already locked here.
- */
-void tcp_simple_retransmit(struct sock *sk)
-{
-	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	unsigned int mss = tcp_current_mss(sk, 0);
-	u32 prior_lost = tp->lost_out;
-
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-		if (skb->len > mss &&
-		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
-				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-				tp->retrans_out -= tcp_skb_pcount(skb);
-			}
-			tcp_skb_mark_lost_uncond_verify(tp, skb);
-		}
-	}
-
-	tcp_clear_retrans_hints_partial(tp);
-
-	if (prior_lost == tp->lost_out)
-		return;
-
-	if (tcp_is_reno(tp))
-		tcp_limit_reno_sacked(tp);
-
-	tcp_verify_left_out(tp);
-
-	/* Don't muck with the congestion window here.
-	 * Reason is that we do not increase amount of _data_
-	 * in network, but units changed and effective
-	 * cwnd/ssthresh really reduced now.
-	 */
-	if (icsk->icsk_ca_state != TCP_CA_Loss) {
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_ssthresh = tcp_current_ssthresh(sk);
-		tp->prior_ssthresh = 0;
-		tp->undo_marker = 0;
-		tcp_set_ca_state(sk, TCP_CA_Loss);
-	}
-	tcp_xmit_retransmit_queue(sk);
-}
-
 /* This retransmits one SKB.  Policy decisions and retransmit queue
  * state updates are done by the caller.  Returns non-zero if an
  * error occurred which prevented the send.
-- 
cgit v1.2.3


From 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 24 Nov 2008 21:20:15 -0800
Subject: tcp: Try to restore large SKBs while SACK processing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During SACK processing, most of the benefits of TSO are eaten by
the SACK blocks that one-by-one fragment SKBs to MSS sized chunks.
Then we're in problems when cleanup work for them has to be done
when a large cumulative ACK comes. Try to return back to pre-split
state already while more and more SACK info gets discovered by
combining newly discovered SACK areas with the previous skb if
that's SACKed as well.

This approach has a number of benefits:

1) The processing overhead is spread more equally over the RTT
2) Write queue has less skbs to process (affect everything
   which has to walk in the queue past the sacked areas)
3) Write queue is consistent whole the time, so no other parts
   of TCP has to be aware of this (this was not the case with
   some other approach that was, well, quite intrusive all
   around).
4) Clean_rtx_queue can release most of the pages using single
   put_page instead of previous PAGE_SIZE/mss+1 calls

In case a hole is fully filled by the new SACK block, we attempt
to combine the next skb too which allows construction of skbs
that are even larger than what tso split them to and it handles
hole per on every nth patterns that often occur during slow start
overshoot pretty nicely. Though this to be really useful also
a retransmission would have to get lost since cumulative ACKs
advance one hole at a time in the most typical case.

TODO: handle upwards only merging. That should be rather easy
when segment is fully sacked but I'm leaving that as future
work item (it won't make very large difference anyway since
this current approach already covers quite a lot of normal
cases).

I was earlier thinking of some sophisticated way of tracking
timestamps of the first and the last segment but later on
realized that it won't be that necessary at all to store the
timestamp of the last segment. The cases that can occur are
basically either:
  1) ambiguous => no sensible measurement can be taken anyway
  2) non-ambiguous is due to reordering => having the timestamp
     of the last segment there is just skewing things more off
     than does some good since the ack got triggered by one of
     the holes (besides some substle issues that would make
     determining right hole/skb even harder problem). Anyway,
     it has nothing to do with this change then.

I choose to route some abnormal looking cases with goto noop,
some could be handled differently (eg., by stopping the
walking at that skb but again). In general, they either
shouldn't happen at all or are rare enough to make no difference
in practice.

In theory this change (as whole) could cause some macroscale
regression (global) because of cache misses that are taken over
the round-trip time but it gets very likely better because of much
less (local) cache misses per other write queue walkers and the
big recovery clearing cumulative ack.

Worth to note that these benefits would be very easy to get also
without TSO/GSO being on as long as the data is in pages so that
we can merge them. Currently I won't let that happen because
DSACK splitting at fragment that would mess up pcounts due to
sk_can_gso in tcp_set_skb_tso_segs. Once DSACKs fragments gets
avoided, we have some conditions that can be made less strict.

TODO: I will probably have to convert the excessive pointer
passing to struct sacktag_state... :-)

My testing revealed that considerable amount of skbs couldn't
be shifted because they were cloned (most likely still awaiting
tx reclaim)...

[The rest is considering future work instead since I got
repeatably EFAULT to tcpdump's recvfrom when I added
pskb_expand_head to deal with clones, so I separated that
into another, later patch]

...To counter that, I gave up on the fifth advantage:

5) When growing previous SACK block, less allocs for new skbs
   are done, basically a new alloc is needed only when new hole
   is detected and when the previous skb runs out of frags space

...which now only happens of if reclaim is fast enough to dispose
the clone before the SACK block comes in (the window is RTT long),
otherwise we'll have to alloc some.

With clones being handled I got these numbers (will be somewhat
worse without that), taken with fine-grained mibs:

                  TCPSackShifted 398
                   TCPSackMerged 877
            TCPSackShiftFallback 320
      TCPSACKCOLLAPSEFALLBACKGSO 0
  TCPSACKCOLLAPSEFALLBACKSKBBITS 0
  TCPSACKCOLLAPSEFALLBACKSKBDATA 0
    TCPSACKCOLLAPSEFALLBACKBELOW 0
    TCPSACKCOLLAPSEFALLBACKFIRST 1
 TCPSACKCOLLAPSEFALLBACKPREVBITS 318
      TCPSACKCOLLAPSEFALLBACKMSS 1
   TCPSACKCOLLAPSEFALLBACKNOHEAD 0
    TCPSACKCOLLAPSEFALLBACKSHIFT 0
          TCPSACKCOLLAPSENOOPSEQ 0
  TCPSACKCOLLAPSENOOPSMALLPCOUNT 0
     TCPSACKCOLLAPSENOOPSMALLLEN 0
             TCPSACKCOLLAPSEHOLE 12

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  33 +++++++
 include/net/tcp.h      |   5 +
 net/core/skbuff.c      | 140 +++++++++++++++++++++++++++
 net/ipv4/tcp_input.c   | 256 +++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 427 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a01b6f84e3bc..acf17af45af9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -492,6 +492,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list,
 	return (skb->next == (struct sk_buff *) list);
 }
 
+/**
+ *	skb_queue_is_first - check if skb is the first entry in the queue
+ *	@list: queue head
+ *	@skb: buffer
+ *
+ *	Returns true if @skb is the first buffer on the list.
+ */
+static inline bool skb_queue_is_first(const struct sk_buff_head *list,
+				      const struct sk_buff *skb)
+{
+	return (skb->prev == (struct sk_buff *) list);
+}
+
 /**
  *	skb_queue_next - return the next packet in the queue
  *	@list: queue head
@@ -510,6 +523,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
 	return skb->next;
 }
 
+/**
+ *	skb_queue_prev - return the prev packet in the queue
+ *	@list: queue head
+ *	@skb: current buffer
+ *
+ *	Return the prev packet in @list before @skb.  It is only valid to
+ *	call this if skb_queue_is_first() evaluates to false.
+ */
+static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
+					     const struct sk_buff *skb)
+{
+	/* This BUG_ON may seem severe, but if we just return then we
+	 * are going to dereference garbage.
+	 */
+	BUG_ON(skb_queue_is_first(list, skb));
+	return skb->prev;
+}
+
 /**
  *	skb_get - reference buffer
  *	@skb: buffer to reference
@@ -1652,6 +1683,8 @@ extern int             skb_splice_bits(struct sk_buff *skb,
 extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 extern void	       skb_split(struct sk_buff *skb,
 				 struct sk_buff *skb1, const u32 len);
+extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
+				 int shiftlen);
 
 extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 90b4c3b4c336..265392470b26 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1192,6 +1192,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu
 	return skb_queue_next(&sk->sk_write_queue, skb);
 }
 
+static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb)
+{
+	return skb_queue_prev(&sk->sk_write_queue, skb);
+}
+
 #define tcp_for_write_queue(skb, sk)					\
 	skb_queue_walk(&(sk)->sk_write_queue, skb)
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 267185a848f6..844b8abeb18c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2018,6 +2018,146 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 		skb_split_no_header(skb, skb1, len, pos);
 }
 
+/* Shifting from/to a cloned skb is a no-go.
+ *
+ * TODO: handle cloned skbs by using pskb_expand_head()
+ */
+static int skb_prepare_for_shift(struct sk_buff *skb)
+{
+	return skb_cloned(skb);
+}
+
+/**
+ * skb_shift - Shifts paged data partially from skb to another
+ * @tgt: buffer into which tail data gets added
+ * @skb: buffer from which the paged data comes from
+ * @shiftlen: shift up to this many bytes
+ *
+ * Attempts to shift up to shiftlen worth of bytes, which may be less than
+ * the length of the skb, from tgt to skb. Returns number bytes shifted.
+ * It's up to caller to free skb if everything was shifted.
+ *
+ * If @tgt runs out of frags, the whole operation is aborted.
+ *
+ * Skb cannot include anything else but paged data while tgt is allowed
+ * to have non-paged data as well.
+ *
+ * TODO: full sized shift could be optimized but that would need
+ * specialized skb free'er to handle frags without up-to-date nr_frags.
+ */
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
+{
+	int from, to, merge, todo;
+	struct skb_frag_struct *fragfrom, *fragto;
+
+	BUG_ON(shiftlen > skb->len);
+	BUG_ON(skb_headlen(skb));	/* Would corrupt stream */
+
+	todo = shiftlen;
+	from = 0;
+	to = skb_shinfo(tgt)->nr_frags;
+	fragfrom = &skb_shinfo(skb)->frags[from];
+
+	/* Actual merge is delayed until the point when we know we can
+	 * commit all, so that we don't have to undo partial changes
+	 */
+	if (!to ||
+	    !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+		merge = -1;
+	} else {
+		merge = to - 1;
+
+		todo -= fragfrom->size;
+		if (todo < 0) {
+			if (skb_prepare_for_shift(skb) ||
+			    skb_prepare_for_shift(tgt))
+				return 0;
+
+			fragto = &skb_shinfo(tgt)->frags[merge];
+
+			fragto->size += shiftlen;
+			fragfrom->size -= shiftlen;
+			fragfrom->page_offset += shiftlen;
+
+			goto onlymerged;
+		}
+
+		from++;
+	}
+
+	/* Skip full, not-fitting skb to avoid expensive operations */
+	if ((shiftlen == skb->len) &&
+	    (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
+		return 0;
+
+	if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
+		return 0;
+
+	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
+		if (to == MAX_SKB_FRAGS)
+			return 0;
+
+		fragfrom = &skb_shinfo(skb)->frags[from];
+		fragto = &skb_shinfo(tgt)->frags[to];
+
+		if (todo >= fragfrom->size) {
+			*fragto = *fragfrom;
+			todo -= fragfrom->size;
+			from++;
+			to++;
+
+		} else {
+			get_page(fragfrom->page);
+			fragto->page = fragfrom->page;
+			fragto->page_offset = fragfrom->page_offset;
+			fragto->size = todo;
+
+			fragfrom->page_offset += todo;
+			fragfrom->size -= todo;
+			todo = 0;
+
+			to++;
+			break;
+		}
+	}
+
+	/* Ready to "commit" this state change to tgt */
+	skb_shinfo(tgt)->nr_frags = to;
+
+	if (merge >= 0) {
+		fragfrom = &skb_shinfo(skb)->frags[0];
+		fragto = &skb_shinfo(tgt)->frags[merge];
+
+		fragto->size += fragfrom->size;
+		put_page(fragfrom->page);
+	}
+
+	/* Reposition in the original skb */
+	to = 0;
+	while (from < skb_shinfo(skb)->nr_frags)
+		skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
+	skb_shinfo(skb)->nr_frags = to;
+
+	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
+
+onlymerged:
+	/* Most likely the tgt won't ever need its checksum anymore, skb on
+	 * the other hand might need it if it needs to be resent
+	 */
+	tgt->ip_summed = CHECKSUM_PARTIAL;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	/* Yak, is it really working this way? Some helper please? */
+	skb->len -= shiftlen;
+	skb->data_len -= shiftlen;
+	skb->truesize -= shiftlen;
+	tgt->len += shiftlen;
+	tgt->data_len += shiftlen;
+	tgt->truesize += shiftlen;
+
+	return shiftlen;
+}
+
 /**
  * skb_prepare_seq_read - Prepare a sequential read of skb data
  * @skb: the buffer to read
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3c8e297e2c39..97d57676b8ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1242,6 +1242,8 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
  * aligned portion of it that matches. Therefore we might need to fragment
  * which may fail and creates some hassle (caller must handle error case
  * returns).
+ *
+ * FIXME: this could be merged to shift decision code
  */
 static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 				 u32 start_seq, u32 end_seq)
@@ -1353,9 +1355,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
 		if (fack_count > tp->fackets_out)
 			tp->fackets_out = fack_count;
-
-		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-			tcp_advance_highest_sack(sk, skb);
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1370,12 +1369,231 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 	return flag;
 }
 
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+			   struct sk_buff *skb, unsigned int pcount,
+			   int shifted, int fack_count, int *reord,
+			   int *flag, int mss)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u8 dummy_sacked = TCP_SKB_CB(skb)->sacked;	/* We discard results */
+
+	BUG_ON(!pcount);
+
+	TCP_SKB_CB(prev)->end_seq += shifted;
+	TCP_SKB_CB(skb)->seq += shifted;
+
+	skb_shinfo(prev)->gso_segs += pcount;
+	BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+	skb_shinfo(skb)->gso_segs -= pcount;
+
+	/* When we're adding to gso_segs == 1, gso_size will be zero,
+	 * in theory this shouldn't be necessary but as long as DSACK
+	 * code can come after this skb later on it's better to keep
+	 * setting gso_size to something.
+	 */
+	if (!skb_shinfo(prev)->gso_size) {
+		skb_shinfo(prev)->gso_size = mss;
+		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
+	}
+
+	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
+	if (skb_shinfo(skb)->gso_segs <= 1) {
+		skb_shinfo(skb)->gso_size = 0;
+		skb_shinfo(skb)->gso_type = 0;
+	}
+
+	*flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked,
+				 pcount);
+
+	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
+	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
+
+	tcp_clear_all_retrans_hints(tp);
+
+	if (skb->len > 0) {
+		BUG_ON(!tcp_skb_pcount(skb));
+		return 0;
+	}
+
+	/* Whole SKB was eaten :-) */
+
+	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+	if (skb == tcp_highest_sack(sk))
+		tcp_advance_highest_sack(sk, skb);
+
+	tcp_unlink_write_queue(skb, sk);
+	sk_wmem_free_skb(sk, skb);
+
+	return 1;
+}
+
+/* I wish gso_size would have a bit more sane initialization than
+ * something-or-zero which complicates things
+ */
+static int tcp_shift_mss(struct sk_buff *skb)
+{
+	int mss = tcp_skb_mss(skb);
+
+	if (!mss)
+		mss = skb->len;
+
+	return mss;
+}
+
+/* Shifting pages past head area doesn't work */
+static int skb_can_shift(struct sk_buff *skb)
+{
+	return !skb_headlen(skb) && skb_is_nonlinear(skb);
+}
+
+/* Try collapsing SACK blocks spanning across multiple skbs to a single
+ * skb.
+ */
+static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
+					  u32 start_seq, u32 end_seq,
+					  int dup_sack, int *fack_count,
+					  int *reord, int *flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *prev;
+	int mss;
+	int pcount = 0;
+	int len;
+	int in_sack;
+
+	if (!sk_can_gso(sk))
+		goto fallback;
+
+	/* Normally R but no L won't result in plain S */
+	if (!dup_sack &&
+	    (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS)
+		goto fallback;
+	if (!skb_can_shift(skb))
+		goto fallback;
+	/* This frame is about to be dropped (was ACKed). */
+	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+		goto fallback;
+
+	/* Can only happen with delayed DSACK + discard craziness */
+	if (unlikely(skb == tcp_write_queue_head(sk)))
+		goto fallback;
+	prev = tcp_write_queue_prev(sk, skb);
+
+	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
+		goto fallback;
+
+	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
+	if (in_sack) {
+		len = skb->len;
+		pcount = tcp_skb_pcount(skb);
+		mss = tcp_shift_mss(skb);
+
+		/* TODO: Fix DSACKs to not fragment already SACKed and we can
+		 * drop this restriction as unnecessary
+		 */
+		if (mss != tcp_shift_mss(prev))
+			goto fallback;
+	} else {
+		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
+			goto noop;
+		/* CHECKME: This is non-MSS split case only?, this will
+		 * cause skipped skbs due to advancing loop btw, original
+		 * has that feature too
+		 */
+		if (tcp_skb_pcount(skb) <= 1)
+			goto noop;
+
+		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
+		if (!in_sack) {
+			/* TODO: head merge to next could be attempted here
+			 * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
+			 * though it might not be worth of the additional hassle
+			 *
+			 * ...we can probably just fallback to what was done
+			 * previously. We could try merging non-SACKed ones
+			 * as well but it probably isn't going to buy off
+			 * because later SACKs might again split them, and
+			 * it would make skb timestamp tracking considerably
+			 * harder problem.
+			 */
+			goto fallback;
+		}
+
+		len = end_seq - TCP_SKB_CB(skb)->seq;
+		BUG_ON(len < 0);
+		BUG_ON(len > skb->len);
+
+		/* MSS boundaries should be honoured or else pcount will
+		 * severely break even though it makes things bit trickier.
+		 * Optimize common case to avoid most of the divides
+		 */
+		mss = tcp_skb_mss(skb);
+
+		/* TODO: Fix DSACKs to not fragment already SACKed and we can
+		 * drop this restriction as unnecessary
+		 */
+		if (mss != tcp_shift_mss(prev))
+			goto fallback;
+
+		if (len == mss) {
+			pcount = 1;
+		} else if (len < mss) {
+			goto noop;
+		} else {
+			pcount = len / mss;
+			len = pcount * mss;
+		}
+	}
+
+	if (!skb_shift(prev, skb, len))
+		goto fallback;
+	if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord,
+			     flag, mss))
+		goto out;
+
+	/* Hole filled allows collapsing with the next as well, this is very
+	 * useful when hole on every nth skb pattern happens
+	 */
+	if (prev == tcp_write_queue_tail(sk))
+		goto out;
+	skb = tcp_write_queue_next(sk, prev);
+
+	if (!skb_can_shift(skb))
+		goto out;
+	if (skb == tcp_send_head(sk))
+		goto out;
+	if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
+		goto out;
+
+	len = skb->len;
+	if (skb_shift(prev, skb, len)) {
+		pcount += tcp_skb_pcount(skb);
+		tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len,
+				*fack_count, reord, flag, mss);
+	}
+
+out:
+	*fack_count += pcount;
+	return prev;
+
+noop:
+	return skb;
+
+fallback:
+	return NULL;
+}
+
 static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 					struct tcp_sack_block *next_dup,
 					u32 start_seq, u32 end_seq,
 					int dup_sack_in, int *fack_count,
 					int *reord, int *flag)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *tmp;
+
 	tcp_for_write_queue_from(skb, sk) {
 		int in_sack = 0;
 		int dup_sack = dup_sack_in;
@@ -1396,18 +1614,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 				dup_sack = 1;
 		}
 
-		if (in_sack <= 0)
-			in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
-							end_seq);
+		/* skb reference here is a bit tricky to get right, since
+		 * shifting can eat and free both this skb and the next,
+		 * so not even _safe variant of the loop is enough.
+		 */
+		if (in_sack <= 0) {
+			tmp = tcp_shift_skb_data(sk, skb, start_seq,
+						 end_seq, dup_sack,
+						 fack_count, reord, flag);
+			if (tmp != NULL) {
+				if (tmp != skb) {
+					skb = tmp;
+					continue;
+				}
+
+				in_sack = 0;
+			} else {
+				in_sack = tcp_match_skb_to_sack(sk, skb,
+								start_seq,
+								end_seq);
+			}
+		}
+
 		if (unlikely(in_sack < 0))
 			break;
 
-		if (in_sack)
+		if (in_sack) {
 			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
 						 *fack_count,
 						 &(TCP_SKB_CB(skb)->sacked),
 						 tcp_skb_pcount(skb));
 
+			if (!before(TCP_SKB_CB(skb)->seq,
+				    tcp_highest_sack_seq(tp)))
+				tcp_advance_highest_sack(sk, skb);
+		}
+
 		*fack_count += tcp_skb_pcount(skb);
 	}
 	return skb;
-- 
cgit v1.2.3


From 111cc8b913b42ef07793648b1699288332f273e1 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 24 Nov 2008 21:27:22 -0800
Subject: tcp: add some mibs to track collapsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h | 3 +++
 net/ipv4/proc.c      | 3 +++
 net/ipv4/tcp_input.c | 4 ++++
 3 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 7a6e6bba4a71..aee3f1e1d1ce 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -216,6 +216,9 @@ enum
 	LINUX_MIB_TCPSPURIOUSRTOS,		/* TCPSpuriousRTOs */
 	LINUX_MIB_TCPMD5NOTFOUND,		/* TCPMD5NotFound */
 	LINUX_MIB_TCPMD5UNEXPECTED,		/* TCPMD5Unexpected */
+	LINUX_MIB_SACKSHIFTED,
+	LINUX_MIB_SACKMERGED,
+	LINUX_MIB_SACKSHIFTFALLBACK,
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a631a1f110ca..731789bb499f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -234,6 +234,9 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
 	SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
 	SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
+	SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
+	SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
+	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e6291dde3348..9f8a80ba17bd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1415,6 +1415,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
 
 	if (skb->len > 0) {
 		BUG_ON(!tcp_skb_pcount(skb));
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED);
 		return 0;
 	}
 
@@ -1436,6 +1437,8 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
 	tcp_unlink_write_queue(skb, sk);
 	sk_wmem_free_skb(sk, skb);
 
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED);
+
 	return 1;
 }
 
@@ -1594,6 +1597,7 @@ noop:
 	return skb;
 
 fallback:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 14bfc987e395797dfe03e915e8b4c7fc9e5078e4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 25 Nov 2008 08:58:11 +0100
Subject: tracing, tty: fix warnings caused by branch tracing and
 tty_kref_get()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stephen Rothwell reported tht this warning started triggering in
linux-next:

  In file included from init/main.c:27:
  include/linux/tty.h: In function ‘tty_kref_get’:
  include/linux/tty.h:330: warning: ‘______f’ is static but declared in inline function ‘tty_kref_get’ which is not static

Which gcc emits for 'extern inline' functions that nevertheless define
static variables. Change it to 'static inline', which is the norm
in the kernel anyway.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3b8121d4e36f..eaec37c9d83d 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -325,7 +325,7 @@ extern struct class *tty_class;
  *	go away
  */
 
-extern inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
+static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
 {
 	if (tty)
 		kref_get(&tty->kref);
-- 
cgit v1.2.3


From 47fd5b8373ecc6bf5473e4139b62b06425448252 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 25 Nov 2008 00:20:43 -0800
Subject: netdev: add HAVE_NET_DEVICE_OPS

As a concession to vendors who have to deal with one source for different
kernel versions, add a HAVE_NET_DEVICE_OPS so they don't end up hard
coding ifdef against kernel version.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6095af572dfd..76a89f8e6a19 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -545,6 +545,7 @@ struct netdev_queue {
  *
  * void (*ndo_poll_controller)(struct net_device *dev);
  */
+#define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
 	void			(*ndo_uninit)(struct net_device *dev);
-- 
cgit v1.2.3


From 7a6b6f515f77d1c62a2f383b6dce18cb0af0cf4f Mon Sep 17 00:00:00 2001
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Tue, 25 Nov 2008 01:02:08 -0800
Subject: DCB: fix kconfig option

Since the netlink option for DCB is necessary to actually be useful,
simplified the Kconfig option.  In addition, added useful help text for the
Kconfig option.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig            |  4 ++--
 drivers/net/ixgbe/Makefile     |  2 +-
 drivers/net/ixgbe/ixgbe.h      |  2 +-
 drivers/net/ixgbe/ixgbe_main.c | 10 +++++-----
 include/linux/netdevice.h      |  4 ++--
 net/Makefile                   |  4 ++--
 net/dcb/Kconfig                | 24 +++++++++++++++++-------
 net/dcb/dcbnl.c                |  2 +-
 8 files changed, 31 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index efd461d7c2bb..75f9f7f2fbed 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2451,10 +2451,10 @@ config IXGBE_DCA
 	  driver.  DCA is a method for warming the CPU cache before data
 	  is used, with the intent of lessening the impact of cache misses.
 
-config IXGBE_DCBNL
+config IXGBE_DCB
 	bool "Data Center Bridging (DCB) Support"
 	default n
-	depends on IXGBE && DCBNL
+	depends on IXGBE && DCB
 	---help---
 	  Say Y here if you want to use Data Center Bridging (DCB) in the
 	  driver.
diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
index 3228e508e628..6e7ef765bcd8 100644
--- a/drivers/net/ixgbe/Makefile
+++ b/drivers/net/ixgbe/Makefile
@@ -35,4 +35,4 @@ obj-$(CONFIG_IXGBE) += ixgbe.o
 ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
               ixgbe_82598.o ixgbe_phy.o
 
-ixgbe-$(CONFIG_IXGBE_DCBNL) +=  ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o
+ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 9509aee2d361..6cbf26e3db80 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -327,7 +327,7 @@ enum ixgbe_boards {
 };
 
 extern struct ixgbe_info ixgbe_82598_info;
-#ifdef CONFIG_IXGBE_DCBNL
+#ifdef CONFIG_IXGBE_DCB
 extern struct dcbnl_rtnl_ops dcbnl_ops;
 extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
                               struct ixgbe_dcb_config *dst_dcb_cfg,
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 6620397a1fb4..667a6463193d 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1914,7 +1914,7 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
 	}
 }
 
-#ifdef CONFIG_IXGBE_DCBNL
+#ifdef CONFIG_IXGBE_DCB
 /*
  * ixgbe_configure_dcb - Configure DCB hardware
  * @adapter: ixgbe adapter struct
@@ -1960,7 +1960,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 	ixgbe_set_rx_mode(netdev);
 
 	ixgbe_restore_vlan(adapter);
-#ifdef CONFIG_IXGBE_DCBNL
+#ifdef CONFIG_IXGBE_DCB
 	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
 		netif_set_gso_max_size(netdev, 32768);
 		ixgbe_configure_dcb(adapter);
@@ -2749,7 +2749,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
 	unsigned int rss;
-#ifdef CONFIG_IXGBE_DCBNL
+#ifdef CONFIG_IXGBE_DCB
 	int j;
 	struct tc_configuration *tc;
 #endif
@@ -2768,7 +2768,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
 	adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES;
 
-#ifdef CONFIG_IXGBE_DCBNL
+#ifdef CONFIG_IXGBE_DCB
 	/* Configure DCB traffic classes */
 	for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
 		tc = &adapter->dcb_cfg.tc_config[j];
@@ -4120,7 +4120,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
 		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 
-#ifdef CONFIG_IXGBE_DCBNL
+#ifdef CONFIG_IXGBE_DCB
 	netdev->dcbnl_ops = &dcbnl_ops;
 #endif
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 76a89f8e6a19..0df0db068ac3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,7 +43,7 @@
 
 #include <net/net_namespace.h>
 #include <net/dsa.h>
-#ifdef CONFIG_DCBNL
+#ifdef CONFIG_DCB
 #include <net/dcbnl.h>
 #endif
 
@@ -847,7 +847,7 @@ struct net_device
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
 
-#ifdef CONFIG_DCBNL
+#ifdef CONFIG_DCB
 	/* Data Center Bridging netlink ops */
 	struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
diff --git a/net/Makefile b/net/Makefile
index e5af3dc3a037..ba4460432b7c 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -56,8 +56,8 @@ obj-$(CONFIG_NETLABEL)		+= netlabel/
 obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
-ifeq ($(CONFIG_DCBNL),y)
-obj-$(CONFIG_DCB)		+= dcb/
+ifneq ($(CONFIG_DCB),)
+obj-y				+= dcb/
 endif
 
 ifeq ($(CONFIG_NET),y)
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
index bdf38802d339..4066d59c8de5 100644
--- a/net/dcb/Kconfig
+++ b/net/dcb/Kconfig
@@ -1,12 +1,22 @@
 config DCB
-        tristate "Data Center Bridging support"
-
-config DCBNL
-	bool "Data Center Bridging netlink interface support"
-	depends on DCB
+	bool "Data Center Bridging support"
 	default n
 	---help---
-	  This option turns on the netlink interface
-	  (dcbnl) for Data Center Bridging capable devices.
+	  This enables support for configuring Data Center Bridging (DCB)
+	  features on DCB capable Ethernet adapters via rtnetlink.  Say 'Y'
+	  if you have a DCB capable Ethernet adapter which supports this
+	  interface and you are connected to a DCB capable switch.
+
+	  DCB is a collection of Ethernet enhancements which allow DCB capable
+	  NICs and switches to support network traffic with differing
+	  requirements (highly reliable, no drops vs. best effort vs. low
+	  latency) to co-exist on Ethernet.
+
+	  DCB features include:
+	    Enhanced Transmission Selection (aka Priority Grouping) - provides a
+	      framework for assigning bandwidth guarantees to traffic classes.
+	    Priority-based Flow Control (PFC) - a MAC control pause frame which
+	      works at the granularity of the 802.1p priority instead of the
+	      link (802.3x).
 
 	  If unsure, say N.
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index b2bda3f610df..79a351d323af 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -48,7 +48,7 @@
  */
 
 MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
-MODULE_DESCRIPTION("Data Center Bridging generic netlink interface");
+MODULE_DESCRIPTION("Data Center Bridging netlink interface");
 MODULE_LICENSE("GPL");
 
 /**************** DCB attribute policies *************************************/
-- 
cgit v1.2.3


From ca109491f612aab5c8152207631c0444f63da97f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Nov 2008 12:43:51 +0100
Subject: hrtimer: removing all ur callback modes

Impact: cleanup, move all hrtimer processing into hardirq context

This is an attempt at removing some of the hrtimer complexity by
reducing the number of callback modes to 1.

This means that all hrtimer callback functions will be ran from HARD-irq
context.

I went through all the 30 odd hrtimer callback functions in the kernel
and saw only one that I'm not quite sure of, which is the one in
net/can/bcm.c - hence I'm CC-ing the folks responsible for that code.

Furthermore, the hrtimer core now calls callbacks directly with IRQs
disabled in case you try to enqueue an expired timer. If this timer is a
periodic timer (which should use hrtimer_forward() to advance its time)
then it might be possible to end up in an inf. recursive loop due to the
fact that hrtimer_forward() doesn't round up to the next timer
granularity, and therefore keeps on calling the callback - obviously
this needs a fix.

Aside from that, this seems to compile and actually boot on my dual core
test box - although I'm sure there are some bugs in, me not hitting any
makes me certain :-)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/input/touchscreen/ads7846.c |   4 +-
 include/linux/hrtimer.h             |  34 +----
 include/linux/interrupt.h           |   3 -
 kernel/hrtimer.c                    | 280 ++++--------------------------------
 kernel/sched.c                      |   2 -
 kernel/time/ntp.c                   |   4 +-
 kernel/time/tick-sched.c            |   1 -
 kernel/trace/trace_sysprof.c        |   1 -
 sound/drivers/pcsp/pcsp.c           |   1 -
 9 files changed, 37 insertions(+), 293 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index b9b7fc6ff1eb..e1ece89fe922 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -697,7 +697,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle)
 	struct ads7846	*ts = container_of(handle, struct ads7846, timer);
 	int		status = 0;
 
-	spin_lock_irq(&ts->lock);
+	spin_lock(&ts->lock);
 
 	if (unlikely(!get_pendown_state(ts) ||
 		     device_suspended(&ts->spi->dev))) {
@@ -728,7 +728,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle)
 			dev_err(&ts->spi->dev, "spi_async --> %d\n", status);
 	}
 
-	spin_unlock_irq(&ts->lock);
+	spin_unlock(&ts->lock);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3eba43878dcb..bd37078c2d7d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -42,26 +42,6 @@ enum hrtimer_restart {
 	HRTIMER_RESTART,	/* Timer must be restarted */
 };
 
-/*
- * hrtimer callback modes:
- *
- *	HRTIMER_CB_SOFTIRQ:		Callback must run in softirq context
- *	HRTIMER_CB_IRQSAFE_PERCPU:	Callback must run in hardirq context
- *					Special mode for tick emulation and
- *					scheduler timer. Such timers are per
- *					cpu and not allowed to be migrated on
- *					cpu unplug.
- *	HRTIMER_CB_IRQSAFE_UNLOCKED:	Callback should run in hardirq context
- *					with timer->base lock unlocked
- *					used for timers which call wakeup to
- *					avoid lock order problems with rq->lock
- */
-enum hrtimer_cb_mode {
-	HRTIMER_CB_SOFTIRQ,
-	HRTIMER_CB_IRQSAFE_PERCPU,
-	HRTIMER_CB_IRQSAFE_UNLOCKED,
-};
-
 /*
  * Values to track state of the timer
  *
@@ -70,7 +50,6 @@ enum hrtimer_cb_mode {
  * 0x00		inactive
  * 0x01		enqueued into rbtree
  * 0x02		callback function running
- * 0x04		callback pending (high resolution mode)
  *
  * Special cases:
  * 0x03		callback function running and enqueued
@@ -92,8 +71,7 @@ enum hrtimer_cb_mode {
 #define HRTIMER_STATE_INACTIVE	0x00
 #define HRTIMER_STATE_ENQUEUED	0x01
 #define HRTIMER_STATE_CALLBACK	0x02
-#define HRTIMER_STATE_PENDING	0x04
-#define HRTIMER_STATE_MIGRATE	0x08
+#define HRTIMER_STATE_MIGRATE	0x04
 
 /**
  * struct hrtimer - the basic hrtimer structure
@@ -109,8 +87,6 @@ enum hrtimer_cb_mode {
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
- * @cb_mode:	high resolution timer feature to select the callback execution
- *		 mode
  * @cb_entry:	list head to enqueue an expired timer into the callback list
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
@@ -129,7 +105,6 @@ struct hrtimer {
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
 	struct list_head		cb_entry;
-	enum hrtimer_cb_mode		cb_mode;
 #ifdef CONFIG_TIMER_STATS
 	int				start_pid;
 	void				*start_site;
@@ -188,15 +163,11 @@ struct hrtimer_clock_base {
  * @check_clocks:	Indictator, when set evaluate time source and clock
  *			event devices whether high resolution mode can be
  *			activated.
- * @cb_pending:		Expired timers are moved from the rbtree to this
- *			list in the timer interrupt. The list is processed
- *			in the softirq.
  * @nr_events:		Total number of timer interrupt events
  */
 struct hrtimer_cpu_base {
 	spinlock_t			lock;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
-	struct list_head		cb_pending;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t				expires_next;
 	int				hres_active;
@@ -404,8 +375,7 @@ static inline int hrtimer_active(const struct hrtimer *timer)
  */
 static inline int hrtimer_is_queued(struct hrtimer *timer)
 {
-	return timer->state &
-		(HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING);
+	return timer->state & HRTIMER_STATE_ENQUEUED;
 }
 
 /*
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f58a0cf8929a..d6210a97a8ca 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -251,9 +251,6 @@ enum
 	BLOCK_SOFTIRQ,
 	TASKLET_SOFTIRQ,
 	SCHED_SOFTIRQ,
-#ifdef CONFIG_HIGH_RES_TIMERS
-	HRTIMER_SOFTIRQ,
-#endif
 	RCU_SOFTIRQ, 	/* Preferable RCU should always be the last softirq */
 
 	NR_SOFTIRQS
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 47e63349d1b2..efd6f41e1c16 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -442,22 +442,6 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 #endif
 
-/*
- * Check, whether the timer is on the callback pending list
- */
-static inline int hrtimer_cb_pending(const struct hrtimer *timer)
-{
-	return timer->state & HRTIMER_STATE_PENDING;
-}
-
-/*
- * Remove a timer from the callback pending list
- */
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
-{
-	list_del_init(&timer->cb_entry);
-}
-
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS
 
@@ -651,6 +635,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 {
 }
 
+static void __run_hrtimer(struct hrtimer *timer);
+
 /*
  * When High resolution timers are active, try to reprogram. Note, that in case
  * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
@@ -661,31 +647,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 					    struct hrtimer_clock_base *base)
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
-
-		/* Timer is expired, act upon the callback mode */
-		switch(timer->cb_mode) {
-		case HRTIMER_CB_IRQSAFE_PERCPU:
-		case HRTIMER_CB_IRQSAFE_UNLOCKED:
-			/*
-			 * This is solely for the sched tick emulation with
-			 * dynamic tick support to ensure that we do not
-			 * restart the tick right on the edge and end up with
-			 * the tick timer in the softirq ! The calling site
-			 * takes care of this. Also used for hrtimer sleeper !
-			 */
-			debug_hrtimer_deactivate(timer);
-			return 1;
-		case HRTIMER_CB_SOFTIRQ:
-			/*
-			 * Move everything else into the softirq pending list !
-			 */
-			list_add_tail(&timer->cb_entry,
-				      &base->cpu_base->cb_pending);
-			timer->state = HRTIMER_STATE_PENDING;
-			return 1;
-		default:
-			BUG();
-		}
+		/*
+		 * XXX: recursion check?
+		 * hrtimer_forward() should round up with timer granularity
+		 * so that we never get into inf recursion here,
+		 * it doesn't do that though
+		 */
+		__run_hrtimer(timer);
+		return 1;
 	}
 	return 0;
 }
@@ -724,11 +693,6 @@ static int hrtimer_switch_to_hres(void)
 	return 1;
 }
 
-static inline void hrtimer_raise_softirq(void)
-{
-	raise_softirq(HRTIMER_SOFTIRQ);
-}
-
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -747,7 +711,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
 {
 	return 0;
 }
-static inline void hrtimer_raise_softirq(void) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -890,10 +853,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base,
 			     unsigned long newstate, int reprogram)
 {
-	/* High res. callback list. NOP for !HIGHRES */
-	if (hrtimer_cb_pending(timer))
-		hrtimer_remove_cb_pending(timer);
-	else {
+	if (timer->state & HRTIMER_STATE_ENQUEUED) {
 		/*
 		 * Remove the timer from the rbtree and replace the
 		 * first entry pointer if necessary.
@@ -953,7 +913,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
-	int ret, raise;
+	int ret;
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -988,26 +948,8 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 	enqueue_hrtimer(timer, new_base,
 			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
-	/*
-	 * The timer may be expired and moved to the cb_pending
-	 * list. We can not raise the softirq with base lock held due
-	 * to a possible deadlock with runqueue lock.
-	 */
-	raise = timer->state == HRTIMER_STATE_PENDING;
-
-	/*
-	 * We use preempt_disable to prevent this task from migrating after
-	 * setting up the softirq and raising it. Otherwise, if me migrate
-	 * we will raise the softirq on the wrong CPU.
-	 */
-	preempt_disable();
-
 	unlock_hrtimer_base(timer, &flags);
 
-	if (raise)
-		hrtimer_raise_softirq();
-	preempt_enable();
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1192,75 +1134,6 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
-static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
-{
-	spin_lock_irq(&cpu_base->lock);
-
-	while (!list_empty(&cpu_base->cb_pending)) {
-		enum hrtimer_restart (*fn)(struct hrtimer *);
-		struct hrtimer *timer;
-		int restart;
-		int emulate_hardirq_ctx = 0;
-
-		timer = list_entry(cpu_base->cb_pending.next,
-				   struct hrtimer, cb_entry);
-
-		debug_hrtimer_deactivate(timer);
-		timer_stats_account_hrtimer(timer);
-
-		fn = timer->function;
-		/*
-		 * A timer might have been added to the cb_pending list
-		 * when it was migrated during a cpu-offline operation.
-		 * Emulate hardirq context for such timers.
-		 */
-		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
-		    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
-			emulate_hardirq_ctx = 1;
-
-		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
-		spin_unlock_irq(&cpu_base->lock);
-
-		if (unlikely(emulate_hardirq_ctx)) {
-			local_irq_disable();
-			restart = fn(timer);
-			local_irq_enable();
-		} else
-			restart = fn(timer);
-
-		spin_lock_irq(&cpu_base->lock);
-
-		timer->state &= ~HRTIMER_STATE_CALLBACK;
-		if (restart == HRTIMER_RESTART) {
-			BUG_ON(hrtimer_active(timer));
-			/*
-			 * Enqueue the timer, allow reprogramming of the event
-			 * device
-			 */
-			enqueue_hrtimer(timer, timer->base, 1);
-		} else if (hrtimer_active(timer)) {
-			/*
-			 * If the timer was rearmed on another CPU, reprogram
-			 * the event device.
-			 */
-			struct hrtimer_clock_base *base = timer->base;
-
-			if (base->first == &timer->node &&
-			    hrtimer_reprogram(timer, base)) {
-				/*
-				 * Timer is expired. Thus move it from tree to
-				 * pending list again.
-				 */
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-			}
-		}
-	}
-	spin_unlock_irq(&cpu_base->lock);
-}
-
 static void __run_hrtimer(struct hrtimer *timer)
 {
 	struct hrtimer_clock_base *base = timer->base;
@@ -1268,25 +1141,21 @@ static void __run_hrtimer(struct hrtimer *timer)
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
+	WARN_ON(!irqs_disabled());
+
 	debug_hrtimer_deactivate(timer);
 	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
 	timer_stats_account_hrtimer(timer);
-
 	fn = timer->function;
-	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
-	    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
-		/*
-		 * Used for scheduler timers, avoid lock inversion with
-		 * rq->lock and tasklist_lock.
-		 *
-		 * These timers are required to deal with enqueue expiry
-		 * themselves and are not allowed to migrate.
-		 */
-		spin_unlock(&cpu_base->lock);
-		restart = fn(timer);
-		spin_lock(&cpu_base->lock);
-	} else
-		restart = fn(timer);
+
+	/*
+	 * Because we run timers from hardirq context, there is no chance
+	 * they get migrated to another cpu, therefore its safe to unlock
+	 * the timer base.
+	 */
+	spin_unlock(&cpu_base->lock);
+	restart = fn(timer);
+	spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
@@ -1311,7 +1180,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
 	ktime_t expires_next, now;
-	int i, raise = 0;
+	int i;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1360,16 +1229,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 				break;
 			}
 
-			/* Move softirq callbacks to the pending list */
-			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-				raise = 1;
-				continue;
-			}
-
 			__run_hrtimer(timer);
 		}
 		spin_unlock(&cpu_base->lock);
@@ -1383,10 +1242,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 		if (tick_program_event(expires_next, 0))
 			goto retry;
 	}
-
-	/* Raise softirq ? */
-	if (raise)
-		raise_softirq(HRTIMER_SOFTIRQ);
 }
 
 /**
@@ -1413,11 +1268,6 @@ void hrtimer_peek_ahead_timers(void)
 	local_irq_restore(flags);
 }
 
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-	run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
-}
-
 #endif	/* CONFIG_HIGH_RES_TIMERS */
 
 /*
@@ -1429,8 +1279,6 @@ static void run_hrtimer_softirq(struct softirq_action *h)
  */
 void hrtimer_run_pending(void)
 {
-	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
 	if (hrtimer_hres_active())
 		return;
 
@@ -1444,8 +1292,6 @@ void hrtimer_run_pending(void)
 	 */
 	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
 		hrtimer_switch_to_hres();
-
-	run_hrtimer_pending(cpu_base);
 }
 
 /*
@@ -1482,14 +1328,6 @@ void hrtimer_run_queues(void)
 					hrtimer_get_expires_tv64(timer))
 				break;
 
-			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-				__remove_hrtimer(timer, base,
-					HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					&base->cpu_base->cb_pending);
-				continue;
-			}
-
 			__run_hrtimer(timer);
 		}
 		spin_unlock(&cpu_base->lock);
@@ -1516,9 +1354,6 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 {
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = task;
-#ifdef CONFIG_HIGH_RES_TIMERS
-	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
-#endif
 }
 
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
@@ -1655,36 +1490,22 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 
-	INIT_LIST_HEAD(&cpu_base->cb_pending);
 	hrtimer_init_hres(cpu_base);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-				struct hrtimer_clock_base *new_base, int dcpu)
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+				 struct hrtimer_clock_base *new_base, int dcpu)
 {
 	struct hrtimer *timer;
 	struct rb_node *node;
-	int raise = 0;
 
 	while ((node = rb_first(&old_base->active))) {
 		timer = rb_entry(node, struct hrtimer, node);
 		BUG_ON(hrtimer_callback_running(timer));
 		debug_hrtimer_deactivate(timer);
 
-		/*
-		 * Should not happen. Per CPU timers should be
-		 * canceled _before_ the migration code is called
-		 */
-		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
-			__remove_hrtimer(timer, old_base,
-					 HRTIMER_STATE_INACTIVE, 0);
-			WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
-			     timer, timer->function, dcpu);
-			continue;
-		}
-
 		/*
 		 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
 		 * timer could be seen as !active and just vanish away
@@ -1708,48 +1529,19 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 * otherwise we end up with a stale timer.
 		 */
 		if (timer->state == HRTIMER_STATE_MIGRATE) {
-			timer->state = HRTIMER_STATE_PENDING;
-			list_add_tail(&timer->cb_entry,
-				      &new_base->cpu_base->cb_pending);
-			raise = 1;
+			/* XXX: running on offline cpu */
+			__run_hrtimer(timer);
 		}
 #endif
 		/* Clear the migration state bit */
 		timer->state &= ~HRTIMER_STATE_MIGRATE;
 	}
-	return raise;
 }
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
-				   struct hrtimer_cpu_base *new_base)
-{
-	struct hrtimer *timer;
-	int raise = 0;
-
-	while (!list_empty(&old_base->cb_pending)) {
-		timer = list_entry(old_base->cb_pending.next,
-				   struct hrtimer, cb_entry);
-
-		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
-		timer->base = &new_base->clock_base[timer->base->index];
-		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
-		raise = 1;
-	}
-	return raise;
-}
-#else
-static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
-				   struct hrtimer_cpu_base *new_base)
-{
-	return 0;
-}
-#endif
-
 static void migrate_hrtimers(int cpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i, raise = 0;
+	int i;
 
 	BUG_ON(cpu_online(cpu));
 	old_base = &per_cpu(hrtimer_bases, cpu);
@@ -1764,20 +1556,13 @@ static void migrate_hrtimers(int cpu)
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		if (migrate_hrtimer_list(&old_base->clock_base[i],
-					 &new_base->clock_base[i], cpu))
-			raise = 1;
+		migrate_hrtimer_list(&old_base->clock_base[i],
+				     &new_base->clock_base[i], cpu);
 	}
 
-	if (migrate_hrtimer_pending(old_base, new_base))
-		raise = 1;
-
 	spin_unlock(&old_base->lock);
 	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
-
-	if (raise)
-		hrtimer_raise_softirq();
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
@@ -1817,9 +1602,6 @@ void __init hrtimers_init(void)
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
-	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
 }
 
 /**
diff --git a/kernel/sched.c b/kernel/sched.c
index 9b1e79371c20..5ac5e9536168 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -203,7 +203,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
-	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static inline int rt_bandwidth_enabled(void)
@@ -1139,7 +1138,6 @@ static void init_rq_hrtick(struct rq *rq)
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8ff15e5d486b..f5f793d92415 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -131,7 +131,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 {
 	enum hrtimer_restart res = HRTIMER_NORESTART;
 
-	write_seqlock_irq(&xtime_lock);
+	write_seqlock(&xtime_lock);
 
 	switch (time_state) {
 	case TIME_OK:
@@ -164,7 +164,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 	}
 	update_vsyscall(&xtime, clock);
 
-	write_sequnlock_irq(&xtime_lock);
+	write_sequnlock(&xtime_lock);
 
 	return res;
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 342fc9ccab46..502a81e2639b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -681,7 +681,6 @@ void tick_setup_sched_timer(void)
 	 */
 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	ts->sched_timer.function = tick_sched_timer;
-	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	/* Get the next period (per cpu) */
 	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..ae542e2e38d5 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,6 @@ static void start_stack_timer(int cpu)
 
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
-	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
 }
diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c
index 1899cf0685bc..8e52b2a8a13a 100644
--- a/sound/drivers/pcsp/pcsp.c
+++ b/sound/drivers/pcsp/pcsp.c
@@ -96,7 +96,6 @@ static int __devinit snd_card_pcsp_probe(int devnum, struct device *dev)
 		return -EINVAL;
 
 	hrtimer_init(&pcsp_chip.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	pcsp_chip.timer.cb_mode = HRTIMER_CB_SOFTIRQ;
 	pcsp_chip.timer.function = pcsp_do_timer;
 
 	card = snd_card_new(index, id, THIS_MODULE, 0);
-- 
cgit v1.2.3


From ca0002a179bfa532d009a9272d619732872c49bd Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Tue, 25 Nov 2008 09:01:25 +0100
Subject: x86, bts: base in-kernel ds interface on handles

Impact: generalize the DS code to shared buffers

Change the in-kernel ds.h interface to identify the tracer via a
handle returned on ds_request_~().

Tracers used to be identified via their task_struct.

The changes are required to allow DS to be shared between different
tasks, which is needed for perfmon2 and for ftrace.

For ptrace, the handle is stored in the traced task's task_struct.
This should probably go into a (arch-specific) ptrace context some
time.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ds.h | 124 ++++-----
 arch/x86/kernel/ds.c      | 679 +++++++++++++++++++++++-----------------------
 arch/x86/kernel/ptrace.c  |  73 ++---
 include/linux/sched.h     |   9 +
 4 files changed, 446 insertions(+), 439 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a95008457ea4..0af997de5f01 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -26,11 +26,18 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/err.h>
 
 
 #ifdef CONFIG_X86_DS
 
 struct task_struct;
+struct ds_tracer;
+struct bts_tracer;
+struct pebs_tracer;
+
+typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
+typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
 
 /*
  * Request BTS or PEBS
@@ -38,21 +45,29 @@ struct task_struct;
  * Due to alignement constraints, the actual buffer may be slightly
  * smaller than the requested or provided buffer.
  *
- * Returns 0 on success; -Eerrno otherwise
+ * Returns a pointer to a tracer structure on success, or
+ * ERR_PTR(errcode) on failure.
+ *
+ * The interrupt threshold is independent from the overflow callback
+ * to allow users to use their own overflow interrupt handling mechanism.
  *
  * task: the task to request recording for;
  *       NULL for per-cpu recording on the current cpu
  * base: the base pointer for the (non-pageable) buffer;
  *       NULL if buffer allocation requested
- * size: the size of the requested or provided buffer
+ * size: the size of the requested or provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
  *       NULL if cyclic buffer requested
+ * th: the interrupt threshold in records from the end of the buffer;
+ *     -1 if no interrupt threshold is requested.
  */
-typedef void (*ds_ovfl_callback_t)(struct task_struct *);
-extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
-			  ds_ovfl_callback_t ovfl);
-extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
-			   ds_ovfl_callback_t ovfl);
+extern struct bts_tracer *ds_request_bts(struct task_struct *task,
+					 void *base, size_t size,
+					 bts_ovfl_callback_t ovfl, size_t th);
+extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+					   void *base, size_t size,
+					   pebs_ovfl_callback_t ovfl,
+					   size_t th);
 
 /*
  * Release BTS or PEBS resources
@@ -61,37 +76,34 @@ extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
  *
  * Returns 0 on success; -Eerrno otherwise
  *
- * task: the task to release resources for;
- *       NULL to release resources for the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_release_bts(struct task_struct *task);
-extern int ds_release_pebs(struct task_struct *task);
+extern int ds_release_bts(struct bts_tracer *tracer);
+extern int ds_release_pebs(struct pebs_tracer *tracer);
 
 /*
- * Return the (array) index of the write pointer.
+ * Get the (array) index of the write pointer.
  * (assuming an array of BTS/PEBS records)
  *
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
  */
-extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
 
 /*
- * Return the (array) index one record beyond the end of the array.
+ * Get the (array) index one record beyond the end of the array.
  * (assuming an array of BTS/PEBS records)
  *
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
  */
-extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
 
 /*
  * Provide a pointer to the BTS/PEBS record at parameter index.
@@ -102,14 +114,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
  *
  * Returns the size of a single record on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  * index: the index of the requested record
  * record (out): pointer to the requested record
  */
-extern int ds_access_bts(struct task_struct *task,
+extern int ds_access_bts(struct bts_tracer *tracer,
 			 size_t index, const void **record);
-extern int ds_access_pebs(struct task_struct *task,
+extern int ds_access_pebs(struct pebs_tracer *tracer,
 			  size_t index, const void **record);
 
 /*
@@ -129,38 +140,24 @@ extern int ds_access_pebs(struct task_struct *task,
  *
  * Returns the number of bytes written or -Eerrno.
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  * buffer: the buffer to write
  * size: the size of the buffer
  */
-extern int ds_write_bts(struct task_struct *task,
+extern int ds_write_bts(struct bts_tracer *tracer,
 			const void *buffer, size_t size);
-extern int ds_write_pebs(struct task_struct *task,
+extern int ds_write_pebs(struct pebs_tracer *tracer,
 			 const void *buffer, size_t size);
 
-/*
- * Same as ds_write_bts/pebs, but omit ownership checks.
- *
- * This is needed to have some other task than the owner of the
- * BTS/PEBS buffer or the parameter task itself write into the
- * respective buffer.
- */
-extern int ds_unchecked_write_bts(struct task_struct *task,
-				  const void *buffer, size_t size);
-extern int ds_unchecked_write_pebs(struct task_struct *task,
-				   const void *buffer, size_t size);
-
 /*
  * Reset the write pointer of the BTS/PEBS buffer.
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_reset_bts(struct task_struct *task);
-extern int ds_reset_pebs(struct task_struct *task);
+extern int ds_reset_bts(struct bts_tracer *tracer);
+extern int ds_reset_pebs(struct pebs_tracer *tracer);
 
 /*
  * Clear the BTS/PEBS buffer and reset the write pointer.
@@ -168,33 +165,30 @@ extern int ds_reset_pebs(struct task_struct *task);
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_clear_bts(struct task_struct *task);
-extern int ds_clear_pebs(struct task_struct *task);
+extern int ds_clear_bts(struct bts_tracer *tracer);
+extern int ds_clear_pebs(struct pebs_tracer *tracer);
 
 /*
  * Provide the PEBS counter reset value.
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
  * value (out): the counter reset value
  */
-extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
+extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
 
 /*
  * Set the PEBS counter reset value.
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
 
 /*
  * Initialization
@@ -207,17 +201,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
 /*
  * The DS context - part of struct thread_struct.
  */
+#define MAX_SIZEOF_DS (12 * 8)
+
 struct ds_context {
 	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-	unsigned char *ds;
+	unsigned char ds[MAX_SIZEOF_DS];
 	/* the owner of the BTS and PEBS configuration, respectively */
-	struct task_struct *owner[2];
-	/* buffer overflow notification function for BTS and PEBS */
-	ds_ovfl_callback_t callback[2];
-	/* the original buffer address */
-	void *buffer[2];
-	/* the number of allocated pages for on-request allocated buffers */
-	unsigned int pages[2];
+	struct ds_tracer  *owner[2];
 	/* use count */
 	unsigned long count;
 	/* a pointer to the context location inside the thread_struct
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d6938d9351cf..96768e9cce99 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/kernel.h>
 
 
 /*
@@ -44,6 +45,35 @@ struct ds_configuration {
 };
 static struct ds_configuration ds_cfg;
 
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+	/* the DS context (partially) owned by this tracer */
+	struct ds_context *context;
+	/* the buffer provided on ds_request() and its size in bytes */
+	void *buffer;
+	size_t size;
+	/* the number of allocated pages for on-request allocated buffers */
+	unsigned int pages;
+};
+
+struct bts_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* buffer overflow notification function */
+	bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* buffer overflow notification function */
+	pebs_ovfl_callback_t ovfl;
+};
 
 /*
  * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -107,35 +137,15 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 	(*(unsigned long *)base) = value;
 }
 
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+
 
 /*
  * Locking is done only for allocating BTS or PEBS resources and for
  * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
  */
 static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
-/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
- */
-static inline int ds_validate_access(struct ds_context *context,
-				     enum ds_qualifier qual)
-{
-	if (!context)
-		return -EPERM;
-
-	if (context->owner[qual] == current)
-		return 0;
-
-	return -EPERM;
-}
-
 
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
@@ -183,50 +193,12 @@ static inline int check_tracer(struct task_struct *task)
  *
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- *   requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- *   non-existing context indicates an error. It acquires and releases
- *   the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
  */
 static DEFINE_PER_CPU(struct ds_context *, system_context);
 
 #define this_system_context per_cpu(system_context, smp_processor_id())
 
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
 static inline struct ds_context *ds_get_context(struct task_struct *task)
-{
-	struct ds_context *context;
-	unsigned long irq;
-
-	spin_lock_irqsave(&ds_lock, irq);
-
-	context = (task ? task->thread.ds_ctx : this_system_context);
-	if (context)
-		context->count++;
-
-	spin_unlock_irqrestore(&ds_lock, irq);
-
-	return context;
-}
-
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 {
 	struct ds_context **p_context =
 		(task ? &task->thread.ds_ctx : &this_system_context);
@@ -238,16 +210,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 		if (!context)
 			return NULL;
 
-		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-		if (!context->ds) {
-			kfree(context);
-			return NULL;
-		}
-
 		spin_lock_irqsave(&ds_lock, irq);
 
 		if (*p_context) {
-			kfree(context->ds);
 			kfree(context);
 
 			context = *p_context;
@@ -272,10 +237,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 	return context;
 }
 
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
 static inline void ds_put_context(struct ds_context *context)
 {
 	unsigned long irq;
@@ -296,13 +257,6 @@ static inline void ds_put_context(struct ds_context *context)
 	if (!context->task || (context->task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
-	put_tracer(context->task);
-
-	/* free any leftover buffers from tracers that did not
-	 * deallocate them properly. */
-	kfree(context->buffer[ds_bts]);
-	kfree(context->buffer[ds_pebs]);
-	kfree(context->ds);
 	kfree(context);
  out:
 	spin_unlock_irqrestore(&ds_lock, irq);
@@ -312,21 +266,29 @@ static inline void ds_put_context(struct ds_context *context)
 /*
  * Handle a buffer overflow
  *
- * task: the task whose buffers are overflowing;
- *       NULL for a buffer overflow on the current cpu
  * context: the ds context
  * qual: the buffer type
  */
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
-			enum ds_qualifier qual)
-{
-	if (!context)
-		return;
-
-	if (context->callback[qual])
-		(*context->callback[qual])(task);
-
-	/* todo: do some more overflow handling */
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
+{
+	switch (qual) {
+	case ds_bts: {
+		struct bts_tracer *tracer =
+			container_of(context->owner[qual],
+				     struct bts_tracer, ds);
+		if (tracer->ovfl)
+			tracer->ovfl(tracer);
+	}
+		break;
+	case ds_pebs: {
+		struct pebs_tracer *tracer =
+			container_of(context->owner[qual],
+				     struct pebs_tracer, ds);
+		if (tracer->ovfl)
+			tracer->ovfl(tracer);
+	}
+		break;
+	}
 }
 
 
@@ -343,23 +305,25 @@ static void ds_overflow(struct task_struct *task, struct ds_context *context,
 static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
 {
 	unsigned long rlim, vm, pgsz;
-	void *buffer;
+	void *buffer = NULL;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
+	down_write(&current->mm->mmap_sem);
+
 	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
 	vm   = current->mm->total_vm  + pgsz;
 	if (rlim < vm)
-		return NULL;
+		goto out;
 
 	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
 	vm   = current->mm->locked_vm  + pgsz;
 	if (rlim < vm)
-		return NULL;
+		goto out;
 
 	buffer = kzalloc(size, GFP_KERNEL);
 	if (!buffer)
-		return NULL;
+		goto out;
 
 	current->mm->total_vm  += pgsz;
 	current->mm->locked_vm += pgsz;
@@ -367,290 +331,337 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
 	if (pages)
 		*pages = pgsz;
 
+ out:
+	up_write(&current->mm->mmap_sem);
 	return buffer;
 }
 
-static int ds_request(struct task_struct *task, void *base, size_t size,
-		      ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+static void ds_install_ds_config(struct ds_context *context,
+				 enum ds_qualifier qual,
+				 void *base, size_t size, size_t ith)
 {
-	struct ds_context *context;
 	unsigned long buffer, adj;
-	const unsigned long alignment = (1 << 3);
+
+	/* adjust the buffer address and size to meet alignment
+	 * constraints:
+	 * - buffer is double-word aligned
+	 * - size is multiple of record size
+	 *
+	 * We checked the size at the very beginning; we have enough
+	 * space to do the adjustment.
+	 */
+	buffer = (unsigned long)base;
+
+	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
+	buffer += adj;
+	size   -= adj;
+
+	size /= ds_cfg.sizeof_rec[qual];
+	size *= ds_cfg.sizeof_rec[qual];
+
+	ds_set(context->ds, qual, ds_buffer_base, buffer);
+	ds_set(context->ds, qual, ds_index, buffer);
+	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+	/* The value for 'no threshold' is -1, which will set the
+	 * threshold outside of the buffer, just like we want it.
+	 */
+	ds_set(context->ds, qual,
+	       ds_interrupt_threshold, buffer + size - ith);
+}
+
+static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
+		      struct task_struct *task,
+		      void *base, size_t size, size_t th)
+{
+	struct ds_context *context;
 	unsigned long irq;
-	int error = 0;
+	int error;
 
+	error = -EOPNOTSUPP;
 	if (!ds_cfg.sizeof_ds)
-		return -EOPNOTSUPP;
+		goto out;
 
 	/* we require some space to do alignment adjustments below */
-	if (size < (alignment + ds_cfg.sizeof_rec[qual]))
-		return -EINVAL;
+	error = -EINVAL;
+	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+		goto out;
 
-	/* buffer overflow notification is not yet implemented */
-	if (ovfl)
-		return -EOPNOTSUPP;
+	if (th != (size_t)-1) {
+		th *= ds_cfg.sizeof_rec[qual];
+
+		error = -EINVAL;
+		if (size <= th)
+			goto out;
+	}
+
+	error = -ENOMEM;
+	if (!base) {
+		base = ds_allocate_buffer(size, &tracer->pages);
+		if (!base)
+			goto out;
+	}
 
+	tracer->buffer = base;
+	tracer->size = size;
 
-	context = ds_alloc_context(task);
+	error = -ENOMEM;
+	context = ds_get_context(task);
 	if (!context)
-		return -ENOMEM;
+		goto out;
+	tracer->context = context;
+
 
 	spin_lock_irqsave(&ds_lock, irq);
 
 	error = -EPERM;
 	if (!check_tracer(task))
 		goto out_unlock;
-
 	get_tracer(task);
 
-	error = -EALREADY;
-	if (context->owner[qual] == current)
-		goto out_put_tracer;
 	error = -EPERM;
-	if (context->owner[qual] != NULL)
+	if (context->owner[qual])
 		goto out_put_tracer;
-	context->owner[qual] = current;
+	context->owner[qual] = tracer;
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
 
-	error = -ENOMEM;
-	if (!base) {
-		base = ds_allocate_buffer(size, &context->pages[qual]);
-		if (!base)
-			goto out_release;
-
-		context->buffer[qual]   = base;
-	}
-	error = 0;
+	ds_install_ds_config(context, qual, base, size, th);
 
-	context->callback[qual] = ovfl;
-
-	/* adjust the buffer address and size to meet alignment
-	 * constraints:
-	 * - buffer is double-word aligned
-	 * - size is multiple of record size
-	 *
-	 * We checked the size at the very beginning; we have enough
-	 * space to do the adjustment.
-	 */
-	buffer = (unsigned long)base;
-
-	adj = ALIGN(buffer, alignment) - buffer;
-	buffer += adj;
-	size   -= adj;
-
-	size /= ds_cfg.sizeof_rec[qual];
-	size *= ds_cfg.sizeof_rec[qual];
-
-	ds_set(context->ds, qual, ds_buffer_base, buffer);
-	ds_set(context->ds, qual, ds_index, buffer);
-	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
-
-	if (ovfl) {
-		/* todo: select a suitable interrupt threshold */
-	} else
-		ds_set(context->ds, qual,
-		       ds_interrupt_threshold, buffer + size + 1);
-
-	/* we keep the context until ds_release */
-	return error;
-
- out_release:
-	context->owner[qual] = NULL;
-	ds_put_context(context);
-	put_tracer(task);
-	return error;
+	return 0;
 
  out_put_tracer:
-	spin_unlock_irqrestore(&ds_lock, irq);
-	ds_put_context(context);
 	put_tracer(task);
-	return error;
-
  out_unlock:
 	spin_unlock_irqrestore(&ds_lock, irq);
 	ds_put_context(context);
+	tracer->context = NULL;
+ out:
 	return error;
 }
 
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
-		   ds_ovfl_callback_t ovfl)
+struct bts_tracer *ds_request_bts(struct task_struct *task,
+				  void *base, size_t size,
+				  bts_ovfl_callback_t ovfl, size_t th)
 {
-	return ds_request(task, base, size, ovfl, ds_bts);
-}
+	struct bts_tracer *tracer;
+	int error;
 
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
-		    ds_ovfl_callback_t ovfl)
-{
-	return ds_request(task, base, size, ovfl, ds_pebs);
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
+		goto out;
+
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		goto out;
+	tracer->ovfl = ovfl;
+
+	error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+	if (error < 0)
+		goto out_tracer;
+
+	return tracer;
+
+ out_tracer:
+	(void)ds_release_bts(tracer);
+ out:
+	return ERR_PTR(error);
 }
 
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+				    void *base, size_t size,
+				    pebs_ovfl_callback_t ovfl, size_t th)
 {
-	struct ds_context *context;
+	struct pebs_tracer *tracer;
 	int error;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
 		goto out;
 
-	kfree(context->buffer[qual]);
-	context->buffer[qual] = NULL;
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		goto out;
+	tracer->ovfl = ovfl;
 
-	current->mm->total_vm  -= context->pages[qual];
-	current->mm->locked_vm -= context->pages[qual];
-	context->pages[qual] = 0;
-	context->owner[qual] = NULL;
+	error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+	if (error < 0)
+		goto out_tracer;
 
-	/*
-	 * we put the context twice:
-	 *   once for the ds_get_context
-	 *   once for the corresponding ds_request
-	 */
-	ds_put_context(context);
+	return tracer;
+
+ out_tracer:
+	(void)ds_release_pebs(tracer);
  out:
-	ds_put_context(context);
-	return error;
+	return ERR_PTR(error);
+}
+
+static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
+{
+	if (tracer->context) {
+		BUG_ON(tracer->context->owner[qual] != tracer);
+		tracer->context->owner[qual] = NULL;
+
+		put_tracer(tracer->context->task);
+		ds_put_context(tracer->context);
+	}
+
+	if (tracer->pages) {
+		kfree(tracer->buffer);
+
+		down_write(&current->mm->mmap_sem);
+
+		current->mm->total_vm  -= tracer->pages;
+		current->mm->locked_vm -= tracer->pages;
+
+		up_write(&current->mm->mmap_sem);
+	}
 }
 
-int ds_release_bts(struct task_struct *task)
+int ds_release_bts(struct bts_tracer *tracer)
 {
-	return ds_release(task, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_release(&tracer->ds, ds_bts);
+	kfree(tracer);
+
+	return 0;
 }
 
-int ds_release_pebs(struct task_struct *task)
+int ds_release_pebs(struct pebs_tracer *tracer)
 {
-	return ds_release(task, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_release(&tracer->ds, ds_pebs);
+	kfree(tracer);
+
+	return 0;
 }
 
-static int ds_get_index(struct task_struct *task, size_t *pos,
-			enum ds_qualifier qual)
+static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
 {
-	struct ds_context *context;
 	unsigned long base, index;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
 
 	base  = ds_get(context->ds, qual, ds_buffer_base);
 	index = ds_get(context->ds, qual, ds_index);
 
-	error = ((index - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
+	return (index - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
 {
-	return ds_get_index(task, pos, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_index(tracer->ds.context, ds_bts);
+
+	return 0;
 }
 
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
 {
-	return ds_get_index(task, pos, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_index(tracer->ds.context, ds_pebs);
+
+	return 0;
 }
 
-static int ds_get_end(struct task_struct *task, size_t *pos,
-		      enum ds_qualifier qual)
+static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
 {
-	struct ds_context *context;
-	unsigned long base, end;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
+	unsigned long base, max;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
-	end  = ds_get(context->ds, qual, ds_absolute_maximum);
+	max  = ds_get(context->ds, qual, ds_absolute_maximum);
 
-	error = ((end - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
+	return (max - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
+int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
 {
-	return ds_get_end(task, pos, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_end(tracer->ds.context, ds_bts);
+
+	return 0;
 }
 
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
 {
-	return ds_get_end(task, pos, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_end(tracer->ds.context, ds_pebs);
+
+	return 0;
 }
 
-static int ds_access(struct task_struct *task, size_t index,
-		     const void **record, enum ds_qualifier qual)
+static int ds_access(struct ds_context *context, enum ds_qualifier qual,
+		     size_t index, const void **record)
 {
-	struct ds_context *context;
 	unsigned long base, idx;
-	int error;
 
 	if (!record)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
-
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	idx = base + (index * ds_cfg.sizeof_rec[qual]);
 
-	error = -EINVAL;
 	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
-		goto out;
+		return -EINVAL;
 
 	*record = (const void *)idx;
-	error = ds_cfg.sizeof_rec[qual];
- out:
-	ds_put_context(context);
-	return error;
+
+	return ds_cfg.sizeof_rec[qual];
 }
 
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+int ds_access_bts(struct bts_tracer *tracer, size_t index,
+		  const void **record)
 {
-	return ds_access(task, index, record, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	return ds_access(tracer->ds.context, ds_bts, index, record);
 }
 
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
+		   const void **record)
 {
-	return ds_access(task, index, record, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	return ds_access(tracer->ds.context, ds_pebs, index, record);
 }
 
-static int ds_write(struct task_struct *task, const void *record, size_t size,
-		    enum ds_qualifier qual, int force)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+		    const void *record, size_t size)
 {
-	struct ds_context *context;
-	int error;
+	int bytes_written = 0;
 
 	if (!record)
 		return -EINVAL;
 
-	error = -EPERM;
-	context = ds_get_context(task);
-	if (!context)
-		goto out;
-
-	if (!force) {
-		error = ds_validate_access(context, qual);
-		if (error < 0)
-			goto out;
-	}
-
-	error = 0;
 	while (size) {
 		unsigned long base, index, end, write_end, int_th;
 		unsigned long write_size, adj_write_size;
@@ -678,14 +689,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
 			write_end = end;
 
 		if (write_end <= index)
-			goto out;
+			break;
 
 		write_size = min((unsigned long) size, write_end - index);
 		memcpy((void *)index, record, write_size);
 
 		record = (const char *)record + write_size;
-		size  -= write_size;
-		error += write_size;
+		size -= write_size;
+		bytes_written += write_size;
 
 		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 		adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -700,47 +711,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
 		ds_set(context->ds, qual, ds_index, index);
 
 		if (index >= int_th)
-			ds_overflow(task, context, qual);
+			ds_overflow(context, qual);
 	}
 
- out:
-	ds_put_context(context);
-	return error;
+	return bytes_written;
 }
 
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 0);
-}
+	if (!tracer)
+		return -EINVAL;
 
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+	return ds_write(tracer->ds.context, ds_bts, record, size);
 }
 
-int ds_unchecked_write_bts(struct task_struct *task,
-			   const void *record, size_t size)
+int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 1);
-}
+	if (!tracer)
+		return -EINVAL;
 
-int ds_unchecked_write_pebs(struct task_struct *task,
-			    const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+	return ds_write(tracer->ds.context, ds_pebs, record, size);
 }
 
-static int ds_reset_or_clear(struct task_struct *task,
-			     enum ds_qualifier qual, int clear)
+static void ds_reset_or_clear(struct ds_context *context,
+			      enum ds_qualifier qual, int clear)
 {
-	struct ds_context *context;
 	unsigned long base, end;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	end  = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -749,70 +745,69 @@ static int ds_reset_or_clear(struct task_struct *task,
 		memset((void *)base, 0, end - base);
 
 	ds_set(context->ds, qual, ds_index, base);
-
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
 }
 
-int ds_reset_bts(struct task_struct *task)
+int ds_reset_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+
+	return 0;
 }
 
-int ds_reset_pebs(struct task_struct *task)
+int ds_reset_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+
+	return 0;
 }
 
-int ds_clear_bts(struct task_struct *task)
+int ds_clear_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
+
+	return 0;
 }
 
-int ds_clear_pebs(struct task_struct *task)
+int ds_clear_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+
+	return 0;
 }
 
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
 {
-	struct ds_context *context;
-	int error;
+	if (!tracer)
+		return -EINVAL;
 
 	if (!value)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
-
-	*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+	*value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 {
-	struct ds_context *context;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
+	if (!tracer)
+		return -EINVAL;
 
-	*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
 static const struct ds_configuration ds_cfg_var = {
@@ -840,6 +835,10 @@ static inline void
 ds_configure(const struct ds_configuration *cfg)
 {
 	ds_cfg = *cfg;
+
+	printk(KERN_INFO "DS available\n");
+
+	BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -883,6 +882,8 @@ void ds_free(struct ds_context *context)
 	 * is dying. There should not be any user of that context left
 	 * to disturb us, anymore. */
 	unsigned long leftovers = context->count;
-	while (leftovers--)
+	while (leftovers--) {
+		put_tracer(context->task);
 		ds_put_context(context);
+	}
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 06180dff5b2e..76adf5b640ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	size_t bts_index, bts_end;
 	int error;
 
-	error = ds_get_bts_end(child, &bts_end);
+	error = ds_get_bts_end(child->bts, &bts_end);
 	if (error < 0)
 		return error;
 
 	if (bts_end <= index)
 		return -EINVAL;
 
-	error = ds_get_bts_index(child, &bts_index);
+	error = ds_get_bts_index(child->bts, &bts_index);
 	if (error < 0)
 		return error;
 
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	if (bts_end <= bts_index)
 		bts_index -= bts_end;
 
-	error = ds_access_bts(child, bts_index, &bts_record);
+	error = ds_access_bts(child->bts, bts_index, &bts_record);
 	if (error < 0)
 		return error;
 
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
 	size_t end, i;
 	int error;
 
-	error = ds_get_bts_index(child, &end);
+	error = ds_get_bts_index(child->bts, &end);
 	if (error < 0)
 		return error;
 
 	if (size < (end * sizeof(struct bts_struct)))
 		return -EIO;
 
-	error = ds_access_bts(child, 0, (const void **)&raw);
+	error = ds_access_bts(child->bts, 0, (const void **)&raw);
 	if (error < 0)
 		return error;
 
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
 			return -EFAULT;
 	}
 
-	error = ds_clear_bts(child);
+	error = ds_clear_bts(child->bts);
 	if (error < 0)
 		return error;
 
 	return end;
 }
 
-static void ptrace_bts_ovfl(struct task_struct *child)
-{
-	send_sig(child->thread.bts_ovfl_signal, child, 0);
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,29 @@ static int ptrace_bts_config(struct task_struct *child,
 		goto errout;
 
 	if (cfg.flags & PTRACE_BTS_O_ALLOC) {
-		ds_ovfl_callback_t ovfl = NULL;
+		bts_ovfl_callback_t ovfl = NULL;
 		unsigned int sig = 0;
 
-		/* we ignore the error in case we were not tracing child */
-		(void)ds_release_bts(child);
-
 		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 			if (!cfg.signal)
 				goto errout;
 
+			error = -EOPNOTSUPP;
+			goto errout;
+
 			sig  = cfg.signal;
-			ovfl = ptrace_bts_ovfl;
 		}
 
-		error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
-		if (error < 0)
+		if (child->bts)
+			(void)ds_release_bts(child->bts);
+
+		child->bts = ds_request_bts(child, /* base = */ NULL, cfg.size,
+					    ovfl, /* th = */ (size_t)-1);
+		if (IS_ERR(child->bts)) {
+			error = PTR_ERR(child->bts);
+			child->bts = NULL;
 			goto errout;
+		}
 
 		child->thread.bts_ovfl_signal = sig;
 	}
@@ -823,15 +824,15 @@ static int ptrace_bts_status(struct task_struct *child,
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	error = ds_get_bts_end(child, &end);
+	error = ds_get_bts_end(child->bts, &end);
 	if (error < 0)
 		return error;
 
-	error = ds_access_bts(child, /* index = */ 0, &base);
+	error = ds_access_bts(child->bts, /* index = */ 0, &base);
 	if (error < 0)
 		return error;
 
-	error = ds_access_bts(child, /* index = */ end, &max);
+	error = ds_access_bts(child->bts, /* index = */ end, &max);
 	if (error < 0)
 		return error;
 
@@ -884,10 +885,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
 		return -EINVAL;
 	}
 
-	/* The writing task will be the switched-to task on a context
-	 * switch. It needs to write into the switched-from task's BTS
-	 * buffer. */
-	return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
+	return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -972,13 +970,15 @@ void ptrace_disable(struct task_struct *child)
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
 #ifdef CONFIG_X86_PTRACE_BTS
-	(void)ds_release_bts(child);
+	if (child->bts) {
+		(void)ds_release_bts(child->bts);
 
-	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
-	if (!child->thread.debugctlmsr)
-		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+		child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+		if (!child->thread.debugctlmsr)
+			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 
-	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+	}
 #endif /* CONFIG_X86_PTRACE_BTS */
 }
 
@@ -1110,9 +1110,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			(child, data, (struct ptrace_bts_config __user *)addr);
 		break;
 
-	case PTRACE_BTS_SIZE:
-		ret = ds_get_bts_index(child, /* pos = */ NULL);
+	case PTRACE_BTS_SIZE: {
+		size_t size;
+
+		ret = ds_get_bts_index(child->bts, &size);
+		if (ret == 0) {
+			BUG_ON(size != (int) size);
+			ret = (int) size;
+		}
 		break;
+	}
 
 	case PTRACE_BTS_GET:
 		ret = ptrace_bts_read_record
@@ -1120,7 +1127,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 
 	case PTRACE_BTS_CLEAR:
-		ret = ds_clear_bts(child);
+		ret = ds_clear_bts(child->bts);
 		break;
 
 	case PTRACE_BTS_DRAIN:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bee1e93c95ad..a9780eaa6737 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,6 +96,7 @@ struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
+struct bts_tracer;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1161,6 +1162,14 @@ struct task_struct {
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
+#ifdef CONFIG_X86_PTRACE_BTS
+	/*
+	 * This is the tracer handle for the ptrace BTS extension.
+	 * This field actually belongs to the ptracer task.
+	 */
+	struct bts_tracer *bts;
+#endif /* CONFIG_X86_PTRACE_BTS */
+
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
 	struct list_head thread_group;
-- 
cgit v1.2.3


From 6abb11aecd888d1da6276399380b7355f127c006 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Tue, 25 Nov 2008 09:05:27 +0100
Subject: x86, bts, ptrace: move BTS buffer allocation from ds.c into ptrace.c

Impact: restructure DS memory allocation to be done by the usage site of DS

Require pre-allocated buffers in ds.h.

Move the BTS buffer allocation for ptrace into ptrace.c.
The pointer to the allocated buffer is stored in the traced task's
task_struct together with the handle returned by ds_request_bts().

Removes memory accounting code.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ds.h | 12 +++----
 arch/x86/kernel/ds.c      | 92 ++++++++---------------------------------------
 arch/x86/kernel/ptrace.c  | 22 ++++++++++--
 include/linux/sched.h     |  4 +++
 4 files changed, 42 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 0af997de5f01..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
  *
  * It manages:
  * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
  * - buffer access
  *
  * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -54,8 +53,7 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
  * task: the task to request recording for;
  *       NULL for per-cpu recording on the current cpu
  * base: the base pointer for the (non-pageable) buffer;
- *       NULL if buffer allocation requested
- * size: the size of the requested or provided buffer in bytes
+ * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
  *       NULL if cyclic buffer requested
  * th: the interrupt threshold in records from the end of the buffer;
@@ -72,8 +70,6 @@ extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 /*
  * Release BTS or PEBS resources
  *
- * Frees buffers allocated on ds_request.
- *
  * Returns 0 on success; -Eerrno otherwise
  *
  * tracer: the tracer handle returned from ds_request_~()
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 96768e9cce99..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
  *
  * It manages:
  * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
  * - buffer access
  *
  * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -57,8 +56,6 @@ struct ds_tracer {
 	/* the buffer provided on ds_request() and its size in bytes */
 	void *buffer;
 	size_t size;
-	/* the number of allocated pages for on-request allocated buffers */
-	unsigned int pages;
 };
 
 struct bts_tracer {
@@ -141,8 +138,7 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 
 
 /*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
+ * Locking is done only for allocating BTS or PEBS resources.
  */
 static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
@@ -292,50 +288,6 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 }
 
 
-/*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
- *
- * Returns the buffer, if successful;
- *         NULL, if out of memory or rlimit exceeded.
- *
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
- */
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
-{
-	unsigned long rlim, vm, pgsz;
-	void *buffer = NULL;
-
-	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-	down_write(&current->mm->mmap_sem);
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->total_vm  + pgsz;
-	if (rlim < vm)
-		goto out;
-
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->locked_vm  + pgsz;
-	if (rlim < vm)
-		goto out;
-
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		goto out;
-
-	current->mm->total_vm  += pgsz;
-	current->mm->locked_vm += pgsz;
-
-	if (pages)
-		*pages = pgsz;
-
- out:
-	up_write(&current->mm->mmap_sem);
-	return buffer;
-}
-
 static void ds_install_ds_config(struct ds_context *context,
 				 enum ds_qualifier qual,
 				 void *base, size_t size, size_t ith)
@@ -382,6 +334,10 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
 	if (!ds_cfg.sizeof_ds)
 		goto out;
 
+	error = -EINVAL;
+	if (!base)
+		goto out;
+
 	/* we require some space to do alignment adjustments below */
 	error = -EINVAL;
 	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
@@ -395,13 +351,6 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
 			goto out;
 	}
 
-	error = -ENOMEM;
-	if (!base) {
-		base = ds_allocate_buffer(size, &tracer->pages);
-		if (!base)
-			goto out;
-	}
-
 	tracer->buffer = base;
 	tracer->size = size;
 
@@ -466,7 +415,7 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 	return tracer;
 
  out_tracer:
-	(void)ds_release_bts(tracer);
+	kfree(tracer);
  out:
 	return ERR_PTR(error);
 }
@@ -496,31 +445,18 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 	return tracer;
 
  out_tracer:
-	(void)ds_release_pebs(tracer);
+	kfree(tracer);
  out:
 	return ERR_PTR(error);
 }
 
 static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
 {
-	if (tracer->context) {
-		BUG_ON(tracer->context->owner[qual] != tracer);
-		tracer->context->owner[qual] = NULL;
-
-		put_tracer(tracer->context->task);
-		ds_put_context(tracer->context);
-	}
+	BUG_ON(tracer->context->owner[qual] != tracer);
+	tracer->context->owner[qual] = NULL;
 
-	if (tracer->pages) {
-		kfree(tracer->buffer);
-
-		down_write(&current->mm->mmap_sem);
-
-		current->mm->total_vm  -= tracer->pages;
-		current->mm->locked_vm -= tracer->pages;
-
-		up_write(&current->mm->mmap_sem);
-	}
+	put_tracer(tracer->context->task);
+	ds_put_context(tracer->context);
 }
 
 int ds_release_bts(struct bts_tracer *tracer)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 76adf5b640ff..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -758,6 +758,10 @@ static int ptrace_bts_config(struct task_struct *child,
 		bts_ovfl_callback_t ovfl = NULL;
 		unsigned int sig = 0;
 
+		error = -EINVAL;
+		if (cfg.size < (10 * bts_cfg.sizeof_bts))
+			goto errout;
+
 		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 			if (!cfg.signal)
 				goto errout;
@@ -768,14 +772,26 @@ static int ptrace_bts_config(struct task_struct *child,
 			sig  = cfg.signal;
 		}
 
-		if (child->bts)
+		if (child->bts) {
 			(void)ds_release_bts(child->bts);
+			kfree(child->bts_buffer);
+
+			child->bts = NULL;
+			child->bts_buffer = NULL;
+		}
+
+		error = -ENOMEM;
+		child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
+		if (!child->bts_buffer)
+			goto errout;
 
-		child->bts = ds_request_bts(child, /* base = */ NULL, cfg.size,
+		child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
 					    ovfl, /* th = */ (size_t)-1);
 		if (IS_ERR(child->bts)) {
 			error = PTR_ERR(child->bts);
+			kfree(child->bts_buffer);
 			child->bts = NULL;
+			child->bts_buffer = NULL;
 			goto errout;
 		}
 
@@ -972,6 +988,8 @@ void ptrace_disable(struct task_struct *child)
 #ifdef CONFIG_X86_PTRACE_BTS
 	if (child->bts) {
 		(void)ds_release_bts(child->bts);
+		kfree(child->bts_buffer);
+		child->bts_buffer = NULL;
 
 		child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 		if (!child->thread.debugctlmsr)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a9780eaa6737..d02a0ca70ee9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1168,6 +1168,10 @@ struct task_struct {
 	 * This field actually belongs to the ptracer task.
 	 */
 	struct bts_tracer *bts;
+	/*
+	 * The buffer to hold the BTS data.
+	 */
+	void *bts_buffer;
 #endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
-- 
cgit v1.2.3


From 65f233fb1669e6c990cd1d7fd308ac7dc66dc207 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 25 Nov 2008 18:20:13 +0100
Subject: netfilter: fix warning in net/netfilter/nf_conntrack_proto_tcp.c

fix this warning:

  net/netfilter/nf_conntrack_proto_tcp.c: In function \u2018tcp_in_window\u2019:
  net/netfilter/nf_conntrack_proto_tcp.c:491: warning: unused variable \u2018net\u2019
  net/netfilter/nf_conntrack_proto_tcp.c: In function \u2018tcp_packet\u2019:
  net/netfilter/nf_conntrack_proto_tcp.c:812: warning: unused variable \u2018net\u2019

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7f2f43c77284..debdaf75cecf 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -129,7 +129,7 @@ extern const struct nla_policy nf_ct_port_nla_policy[];
 	 && net_ratelimit())
 #endif
 #else
-#define LOG_INVALID(net, proto) 0
+static inline int LOG_INVALID(struct net *net, int proto) { return 0; }
 #endif /* CONFIG_SYSCTL */
 
 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
-- 
cgit v1.2.3


From 3f2355cb9111ac04e7ae06a4d7044da2ae813863 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 12 Nov 2008 14:22:02 -0800
Subject: cfg80211/mac80211: Add 802.11d support

This adds country IE parsing to mac80211 and enables its usage
within the new regulatory infrastructure in cfg80211. We parse
the country IEs only on management beacons for the BSSID you are
associated to and disregard the IEs when the country and environment
(indoor, outdoor, any) matches the already processed country IE.

To avoid following misinformed or outdated APs we build and use
a regulatory domain out of the intersection between what the AP
provides us on the country IE and what CRDA is aware is allowed
on the same country.

A secondary device is allowed to follow only the same country IE
as it make no sense for two devices on a system to be in two
different countries.

In the case the AP is using country IEs for an incorrect country
the user may help compliance further by setting the regulatory
domain before or after the IE is parsed and in that case another
intersection will be performed.

CONFIG_WIRELESS_OLD_REGULATORY is supported but requires CRDA
present.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  62 ++++++
 include/net/wireless.h    |  15 ++
 net/mac80211/mlme.c       |   7 +
 net/wireless/Kconfig      |  11 ++
 net/wireless/core.c       |   5 +-
 net/wireless/core.h       |  13 ++
 net/wireless/nl80211.c    |   2 +-
 net/wireless/reg.c        | 479 ++++++++++++++++++++++++++++++++++++++++++++--
 net/wireless/reg.h        |  21 +-
 9 files changed, 591 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 56b0eb25d927..a6ec928186ad 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1042,6 +1042,68 @@ enum ieee80211_spectrum_mgmt_actioncode {
 	WLAN_ACTION_SPCT_CHL_SWITCH = 4,
 };
 
+/*
+ * IEEE 802.11-2007 7.3.2.9 Country information element
+ *
+ * Minimum length is 8 octets, ie len must be evenly
+ * divisible by 2
+ */
+
+/* Although the spec says 8 I'm seeing 6 in practice */
+#define IEEE80211_COUNTRY_IE_MIN_LEN	6
+
+/*
+ * For regulatory extension stuff see IEEE 802.11-2007
+ * Annex I (page 1141) and Annex J (page 1147). Also
+ * review 7.3.2.9.
+ *
+ * When dot11RegulatoryClassesRequired is true and the
+ * first_channel/reg_extension_id is >= 201 then the IE
+ * compromises of the 'ext' struct represented below:
+ *
+ *  - Regulatory extension ID - when generating IE this just needs
+ *    to be monotonically increasing for each triplet passed in
+ *    the IE
+ *  - Regulatory class - index into set of rules
+ *  - Coverage class - index into air propagation time (Table 7-27),
+ *    in microseconds, you can compute the air propagation time from
+ *    the index by multiplying by 3, so index 10 yields a propagation
+ *    of 10 us. Valid values are 0-31, values 32-255 are not defined
+ *    yet. A value of 0 inicates air propagation of <= 1 us.
+ *
+ *  See also Table I.2 for Emission limit sets and table
+ *  I.3 for Behavior limit sets. Table J.1 indicates how to map
+ *  a reg_class to an emission limit set and behavior limit set.
+ */
+#define IEEE80211_COUNTRY_EXTENSION_ID 201
+
+/*
+ *  Channels numbers in the IE must be monotonically increasing
+ *  if dot11RegulatoryClassesRequired is not true.
+ *
+ *  If dot11RegulatoryClassesRequired is true consecutive
+ *  subband triplets following a regulatory triplet shall
+ *  have monotonically increasing first_channel number fields.
+ *
+ *  Channel numbers shall not overlap.
+ *
+ *  Note that max_power is signed.
+ */
+struct ieee80211_country_ie_triplet {
+	union {
+		struct {
+			u8 first_channel;
+			u8 num_channels;
+			s8 max_power;
+		} __attribute__ ((packed)) chans;
+		struct {
+			u8 reg_extension_id;
+			u8 reg_class;
+			u8 coverage_class;
+		} __attribute__ ((packed)) ext;
+	};
+} __attribute__ ((packed));
+
 /* BACK action code */
 enum ieee80211_back_actioncode {
 	WLAN_ACTION_ADDBA_REQ = 0,
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 412351560b76..c85fa0ea5c51 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -373,4 +373,19 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
  * for a regulatory domain structure for the respective country.
  */
 extern void regulatory_hint(struct wiphy *wiphy, const char *alpha2);
+
+/**
+ * regulatory_hint_11d - hints a country IE as a regulatory domain
+ * @wiphy: the wireless device giving the hint (used only for reporting
+ *	conflicts)
+ * @country_ie: pointer to the country IE
+ * @country_ie_len: length of the country IE
+ *
+ * We will intersect the rd with the what CRDA tells us should apply
+ * for the alpha2 this country IE belongs to, this prevents APs from
+ * sending us incorrect or outdated information against a country.
+ */
+extern void regulatory_hint_11d(struct wiphy *wiphy,
+				u8 *country_ie,
+				u8 country_ie_len);
 #endif /* __NET_WIRELESS_H */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d81a4d2cd3aa..30adaddeed27 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1736,6 +1736,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 					       ap_ht_cap_flags);
 	}
 
+	if (elems.country_elem) {
+		/* Note we are only reviewing this on beacons
+		 * for the BSSID we are associated to */
+		regulatory_hint_11d(local->hw.wiphy,
+			elems.country_elem, elems.country_elem_len);
+	}
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 }
 
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index f7c64dbe86cc..e28e2b8fa436 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,6 +1,15 @@
 config CFG80211
         tristate "Improved wireless configuration API"
 
+config CFG80211_REG_DEBUG
+	bool "cfg80211 regulatory debugging"
+	depends on CFG80211
+	default n
+	---help---
+	  You can enable this if you want to debug regulatory changes.
+
+	  If unsure, say N.
+
 config NL80211
 	bool "nl80211 new netlink interface support"
 	depends on CFG80211
@@ -40,6 +49,8 @@ config WIRELESS_OLD_REGULATORY
 	  ieee80211_regdom module parameter. This is being phased out and you
 	  should stop using them ASAP.
 
+	  Note: You will need CRDA if you want 802.11d support
+
 	  Say Y unless you have installed a new userspace application.
 	  Also say Y if have one currently depending on the ieee80211_regdom
 	  module parameter and cannot port it to use the new userspace
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 39e3d10fccde..b96fc0c3f1c4 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -19,7 +19,6 @@
 #include "nl80211.h"
 #include "core.h"
 #include "sysfs.h"
-#include "reg.h"
 
 /* name for sysfs, %d is appended */
 #define PHY_NAME "phy"
@@ -348,6 +347,10 @@ void wiphy_unregister(struct wiphy *wiphy)
 	/* unlock again before freeing */
 	mutex_unlock(&drv->mtx);
 
+	/* If this device got a regulatory hint tell core its
+	 * free to listen now to a new shiny device regulatory hint */
+	reg_device_remove(wiphy);
+
 	list_del(&drv->list);
 	device_del(&drv->wiphy.dev);
 	debugfs_remove(drv->wiphy.debugfsdir);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 771cc5cc7658..f7fb9f413028 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -11,6 +11,7 @@
 #include <net/genetlink.h>
 #include <net/wireless.h>
 #include <net/cfg80211.h>
+#include "reg.h"
 
 struct cfg80211_registered_device {
 	struct cfg80211_ops *ops;
@@ -21,6 +22,18 @@ struct cfg80211_registered_device {
 	 * any call is in progress */
 	struct mutex mtx;
 
+	/* ISO / IEC 3166 alpha2 for which this device is receiving
+	 * country IEs on, this can help disregard country IEs from APs
+	 * on the same alpha2 quickly. The alpha2 may differ from
+	 * cfg80211_regdomain's alpha2 when an intersection has occurred.
+	 * If the AP is reconfigured this can also be used to tell us if
+	 * the country on the country IE changed. */
+	char country_ie_alpha2[2];
+
+	/* If a Country IE has been received this tells us the environment
+	 * which its telling us its in. This defaults to ENVIRON_ANY */
+	enum environment_cap env;
+
 	/* wiphy index, internal only */
 	int idx;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e3e1494e769a..00121ceddb14 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1760,7 +1760,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 #endif
 	mutex_lock(&cfg80211_drv_mutex);
-	r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data);
+	r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY);
 	mutex_unlock(&cfg80211_drv_mutex);
 	return r;
 }
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f0ff3d1779da..4dab993ea488 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -60,12 +60,18 @@
  * @intersect: indicates whether the wireless core should intersect
  * 	the requested regulatory domain with the presently set regulatory
  * 	domain.
+ * @country_ie_checksum: checksum of the last processed and accepted
+ * 	country IE
+ * @country_ie_env: lets us know if the AP is telling us we are outdoor,
+ * 	indoor, or if it doesn't matter
  */
 struct regulatory_request {
 	struct wiphy *wiphy;
 	enum reg_set_by initiator;
 	char alpha2[2];
 	bool intersect;
+	u32 country_ie_checksum;
+	enum environment_cap country_ie_env;
 };
 
 /* Receipt of information from last regulatory request */
@@ -85,6 +91,11 @@ static u32 supported_bandwidths[] = {
  * information to give us an alpha2 */
 static const struct ieee80211_regdomain *cfg80211_regdomain;
 
+/* We use this as a place for the rd structure built from the
+ * last parsed country IE to rest until CRDA gets back to us with
+ * what it thinks should apply for the same country */
+static const struct ieee80211_regdomain *country_ie_regdomain;
+
 /* We keep a static world regulatory domain in case of the absence of CRDA */
 static const struct ieee80211_regdomain world_regdom = {
 	.n_reg_rules = 1,
@@ -264,6 +275,18 @@ static bool is_unknown_alpha2(const char *alpha2)
 	return false;
 }
 
+static bool is_intersected_alpha2(const char *alpha2)
+{
+	if (!alpha2)
+		return false;
+	/* Special case where regulatory domain is the
+	 * result of an intersection between two regulatory domain
+	 * structures */
+	if (alpha2[0] == '9' && alpha2[1] == '8')
+		return true;
+	return false;
+}
+
 static bool is_an_alpha2(const char *alpha2)
 {
 	if (!alpha2)
@@ -292,6 +315,25 @@ static bool regdom_changed(const char *alpha2)
 	return true;
 }
 
+/**
+ * country_ie_integrity_changes - tells us if the country IE has changed
+ * @checksum: checksum of country IE of fields we are interested in
+ *
+ * If the country IE has not changed you can ignore it safely. This is
+ * useful to determine if two devices are seeing two different country IEs
+ * even on the same alpha2. Note that this will return false if no IE has
+ * been set on the wireless core yet.
+ */
+static bool country_ie_integrity_changes(u32 checksum)
+{
+	/* If no IE has been set then the checksum doesn't change */
+	if (unlikely(!last_request->country_ie_checksum))
+		return false;
+	if (unlikely(last_request->country_ie_checksum != checksum))
+		return true;
+	return false;
+}
+
 /* This lets us keep regulatory code which is updated on a regulatory
  * basis in userspace. */
 static int call_crda(const char *alpha2)
@@ -379,6 +421,174 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range,
 	return 0;
 }
 
+/* Converts a country IE to a regulatory domain. A regulatory domain
+ * structure has a lot of information which the IE doesn't yet have,
+ * so for the other values we use upper max values as we will intersect
+ * with our userspace regulatory agent to get lower bounds. */
+static struct ieee80211_regdomain *country_ie_2_rd(
+				u8 *country_ie,
+				u8 country_ie_len,
+				u32 *checksum)
+{
+	struct ieee80211_regdomain *rd = NULL;
+	unsigned int i = 0;
+	char alpha2[2];
+	u32 flags = 0;
+	u32 num_rules = 0, size_of_regd = 0;
+	u8 *triplets_start = NULL;
+	u8 len_at_triplet = 0;
+	/* the last channel we have registered in a subband (triplet) */
+	int last_sub_max_channel = 0;
+
+	*checksum = 0xDEADBEEF;
+
+	/* Country IE requirements */
+	BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN ||
+		country_ie_len & 0x01);
+
+	alpha2[0] = country_ie[0];
+	alpha2[1] = country_ie[1];
+
+	/*
+	 * Third octet can be:
+	 *    'I' - Indoor
+	 *    'O' - Outdoor
+	 *
+	 *  anything else we assume is no restrictions
+	 */
+	if (country_ie[2] == 'I')
+		flags = NL80211_RRF_NO_OUTDOOR;
+	else if (country_ie[2] == 'O')
+		flags = NL80211_RRF_NO_INDOOR;
+
+	country_ie += 3;
+	country_ie_len -= 3;
+
+	triplets_start = country_ie;
+	len_at_triplet = country_ie_len;
+
+	*checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8);
+
+	/* We need to build a reg rule for each triplet, but first we must
+	 * calculate the number of reg rules we will need. We will need one
+	 * for each channel subband */
+	while (country_ie_len >= 3) {
+		struct ieee80211_country_ie_triplet *triplet =
+			(struct ieee80211_country_ie_triplet *) country_ie;
+		int cur_sub_max_channel = 0, cur_channel = 0;
+
+		if (triplet->ext.reg_extension_id >=
+				IEEE80211_COUNTRY_EXTENSION_ID) {
+			country_ie += 3;
+			country_ie_len -= 3;
+			continue;
+		}
+
+		cur_channel = triplet->chans.first_channel;
+		cur_sub_max_channel = ieee80211_channel_to_frequency(
+			cur_channel + triplet->chans.num_channels);
+
+		/* Basic sanity check */
+		if (cur_sub_max_channel < cur_channel)
+			return NULL;
+
+		/* Do not allow overlapping channels. Also channels
+		 * passed in each subband must be monotonically
+		 * increasing */
+		if (last_sub_max_channel) {
+			if (cur_channel <= last_sub_max_channel)
+				return NULL;
+			if (cur_sub_max_channel <= last_sub_max_channel)
+				return NULL;
+		}
+
+		/* When dot11RegulatoryClassesRequired is supported
+		 * we can throw ext triplets as part of this soup,
+		 * for now we don't care when those change as we
+		 * don't support them */
+		*checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) |
+		  ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) |
+		  ((triplet->chans.max_power ^ cur_sub_max_channel) << 24);
+
+		last_sub_max_channel = cur_sub_max_channel;
+
+		country_ie += 3;
+		country_ie_len -= 3;
+		num_rules++;
+
+		/* Note: this is not a IEEE requirement but
+		 * simply a memory requirement */
+		if (num_rules > NL80211_MAX_SUPP_REG_RULES)
+			return NULL;
+	}
+
+	country_ie = triplets_start;
+	country_ie_len = len_at_triplet;
+
+	size_of_regd = sizeof(struct ieee80211_regdomain) +
+		(num_rules * sizeof(struct ieee80211_reg_rule));
+
+	rd = kzalloc(size_of_regd, GFP_KERNEL);
+	if (!rd)
+		return NULL;
+
+	rd->n_reg_rules = num_rules;
+	rd->alpha2[0] = alpha2[0];
+	rd->alpha2[1] = alpha2[1];
+
+	/* This time around we fill in the rd */
+	while (country_ie_len >= 3) {
+		struct ieee80211_country_ie_triplet *triplet =
+			(struct ieee80211_country_ie_triplet *) country_ie;
+		struct ieee80211_reg_rule *reg_rule = NULL;
+		struct ieee80211_freq_range *freq_range = NULL;
+		struct ieee80211_power_rule *power_rule = NULL;
+
+		/* Must parse if dot11RegulatoryClassesRequired is true,
+		 * we don't support this yet */
+		if (triplet->ext.reg_extension_id >=
+				IEEE80211_COUNTRY_EXTENSION_ID) {
+			country_ie += 3;
+			country_ie_len -= 3;
+			continue;
+		}
+
+		reg_rule = &rd->reg_rules[i];
+		freq_range = &reg_rule->freq_range;
+		power_rule = &reg_rule->power_rule;
+
+		reg_rule->flags = flags;
+
+		/* The +10 is since the regulatory domain expects
+		 * the actual band edge, not the center of freq for
+		 * its start and end freqs, assuming 20 MHz bandwidth on
+		 * the channels passed */
+		freq_range->start_freq_khz =
+			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
+				triplet->chans.first_channel) - 10);
+		freq_range->end_freq_khz =
+			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
+				triplet->chans.first_channel +
+					triplet->chans.num_channels) + 10);
+
+		/* Large arbitrary values, we intersect later */
+		/* Increment this if we ever support >= 40 MHz channels
+		 * in IEEE 802.11 */
+		freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40);
+		power_rule->max_antenna_gain = DBI_TO_MBI(100);
+		power_rule->max_eirp = DBM_TO_MBM(100);
+
+		country_ie += 3;
+		country_ie_len -= 3;
+		i++;
+
+		BUG_ON(i > NL80211_MAX_SUPP_REG_RULES);
+	}
+
+	return rd;
+}
+
+
 /* Helper for regdom_intersect(), this does the real
  * mathematical intersection fun */
 static int reg_rules_intersect(
@@ -663,16 +873,14 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
 					return -EOPNOTSUPP;
 				return -EALREADY;
 			}
-			/* Two consecutive Country IE hints on the same wiphy */
-			if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2))
+			/* Two consecutive Country IE hints on the same wiphy.
+			 * This should be picked up early by the driver/stack */
+			if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2,
+				  alpha2)))
 				return 0;
 			return -EALREADY;
 		}
-		/*
-		 * Ignore Country IE hints for now, need to think about
-		 * what we need to do to support multi-domain operation.
-		 */
-		return -EOPNOTSUPP;
+		return REG_INTERSECT;
 	case REGDOM_SET_BY_DRIVER:
 		if (last_request->initiator == REGDOM_SET_BY_DRIVER)
 			return -EALREADY;
@@ -680,6 +888,11 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
 	case REGDOM_SET_BY_USER:
 		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
 			return REG_INTERSECT;
+		/* If the user knows better the user should set the regdom
+		 * to their country before the IE is picked up */
+		if (last_request->initiator == REGDOM_SET_BY_USER &&
+			  last_request->intersect)
+			return -EOPNOTSUPP;
 		return 0;
 	}
 
@@ -688,7 +901,9 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by,
 
 /* Caller must hold &cfg80211_drv_mutex */
 int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
-		      const char *alpha2)
+			const char *alpha2,
+			u32 country_ie_checksum,
+			enum environment_cap env)
 {
 	struct regulatory_request *request;
 	bool intersect = false;
@@ -711,9 +926,21 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
 	request->initiator = set_by;
 	request->wiphy = wiphy;
 	request->intersect = intersect;
+	request->country_ie_checksum = country_ie_checksum;
+	request->country_ie_env = env;
 
 	kfree(last_request);
 	last_request = request;
+	/*
+	 * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled
+	 * AND if CRDA is NOT present nothing will happen, if someone
+	 * wants to bother with 11d with OLD_REG you can add a timer.
+	 * If after x amount of time nothing happens you can call:
+	 *
+	 * return set_regdom(country_ie_regdomain);
+	 *
+	 * to intersect with the static rd
+	 */
 	return call_crda(alpha2);
 }
 
@@ -722,11 +949,120 @@ void regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 	BUG_ON(!alpha2);
 
 	mutex_lock(&cfg80211_drv_mutex);
-	__regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2);
+	__regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY);
 	mutex_unlock(&cfg80211_drv_mutex);
 }
 EXPORT_SYMBOL(regulatory_hint);
 
+static bool reg_same_country_ie_hint(struct wiphy *wiphy,
+			u32 country_ie_checksum)
+{
+	if (!last_request->wiphy)
+		return false;
+	if (likely(last_request->wiphy != wiphy))
+		return !country_ie_integrity_changes(country_ie_checksum);
+	/* We should not have let these through at this point, they
+	 * should have been picked up earlier by the first alpha2 check
+	 * on the device */
+	if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum)))
+		return true;
+	return false;
+}
+
+void regulatory_hint_11d(struct wiphy *wiphy,
+			u8 *country_ie,
+			u8 country_ie_len)
+{
+	struct ieee80211_regdomain *rd = NULL;
+	char alpha2[2];
+	u32 checksum = 0;
+	enum environment_cap env = ENVIRON_ANY;
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	/* IE len must be evenly divisible by 2 */
+	if (country_ie_len & 0x01)
+		goto out;
+
+	if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
+		goto out;
+
+	/* Pending country IE processing, this can happen after we
+	 * call CRDA and wait for a response if a beacon was received before
+	 * we were able to process the last regulatory_hint_11d() call */
+	if (country_ie_regdomain)
+		goto out;
+
+	alpha2[0] = country_ie[0];
+	alpha2[1] = country_ie[1];
+
+	if (country_ie[2] == 'I')
+		env = ENVIRON_INDOOR;
+	else if (country_ie[2] == 'O')
+		env = ENVIRON_OUTDOOR;
+
+	/* We will run this for *every* beacon processed for the BSSID, so
+	 * we optimize an early check to exit out early if we don't have to
+	 * do anything */
+	if (likely(last_request->wiphy)) {
+		struct cfg80211_registered_device *drv_last_ie;
+
+		drv_last_ie = wiphy_to_dev(last_request->wiphy);
+
+		/* Lets keep this simple -- we trust the first AP
+		 * after we intersect with CRDA */
+		if (likely(last_request->wiphy == wiphy)) {
+			/* Ignore IEs coming in on this wiphy with
+			 * the same alpha2 and environment cap */
+			if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2,
+				  alpha2) &&
+				  env == drv_last_ie->env)) {
+				goto out;
+			}
+			/* the wiphy moved on to another BSSID or the AP
+			 * was reconfigured. XXX: We need to deal with the
+			 * case where the user suspends and goes to goes
+			 * to another country, and then gets IEs from an
+			 * AP with different settings */
+			goto out;
+		} else {
+			/* Ignore IEs coming in on two separate wiphys with
+			 * the same alpha2 and environment cap */
+			if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2,
+				  alpha2) &&
+				  env == drv_last_ie->env)) {
+				goto out;
+			}
+			/* We could potentially intersect though */
+			goto out;
+		}
+	}
+
+	rd = country_ie_2_rd(country_ie, country_ie_len, &checksum);
+	if (!rd)
+		goto out;
+
+	/* This will not happen right now but we leave it here for the
+	 * the future when we want to add suspend/resume support and having
+	 * the user move to another country after doing so, or having the user
+	 * move to another AP. Right now we just trust the first AP. This is why
+	 * this is marked as likley(). If we hit this before we add this support
+	 * we want to be informed of it as it would indicate a mistake in the
+	 * current design  */
+	if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum))))
+		goto out;
+
+	/* We keep this around for when CRDA comes back with a response so
+	 * we can intersect with that */
+	country_ie_regdomain = rd;
+
+	__regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE,
+		country_ie_regdomain->alpha2, checksum, env);
+
+out:
+	mutex_unlock(&cfg80211_drv_mutex);
+}
+EXPORT_SYMBOL(regulatory_hint_11d);
 
 static void print_rd_rules(const struct ieee80211_regdomain *rd)
 {
@@ -766,7 +1102,25 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 static void print_regdomain(const struct ieee80211_regdomain *rd)
 {
 
-	if (is_world_regdom(rd->alpha2))
+	if (is_intersected_alpha2(rd->alpha2)) {
+		struct wiphy *wiphy = NULL;
+		struct cfg80211_registered_device *drv;
+
+		if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
+			if (last_request->wiphy) {
+				wiphy = last_request->wiphy;
+				drv = wiphy_to_dev(wiphy);
+				printk(KERN_INFO "cfg80211: Current regulatory "
+					"domain updated by AP to: %c%c\n",
+					drv->country_ie_alpha2[0],
+					drv->country_ie_alpha2[1]);
+			} else
+				printk(KERN_INFO "cfg80211: Current regulatory "
+					"domain intersected: \n");
+		} else
+				printk(KERN_INFO "cfg80211: Current regulatory "
+					"intersected: \n");
+	} else if (is_world_regdom(rd->alpha2))
 		printk(KERN_INFO "cfg80211: World regulatory "
 			"domain updated:\n");
 	else {
@@ -789,10 +1143,39 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 	print_rd_rules(rd);
 }
 
+#ifdef CONFIG_CFG80211_REG_DEBUG
+static void reg_country_ie_process_debug(
+	const struct ieee80211_regdomain *rd,
+	const struct ieee80211_regdomain *country_ie_regdomain,
+	const struct ieee80211_regdomain *intersected_rd)
+{
+	printk(KERN_DEBUG "cfg80211: Received country IE:\n");
+	print_regdomain_info(country_ie_regdomain);
+	printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n");
+	print_regdomain_info(rd);
+	if (intersected_rd) {
+		printk(KERN_DEBUG "cfg80211: We intersect both of these "
+			"and get:\n");
+		print_regdomain_info(rd);
+		return;
+	}
+	printk(KERN_DEBUG "cfg80211: Intersection between both failed\n");
+}
+#else
+static inline void reg_country_ie_process_debug(
+	const struct ieee80211_regdomain *rd,
+	const struct ieee80211_regdomain *country_ie_regdomain,
+	const struct ieee80211_regdomain *intersected_rd)
+{
+}
+#endif
+
 /* Takes ownership of rd only if it doesn't fail */
 static int __set_regdom(const struct ieee80211_regdomain *rd)
 {
 	const struct ieee80211_regdomain *intersected_rd = NULL;
+	struct cfg80211_registered_device *drv = NULL;
+	struct wiphy *wiphy = NULL;
 	/* Some basic sanity checks first */
 
 	if (is_world_regdom(rd->alpha2)) {
@@ -809,10 +1192,18 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 	if (!last_request)
 		return -EINVAL;
 
-	/* allow overriding the static definitions if CRDA is present */
-	if (!is_old_static_regdom(cfg80211_regdomain) &&
-	    !regdom_changed(rd->alpha2))
-		return -EINVAL;
+	/* Lets only bother proceeding on the same alpha2 if the current
+	 * rd is non static (it means CRDA was present and was used last)
+	 * and the pending request came in from a country IE */
+	if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) {
+		/* If someone else asked us to change the rd lets only bother
+		 * checking if the alpha2 changes if CRDA was already called */
+		if (!is_old_static_regdom(cfg80211_regdomain) &&
+		    !regdom_changed(rd->alpha2))
+			return -EINVAL;
+	}
+
+	wiphy = last_request->wiphy;
 
 	/* Now lets set the regulatory domain, update all driver channels
 	 * and finally inform them of what we have done, in case they want
@@ -853,9 +1244,47 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 		return 0;
 	}
 
-	/* Country IE parsing coming soon */
+	/*
+	 * Country IE requests are handled a bit differently, we intersect
+	 * the country IE rd with what CRDA believes that country should have
+	 */
+
+	BUG_ON(!country_ie_regdomain);
+
+	if (rd != country_ie_regdomain) {
+		/* Intersect what CRDA returned and our what we
+		 * had built from the Country IE received */
+
+		intersected_rd = regdom_intersect(rd, country_ie_regdomain);
+
+		reg_country_ie_process_debug(rd, country_ie_regdomain,
+			intersected_rd);
+
+		kfree(country_ie_regdomain);
+		country_ie_regdomain = NULL;
+	} else {
+		/* This would happen when CRDA was not present and
+		 * OLD_REGULATORY was enabled. We intersect our Country
+		 * IE rd and what was set on cfg80211 originally */
+		intersected_rd = regdom_intersect(rd, cfg80211_regdomain);
+	}
+
+	if (!intersected_rd)
+		return -EINVAL;
+
+	drv = wiphy_to_dev(wiphy);
+
+	drv->country_ie_alpha2[0] = rd->alpha2[0];
+	drv->country_ie_alpha2[1] = rd->alpha2[1];
+	drv->env = last_request->country_ie_env;
+
+	BUG_ON(intersected_rd == rd);
+
+	kfree(rd);
+	rd = NULL;
+
 	reset_regdomains();
-	WARN_ON(1);
+	cfg80211_regdomain = intersected_rd;
 
 	return 0;
 }
@@ -887,6 +1316,17 @@ int set_regdom(const struct ieee80211_regdomain *rd)
 	return r;
 }
 
+/* Caller must hold cfg80211_drv_mutex */
+void reg_device_remove(struct wiphy *wiphy)
+{
+	if (!last_request->wiphy)
+		return;
+	if (last_request->wiphy != wiphy)
+		return;
+	last_request->wiphy = NULL;
+	last_request->country_ie_env = ENVIRON_ANY;
+}
+
 int regulatory_init(void)
 {
 	int err;
@@ -906,11 +1346,11 @@ int regulatory_init(void)
 	 * that is not a valid ISO / IEC 3166 alpha2 */
 	if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U')
 		err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE,
-					ieee80211_regdom);
+					ieee80211_regdom, 0, ENVIRON_ANY);
 #else
 	cfg80211_regdomain = cfg80211_world_regdom;
 
-	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00");
+	err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY);
 	if (err)
 		printk(KERN_ERR "cfg80211: calling CRDA failed - "
 		       "unable to update world regulatory domain, "
@@ -926,6 +1366,9 @@ void regulatory_exit(void)
 
 	reset_regdomains();
 
+	kfree(country_ie_regdomain);
+	country_ie_regdomain = NULL;
+
 	kfree(last_request);
 
 	platform_device_unregister(reg_pdev);
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index c9b6b6358bbe..a76ea3ff7cd6 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -4,28 +4,41 @@
 bool is_world_regdom(const char *alpha2);
 bool reg_is_valid_request(const char *alpha2);
 
+void reg_device_remove(struct wiphy *wiphy);
+
 int regulatory_init(void);
 void regulatory_exit(void);
 
 int set_regdom(const struct ieee80211_regdomain *rd);
 
+enum environment_cap {
+	ENVIRON_ANY,
+	ENVIRON_INDOOR,
+	ENVIRON_OUTDOOR,
+};
+
+
 /**
  * __regulatory_hint - hint to the wireless core a regulatory domain
  * @wiphy: if the hint comes from country information from an AP, this
  *	is required to be set to the wiphy that received the information
  * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain
  *	should be in.
+ * @country_ie_checksum: checksum of processed country IE, set this to 0
+ * 	if the hint did not come from a country IE
+ * @country_ie_env: the environment the IE told us we are in, %ENVIRON_*
  *
  * The Wireless subsystem can use this function to hint to the wireless core
- * what it believes should be the current regulatory domain by
- * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
- * domain should be in.
+ * what it believes should be the current regulatory domain by giving it an
+ * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be
+ * in.
  *
  * Returns zero if all went fine, %-EALREADY if a regulatory domain had
  * already been set or other standard error codes.
  *
  */
 extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
-			     const char *alpha2);
+			     const char *alpha2, u32 country_ie_checksum,
+			     enum environment_cap country_ie_env);
 
 #endif  /* __NET_WIRELESS_REG_H */
-- 
cgit v1.2.3


From 14b9815af3f4fe0e171ee0c4325c31d2a2c1570b Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 12 Nov 2008 14:22:03 -0800
Subject: cfg80211: add support for custom firmware regulatory solutions

This adds API to cfg80211 to allow wireless drivers to inform
us if their firmware can handle regulatory considerations *and*
they cannot map these regulatory domains to an ISO / IEC 3166
alpha2. In these cases we skip the first regulatory hint instead
of expecting the driver to build their own regulatory structure,
providing us with an alpha2, or using the reg_notifier().

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/wireless.h |  7 +++++++
 net/wireless/reg.c     | 13 ++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/wireless.h b/include/net/wireless.h
index c85fa0ea5c51..21c5d966142d 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -181,6 +181,11 @@ struct ieee80211_supported_band {
  * struct wiphy - wireless hardware description
  * @idx: the wiphy index assigned to this item
  * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
+ * @fw_handles_regulatory: tells us the firmware for this device
+ * 	has its own regulatory solution and cannot identify the
+ * 	ISO / IEC 3166 alpha2 it belongs to. When this is enabled
+ * 	we will disregard the first regulatory hint (when the
+ * 	initiator is %REGDOM_SET_BY_CORE).
  * @reg_notifier: the driver's regulatory notification callback
  */
 struct wiphy {
@@ -192,6 +197,8 @@ struct wiphy {
 	/* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */
 	u16 interface_modes;
 
+	bool fw_handles_regulatory;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4dab993ea488..0990059f7e48 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -816,12 +816,23 @@ static void handle_band(struct ieee80211_supported_band *sband)
 		handle_channel(&sband->channels[i]);
 }
 
+static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby)
+{
+	if (!last_request)
+		return true;
+	if (setby == REGDOM_SET_BY_CORE &&
+		  wiphy->fw_handles_regulatory)
+		return true;
+	return false;
+}
+
 static void update_all_wiphy_regulatory(enum reg_set_by setby)
 {
 	struct cfg80211_registered_device *drv;
 
 	list_for_each_entry(drv, &cfg80211_drv_list, list)
-		wiphy_update_regulatory(&drv->wiphy, setby);
+		if (!ignore_reg_update(&drv->wiphy, setby))
+			wiphy_update_regulatory(&drv->wiphy, setby);
 }
 
 void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby)
-- 
cgit v1.2.3


From 8eecaba900e89643029fd2c253ad8ebb60761165 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Tue, 25 Nov 2008 13:45:29 -0800
Subject: tcp: tcp_limit_reno_sacked can become static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 2 --
 net/ipv4/tcp_input.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 265392470b26..e8ae90a8c35e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -761,8 +761,6 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
 }
 
-extern int tcp_limit_reno_sacked(struct tcp_sock *tp);
-
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
  * The exception is rate halving phase, when cwnd is decreasing towards
  * ssthresh.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9f8a80ba17bd..d67b6e9cc540 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1940,7 +1940,7 @@ out:
 /* Limits sacked_out so that sum with lost_out isn't ever larger than
  * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
  */
-int tcp_limit_reno_sacked(struct tcp_sock *tp)
+static int tcp_limit_reno_sacked(struct tcp_sock *tp)
 {
 	u32 holes;
 
-- 
cgit v1.2.3


From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 25 Nov 2008 21:07:04 +0100
Subject: tracing/function-return-tracer: change the name into
 function-graph-tracer

Impact: cleanup

This patch changes the name of the "return function tracer" into
function-graph-tracer which is a more suitable name for a tracing
which makes one able to retrieve the ordered call stack during
the code flow.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                     |  2 +-
 arch/x86/include/asm/ftrace.h        |  4 +-
 arch/x86/kernel/Makefile             |  4 +-
 arch/x86/kernel/entry_32.S           | 12 ++---
 arch/x86/kernel/ftrace.c             | 12 ++---
 include/linux/ftrace.h               | 24 ++++-----
 include/linux/ftrace_irq.h           |  2 +-
 include/linux/sched.h                |  2 +-
 kernel/Makefile                      |  2 +-
 kernel/fork.c                        |  4 +-
 kernel/sched.c                       |  2 +-
 kernel/trace/Kconfig                 | 19 ++++---
 kernel/trace/Makefile                |  2 +-
 kernel/trace/ftrace.c                | 26 +++++-----
 kernel/trace/trace.c                 | 18 +++----
 kernel/trace/trace.h                 | 12 ++---
 kernel/trace/trace_functions_graph.c | 98 ++++++++++++++++++++++++++++++++++++
 17 files changed, 173 insertions(+), 72 deletions(-)
 create mode 100644 kernel/trace/trace_functions_graph.c

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e49a4fd718fe..0842b1127684 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,7 +29,7 @@ config X86
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
-	select HAVE_FUNCTION_RET_TRACER if X86_32
+	select HAVE_FUNCTION_GRAPH_TRACER if X86_32
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 754a3e082f94..7e61b4ceb9a4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,7 +28,7 @@ struct dyn_arch_ftrace {
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 #ifndef __ASSEMBLY__
 
@@ -51,6 +51,6 @@ struct ftrace_ret_stack {
 extern void return_to_handler(void);
 
 #endif /* __ASSEMBLY__ */
-#endif /* CONFIG_FUNCTION_RET_TRACER */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 #endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index af2bc36ca1c4..64939a0c3986 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,7 +14,7 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
-ifdef CONFIG_FUNCTION_RET_TRACER
+ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # Don't trace __switch_to() but let it for function tracer
 CFLAGS_REMOVE_process_32.o = -pg
 endif
@@ -70,7 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
-obj-$(CONFIG_FUNCTION_RET_TRACER)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 74defe21ba42..2b1f0f081a6b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1188,9 +1188,9 @@ ENTRY(mcount)
 
 	cmpl $ftrace_stub, ftrace_trace_function
 	jnz trace
-#ifdef CONFIG_FUNCTION_RET_TRACER
-	cmpl $ftrace_stub, ftrace_function_return
-	jnz ftrace_return_caller
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	cmpl $ftrace_stub, ftrace_graph_function
+	jnz ftrace_graph_caller
 #endif
 .globl ftrace_stub
 ftrace_stub:
@@ -1215,8 +1215,8 @@ END(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
-ENTRY(ftrace_return_caller)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
 	cmpl $0, function_trace_stop
 	jne ftrace_stub
 
@@ -1230,7 +1230,7 @@ ENTRY(ftrace_return_caller)
 	popl %ecx
 	popl %eax
 	ret
-END(ftrace_return_caller)
+END(ftrace_graph_caller)
 
 .globl return_to_handler
 return_to_handler:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index bb137f7297ed..3595a4c14aba 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -323,7 +323,7 @@ int __init ftrace_dyn_arch_init(void *data)
 }
 #endif
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 #ifndef CONFIG_DYNAMIC_FTRACE
 
@@ -389,11 +389,11 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
  */
 unsigned long ftrace_return_to_handler(void)
 {
-	struct ftrace_retfunc trace;
+	struct ftrace_graph_ret trace;
 	pop_return_trace(&trace.ret, &trace.calltime, &trace.func,
 			&trace.overrun);
 	trace.rettime = cpu_clock(raw_smp_processor_id());
-	ftrace_function_return(&trace);
+	ftrace_graph_function(&trace);
 
 	return trace.ret;
 }
@@ -440,12 +440,12 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	);
 
 	if (WARN_ON(faulted)) {
-		unregister_ftrace_return();
+		unregister_ftrace_graph();
 		return;
 	}
 
 	if (WARN_ON(!__kernel_text_address(old))) {
-		unregister_ftrace_return();
+		unregister_ftrace_graph();
 		*parent = old;
 		return;
 	}
@@ -456,4 +456,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		*parent = old;
 }
 
-#endif /* CONFIG_FUNCTION_RET_TRACER */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7854d87b97b2..b4ac734ad8d6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -115,8 +115,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
-#ifdef CONFIG_FUNCTION_RET_TRACER
-extern void ftrace_return_caller(void);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+extern void ftrace_graph_caller(void);
 #endif
 
 /**
@@ -315,7 +315,7 @@ ftrace_init_module(struct module *mod,
 /*
  * Structure that defines a return function trace.
  */
-struct ftrace_retfunc {
+struct ftrace_graph_ret {
 	unsigned long ret; /* Return address */
 	unsigned long func; /* Current function */
 	unsigned long long calltime;
@@ -324,22 +324,22 @@ struct ftrace_retfunc {
 	unsigned long overrun;
 };
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 /* Type of a callback handler of tracing return function */
-typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
+typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *);
 
-extern int register_ftrace_return(trace_function_return_t func);
+extern int register_ftrace_graph(trace_function_graph_t func);
 /* The current handler in use */
-extern trace_function_return_t ftrace_function_return;
-extern void unregister_ftrace_return(void);
+extern trace_function_graph_t ftrace_graph_function;
+extern void unregister_ftrace_graph(void);
 
-extern void ftrace_retfunc_init_task(struct task_struct *t);
-extern void ftrace_retfunc_exit_task(struct task_struct *t);
+extern void ftrace_graph_init_task(struct task_struct *t);
+extern void ftrace_graph_exit_task(struct task_struct *t);
 #else
-static inline void ftrace_retfunc_init_task(struct task_struct *t) { }
-static inline void ftrace_retfunc_exit_task(struct task_struct *t) { }
+static inline void ftrace_graph_init_task(struct task_struct *t) { }
+static inline void ftrace_graph_exit_task(struct task_struct *t) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index 0b4df55d7a74..366a054d0b05 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,7 +2,7 @@
 #define _LINUX_FTRACE_IRQ_H
 
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER)
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
 extern void ftrace_nmi_enter(void);
 extern void ftrace_nmi_exit(void);
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d02a0ca70ee9..7ad48f2a2758 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1365,7 +1365,7 @@ struct task_struct {
 	unsigned long default_timer_slack_ns;
 
 	struct list_head	*scm_work_list;
-#ifdef CONFIG_FUNCTION_RET_TRACER
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* Index of current stored adress in ret_stack */
 	int curr_ret_stack;
 	/* Stack of return addresses for return function tracing */
diff --git a/kernel/Makefile b/kernel/Makefile
index 03a45e7e87b7..703cf3b7389c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,7 +21,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -pg
 endif
-ifdef CONFIG_FUNCTION_RET_TRACER
+ifdef CONFIG_FUNCTION_GRAPH_TRACER
 CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
 CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
 endif
diff --git a/kernel/fork.c b/kernel/fork.c
index d6e1a3205f62..5f82a999c032 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -140,7 +140,7 @@ void free_task(struct task_struct *tsk)
 	prop_local_destroy_single(&tsk->dirties);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
-	ftrace_retfunc_exit_task(tsk);
+	ftrace_graph_exit_task(tsk);
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -1271,7 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
-	ftrace_retfunc_init_task(p);
+	ftrace_graph_init_task(p);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	return p;
diff --git a/kernel/sched.c b/kernel/sched.c
index 388d9db044ab..52490bf6b884 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5901,7 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 * The idle tasks have their own, simple scheduling class:
 	 */
 	idle->sched_class = &idle_sched_class;
-	ftrace_retfunc_init_task(idle);
+	ftrace_graph_init_task(idle);
 }
 
 /*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 620feadff67a..eb9b901e0777 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,7 +12,7 @@ config NOP_TRACER
 config HAVE_FUNCTION_TRACER
 	bool
 
-config HAVE_FUNCTION_RET_TRACER
+config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
@@ -63,15 +63,18 @@ config FUNCTION_TRACER
 	  (the bootup default), then the overhead of the instructions is very
 	  small and not measurable even in micro-benchmarks.
 
-config FUNCTION_RET_TRACER
-	bool "Kernel Function return Tracer"
-	depends on HAVE_FUNCTION_RET_TRACER
+config FUNCTION_GRAPH_TRACER
+	bool "Kernel Function Graph Tracer"
+	depends on HAVE_FUNCTION_GRAPH_TRACER
 	depends on FUNCTION_TRACER
 	help
-	  Enable the kernel to trace a function at its return.
-	  It's first purpose is to trace the duration of functions.
-	  This is done by setting the current return address on the thread
-	  info structure of the current task.
+	  Enable the kernel to trace a function at both its return
+	  and its entry.
+	  It's first purpose is to trace the duration of functions and
+	  draw a call graph for each thread with some informations like
+	  the return value.
+	  This is done by setting the current return address on the current
+	  task structure into a stack of calls.
 
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cef4bcb4e822..08c5fe6ddc09 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
-obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_BTS_TRACER) += trace_bts.o
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 53042f118f23..9e19976af727 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -395,11 +395,11 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 	unsigned long ip, fl;
 	unsigned long ftrace_addr;
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
 		ftrace_addr = (unsigned long)ftrace_caller;
 	else
-		ftrace_addr = (unsigned long)ftrace_return_caller;
+		ftrace_addr = (unsigned long)ftrace_graph_caller;
 #else
 	ftrace_addr = (unsigned long)ftrace_caller;
 #endif
@@ -1496,13 +1496,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 	return ret;
 }
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 static atomic_t ftrace_retfunc_active;
 
 /* The callback that hooks the return of a function */
-trace_function_return_t ftrace_function_return =
-			(trace_function_return_t)ftrace_stub;
+trace_function_graph_t ftrace_graph_function =
+			(trace_function_graph_t)ftrace_stub;
 
 
 /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
@@ -1549,7 +1549,7 @@ free:
 }
 
 /* Allocate a return stack for each task */
-static int start_return_tracing(void)
+static int start_graph_tracing(void)
 {
 	struct ftrace_ret_stack **ret_stack_list;
 	int ret;
@@ -1569,7 +1569,7 @@ static int start_return_tracing(void)
 	return ret;
 }
 
-int register_ftrace_return(trace_function_return_t func)
+int register_ftrace_graph(trace_function_graph_t func)
 {
 	int ret = 0;
 
@@ -1584,13 +1584,13 @@ int register_ftrace_return(trace_function_return_t func)
 		goto out;
 	}
 	atomic_inc(&ftrace_retfunc_active);
-	ret = start_return_tracing();
+	ret = start_graph_tracing();
 	if (ret) {
 		atomic_dec(&ftrace_retfunc_active);
 		goto out;
 	}
 	ftrace_tracing_type = FTRACE_TYPE_RETURN;
-	ftrace_function_return = func;
+	ftrace_graph_function = func;
 	ftrace_startup();
 
 out:
@@ -1598,12 +1598,12 @@ out:
 	return ret;
 }
 
-void unregister_ftrace_return(void)
+void unregister_ftrace_graph(void)
 {
 	mutex_lock(&ftrace_sysctl_lock);
 
 	atomic_dec(&ftrace_retfunc_active);
-	ftrace_function_return = (trace_function_return_t)ftrace_stub;
+	ftrace_graph_function = (trace_function_graph_t)ftrace_stub;
 	ftrace_shutdown();
 	/* Restore normal tracing type */
 	ftrace_tracing_type = FTRACE_TYPE_ENTER;
@@ -1612,7 +1612,7 @@ void unregister_ftrace_return(void)
 }
 
 /* Allocate a return stack for newly created task */
-void ftrace_retfunc_init_task(struct task_struct *t)
+void ftrace_graph_init_task(struct task_struct *t)
 {
 	if (atomic_read(&ftrace_retfunc_active)) {
 		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
@@ -1626,7 +1626,7 @@ void ftrace_retfunc_init_task(struct task_struct *t)
 		t->ret_stack = NULL;
 }
 
-void ftrace_retfunc_exit_task(struct task_struct *t)
+void ftrace_graph_exit_task(struct task_struct *t)
 {
 	struct ftrace_ret_stack	*ret_stack = t->ret_stack;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8df8fdd69c95..f21ab2c68fd4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -878,15 +878,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 }
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
-static void __trace_function_return(struct trace_array *tr,
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void __trace_function_graph(struct trace_array *tr,
 				struct trace_array_cpu *data,
-				struct ftrace_retfunc *trace,
+				struct ftrace_graph_ret *trace,
 				unsigned long flags,
 				int pc)
 {
 	struct ring_buffer_event *event;
-	struct ftrace_ret_entry *entry;
+	struct ftrace_graph_entry *entry;
 	unsigned long irq_flags;
 
 	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
@@ -1177,8 +1177,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 	local_irq_restore(flags);
 }
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
-void trace_function_return(struct ftrace_retfunc *trace)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void trace_function_graph(struct ftrace_graph_ret *trace)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
@@ -1193,12 +1193,12 @@ void trace_function_return(struct ftrace_retfunc *trace)
 	disabled = atomic_inc_return(&data->disabled);
 	if (likely(disabled == 1)) {
 		pc = preempt_count();
-		__trace_function_return(tr, data, trace, flags, pc);
+		__trace_function_graph(tr, data, trace, flags, pc);
 	}
 	atomic_dec(&data->disabled);
 	raw_local_irq_restore(flags);
 }
-#endif /* CONFIG_FUNCTION_RET_TRACER */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 static struct ftrace_ops trace_ops __read_mostly =
 {
@@ -2001,7 +2001,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		break;
 	}
 	case TRACE_FN_RET: {
-		return print_return_function(iter);
+		return print_graph_function(iter);
 		break;
 	}
 	case TRACE_BRANCH: {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3abd645e8af2..72b5ef868765 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -57,7 +57,7 @@ struct ftrace_entry {
 };
 
 /* Function return entry */
-struct ftrace_ret_entry {
+struct ftrace_graph_entry {
 	struct trace_entry	ent;
 	unsigned long		ip;
 	unsigned long		parent_ip;
@@ -264,7 +264,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
 		IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
 		IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
-		IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
+		IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\
 		IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\
 		__ftrace_bad_type();					\
 	} while (0)
@@ -398,7 +398,7 @@ void trace_function(struct trace_array *tr,
 		    unsigned long parent_ip,
 		    unsigned long flags, int pc);
 void
-trace_function_return(struct ftrace_retfunc *trace);
+trace_function_graph(struct ftrace_graph_ret *trace);
 
 void trace_bts(struct trace_array *tr,
 	       unsigned long from,
@@ -489,11 +489,11 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 extern unsigned long trace_flags;
 
 /* Standard output formatting function used for function return traces */
-#ifdef CONFIG_FUNCTION_RET_TRACER
-extern enum print_line_t print_return_function(struct trace_iterator *iter);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+extern enum print_line_t print_graph_function(struct trace_iterator *iter);
 #else
 static inline enum print_line_t
-print_return_function(struct trace_iterator *iter)
+print_graph_function(struct trace_iterator *iter)
 {
 	return TRACE_TYPE_UNHANDLED;
 }
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
new file mode 100644
index 000000000000..f5bad4624d2b
--- /dev/null
+++ b/kernel/trace/trace_functions_graph.c
@@ -0,0 +1,98 @@
+/*
+ *
+ * Function graph tracer.
+ * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Mostly borrowed from function tracer which
+ * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/fs.h>
+
+#include "trace.h"
+
+
+#define TRACE_GRAPH_PRINT_OVERRUN	0x1
+static struct tracer_opt trace_opts[] = {
+	/* Display overruns or not */
+	{ TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+	{ } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+	.val = 0, /* Don't display overruns by default */
+	.opts = trace_opts
+};
+
+
+static int graph_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	for_each_online_cpu(cpu)
+		tracing_reset(tr, cpu);
+
+	return register_ftrace_graph(&trace_function_graph);
+}
+
+static void graph_trace_reset(struct trace_array *tr)
+{
+		unregister_ftrace_graph();
+}
+
+
+enum print_line_t
+print_graph_function(struct trace_iterator *iter)
+{
+	struct trace_seq *s = &iter->seq;
+	struct trace_entry *entry = iter->ent;
+	struct ftrace_graph_entry *field;
+	int ret;
+
+	if (entry->type == TRACE_FN_RET) {
+		trace_assign_type(field, entry);
+		ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+
+		ret = seq_print_ip_sym(s, field->ip,
+					trace_flags & TRACE_ITER_SYM_MASK);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+
+		ret = trace_seq_printf(s, " (%llu ns)",
+					field->rettime - field->calltime);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+
+		if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
+			ret = trace_seq_printf(s, " (Overruns: %lu)",
+						field->overrun);
+			if (!ret)
+				return TRACE_TYPE_PARTIAL_LINE;
+		}
+
+		ret = trace_seq_printf(s, "\n");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+
+		return TRACE_TYPE_HANDLED;
+	}
+	return TRACE_TYPE_UNHANDLED;
+}
+
+static struct tracer graph_trace __read_mostly = {
+	.name	     = "function-graph",
+	.init	     = graph_trace_init,
+	.reset	     = graph_trace_reset,
+	.print_line = print_graph_function,
+	.flags		= &tracer_flags,
+};
+
+static __init int init_graph_trace(void)
+{
+	return register_tracer(&graph_trace);
+}
+
+device_initcall(init_graph_trace);
-- 
cgit v1.2.3


From 287b6e68ca7209caec40b2f44f837c580a413bae Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 26 Nov 2008 00:57:25 +0100
Subject: tracing/function-return-tracer: set a more human readable output

Impact: feature

This patch sets a C-like output for the function graph tracing.
For this aim, we now call two handler for each function: one on the entry
and one other on return. This way we can draw a well-ordered call stack.

The pid of the previous trace is loosely stored to be compared against
the one of the current trace to see if there were a context switch.

Without this little feature, the call tree would seem broken at
some locations.
We could use the sched_tracer to capture these sched_events but this
way of processing is much more simpler.

2 spaces have been chosen for indentation to fit the screen while deep
calls. The time of execution in nanosecs is printed just after closed
braces, it seems more easy this way to find the corresponding function.
If the time was printed as a first column, it would be not so easy to
find the corresponding function if it is called on a deep depth.

I plan to output the return value but on 32 bits CPU, the return value
can be 32 or 64, and its difficult to guess on which case we are.
I don't know what would be the better solution on X86-32: only print
eax (low-part) or even edx (high-part).

Actually it's thee same problem when a function return a 8 bits value, the
high part of eax could contain junk values...

Here is an example of trace:

sys_read() {
  fget_light() {
  } 526
  vfs_read() {
    rw_verify_area() {
      security_file_permission() {
        cap_file_permission() {
        } 519
      } 1564
    } 2640
    do_sync_read() {
      pipe_read() {
        __might_sleep() {
        } 511
        pipe_wait() {
          prepare_to_wait() {
          } 760
          deactivate_task() {
            dequeue_task() {
              dequeue_task_fair() {
                dequeue_entity() {
                  update_curr() {
                    update_min_vruntime() {
                    } 504
                  } 1587
                  clear_buddies() {
                  } 512
                  add_cfs_task_weight() {
                  } 519
                  update_min_vruntime() {
                  } 511
                } 5602
                dequeue_entity() {
                  update_curr() {
                    update_min_vruntime() {
                    } 496
                  } 1631
                  clear_buddies() {
                  } 496
                  update_min_vruntime() {
                  } 527
                } 4580
                hrtick_update() {
                  hrtick_start_fair() {
                  } 488
                } 1489
              } 13700
            } 14949
          } 16016
          msecs_to_jiffies() {
          } 496
          put_prev_task_fair() {
          } 504
          pick_next_task_fair() {
          } 489
          pick_next_task_rt() {
          } 496
          pick_next_task_fair() {
          } 489
          pick_next_task_idle() {
          } 489

------------8<---------- thread 4 ------------8<----------

finish_task_switch() {
} 1203
do_softirq() {
  __do_softirq() {
    __local_bh_disable() {
    } 669
    rcu_process_callbacks() {
      __rcu_process_callbacks() {
        cpu_quiet() {
          rcu_start_batch() {
          } 503
        } 1647
      } 3128
      __rcu_process_callbacks() {
      } 542
    } 5362
    _local_bh_enable() {
    } 587
  } 8880
} 9986
kthread_should_stop() {
} 669
deactivate_task() {
  dequeue_task() {
    dequeue_task_fair() {
      dequeue_entity() {
        update_curr() {
          calc_delta_mine() {
          } 511
          update_min_vruntime() {
          } 511
        } 2813

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c              |  32 +++++++----
 include/linux/ftrace.h                |  25 ++++++--
 kernel/trace/ftrace.c                 |  30 +++++-----
 kernel/trace/trace.c                  |  67 ++++++++++++++++++----
 kernel/trace/trace.h                  |  28 +++++----
 kernel/trace/trace_functions_graph.c  | 104 ++++++++++++++++++++++++++--------
 kernel/trace/trace_functions_return.c |  98 --------------------------------
 7 files changed, 208 insertions(+), 176 deletions(-)
 delete mode 100644 kernel/trace/trace_functions_return.c

(limited to 'include')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3595a4c14aba..26b2d92d48b3 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -347,7 +347,7 @@ void ftrace_nmi_exit(void)
 
 /* Add a function return address to the trace stack on thread info.*/
 static int push_return_trace(unsigned long ret, unsigned long long time,
-				unsigned long func)
+				unsigned long func, int *depth)
 {
 	int index;
 
@@ -365,21 +365,22 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = time;
+	*depth = index;
 
 	return 0;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(unsigned long *ret, unsigned long long *time,
-				unsigned long *func, unsigned long *overrun)
+static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 {
 	int index;
 
 	index = current->curr_ret_stack;
 	*ret = current->ret_stack[index].ret;
-	*func = current->ret_stack[index].func;
-	*time = current->ret_stack[index].calltime;
-	*overrun = atomic_read(&current->trace_overrun);
+	trace->func = current->ret_stack[index].func;
+	trace->calltime = current->ret_stack[index].calltime;
+	trace->overrun = atomic_read(&current->trace_overrun);
+	trace->depth = index;
 	current->curr_ret_stack--;
 }
 
@@ -390,12 +391,13 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
 unsigned long ftrace_return_to_handler(void)
 {
 	struct ftrace_graph_ret trace;
-	pop_return_trace(&trace.ret, &trace.calltime, &trace.func,
-			&trace.overrun);
+	unsigned long ret;
+
+	pop_return_trace(&trace, &ret);
 	trace.rettime = cpu_clock(raw_smp_processor_id());
-	ftrace_graph_function(&trace);
+	ftrace_graph_return(&trace);
 
-	return trace.ret;
+	return ret;
 }
 
 /*
@@ -407,6 +409,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	unsigned long old;
 	unsigned long long calltime;
 	int faulted;
+	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 				&return_to_handler;
 
@@ -452,8 +455,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 
 	calltime = cpu_clock(raw_smp_processor_id());
 
-	if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+	if (push_return_trace(old, calltime,
+				self_addr, &trace.depth) == -EBUSY) {
 		*parent = old;
+		return;
+	}
+
+	trace.func = self_addr;
+	ftrace_graph_entry(&trace);
+
 }
 
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b4ac734ad8d6..fc2d54987198 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -312,27 +312,40 @@ ftrace_init_module(struct module *mod,
 #endif
 
 
+/*
+ * Structure that defines an entry function trace.
+ */
+struct ftrace_graph_ent {
+	unsigned long func; /* Current function */
+	int depth;
+};
+
 /*
  * Structure that defines a return function trace.
  */
 struct ftrace_graph_ret {
-	unsigned long ret; /* Return address */
 	unsigned long func; /* Current function */
 	unsigned long long calltime;
 	unsigned long long rettime;
 	/* Number of functions that overran the depth limit for current task */
 	unsigned long overrun;
+	int depth;
 };
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
-/* Type of a callback handler of tracing return function */
-typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *);
+/* Type of the callback handlers for tracing function graph*/
+typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
+typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
+
+extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
+				trace_func_graph_ent_t entryfunc);
+
+/* The current handlers in use */
+extern trace_func_graph_ret_t ftrace_graph_return;
+extern trace_func_graph_ent_t ftrace_graph_entry;
 
-extern int register_ftrace_graph(trace_function_graph_t func);
-/* The current handler in use */
-extern trace_function_graph_t ftrace_graph_function;
 extern void unregister_ftrace_graph(void);
 
 extern void ftrace_graph_init_task(struct task_struct *t);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9e19976af727..7e2d3b91692d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1498,12 +1498,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static atomic_t ftrace_retfunc_active;
-
-/* The callback that hooks the return of a function */
-trace_function_graph_t ftrace_graph_function =
-			(trace_function_graph_t)ftrace_stub;
+static atomic_t ftrace_graph_active;
 
+/* The callbacks that hook a function */
+trace_func_graph_ret_t ftrace_graph_return =
+			(trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ent_t ftrace_graph_entry =
+			(trace_func_graph_ent_t)ftrace_stub;
 
 /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
 static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
@@ -1569,7 +1570,8 @@ static int start_graph_tracing(void)
 	return ret;
 }
 
-int register_ftrace_graph(trace_function_graph_t func)
+int register_ftrace_graph(trace_func_graph_ret_t retfunc,
+			trace_func_graph_ent_t entryfunc)
 {
 	int ret = 0;
 
@@ -1583,14 +1585,15 @@ int register_ftrace_graph(trace_function_graph_t func)
 		ret = -EBUSY;
 		goto out;
 	}
-	atomic_inc(&ftrace_retfunc_active);
+	atomic_inc(&ftrace_graph_active);
 	ret = start_graph_tracing();
 	if (ret) {
-		atomic_dec(&ftrace_retfunc_active);
+		atomic_dec(&ftrace_graph_active);
 		goto out;
 	}
 	ftrace_tracing_type = FTRACE_TYPE_RETURN;
-	ftrace_graph_function = func;
+	ftrace_graph_return = retfunc;
+	ftrace_graph_entry = entryfunc;
 	ftrace_startup();
 
 out:
@@ -1602,8 +1605,9 @@ void unregister_ftrace_graph(void)
 {
 	mutex_lock(&ftrace_sysctl_lock);
 
-	atomic_dec(&ftrace_retfunc_active);
-	ftrace_graph_function = (trace_function_graph_t)ftrace_stub;
+	atomic_dec(&ftrace_graph_active);
+	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+	ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub;
 	ftrace_shutdown();
 	/* Restore normal tracing type */
 	ftrace_tracing_type = FTRACE_TYPE_ENTER;
@@ -1614,7 +1618,7 @@ void unregister_ftrace_graph(void)
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
-	if (atomic_read(&ftrace_retfunc_active)) {
+	if (atomic_read(&ftrace_graph_active)) {
 		t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
 				* sizeof(struct ftrace_ret_stack),
 				GFP_KERNEL);
@@ -1638,5 +1642,3 @@ void ftrace_graph_exit_task(struct task_struct *t)
 }
 #endif
 
-
-
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f21ab2c68fd4..9d5f7c94f251 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -879,14 +879,38 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void __trace_function_graph(struct trace_array *tr,
+static void __trace_graph_entry(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct ftrace_graph_ent *trace,
+				unsigned long flags,
+				int pc)
+{
+	struct ring_buffer_event *event;
+	struct ftrace_graph_ent_entry *entry;
+	unsigned long irq_flags;
+
+	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
+		return;
+
+	event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, flags, pc);
+	entry->ent.type			= TRACE_GRAPH_ENT;
+	entry->graph_ent			= *trace;
+	ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+}
+
+static void __trace_graph_return(struct trace_array *tr,
 				struct trace_array_cpu *data,
 				struct ftrace_graph_ret *trace,
 				unsigned long flags,
 				int pc)
 {
 	struct ring_buffer_event *event;
-	struct ftrace_graph_entry *entry;
+	struct ftrace_graph_ret_entry *entry;
 	unsigned long irq_flags;
 
 	if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
@@ -898,12 +922,8 @@ static void __trace_function_graph(struct trace_array *tr,
 		return;
 	entry	= ring_buffer_event_data(event);
 	tracing_generic_entry_update(&entry->ent, flags, pc);
-	entry->ent.type			= TRACE_FN_RET;
-	entry->ip			= trace->func;
-	entry->parent_ip	= trace->ret;
-	entry->rettime		= trace->rettime;
-	entry->calltime		= trace->calltime;
-	entry->overrun		= trace->overrun;
+	entry->ent.type			= TRACE_GRAPH_RET;
+	entry->ret				= *trace;
 	ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
 }
 #endif
@@ -1178,7 +1198,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-void trace_function_graph(struct ftrace_graph_ret *trace)
+void trace_graph_entry(struct ftrace_graph_ent *trace)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
@@ -1193,7 +1213,28 @@ void trace_function_graph(struct ftrace_graph_ret *trace)
 	disabled = atomic_inc_return(&data->disabled);
 	if (likely(disabled == 1)) {
 		pc = preempt_count();
-		__trace_function_graph(tr, data, trace, flags, pc);
+		__trace_graph_entry(tr, data, trace, flags, pc);
+	}
+	atomic_dec(&data->disabled);
+	raw_local_irq_restore(flags);
+}
+
+void trace_graph_return(struct ftrace_graph_ret *trace)
+{
+	struct trace_array *tr = &global_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int cpu;
+	int pc;
+
+	raw_local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+	if (likely(disabled == 1)) {
+		pc = preempt_count();
+		__trace_graph_return(tr, data, trace, flags, pc);
 	}
 	atomic_dec(&data->disabled);
 	raw_local_irq_restore(flags);
@@ -2000,9 +2041,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 			trace_seq_print_cont(s, iter);
 		break;
 	}
-	case TRACE_FN_RET: {
+	case TRACE_GRAPH_RET: {
+		return print_graph_function(iter);
+	}
+	case TRACE_GRAPH_ENT: {
 		return print_graph_function(iter);
-		break;
 	}
 	case TRACE_BRANCH: {
 		struct trace_branch *field;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 72b5ef868765..ffe1bb1eb620 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -25,7 +25,8 @@ enum trace_type {
 	TRACE_BRANCH,
 	TRACE_BOOT_CALL,
 	TRACE_BOOT_RET,
-	TRACE_FN_RET,
+	TRACE_GRAPH_RET,
+	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
 	TRACE_BTS,
 
@@ -56,14 +57,16 @@ struct ftrace_entry {
 	unsigned long		parent_ip;
 };
 
+/* Function call entry */
+struct ftrace_graph_ent_entry {
+	struct trace_entry			ent;
+	struct ftrace_graph_ent		graph_ent;
+};
+
 /* Function return entry */
-struct ftrace_graph_entry {
-	struct trace_entry	ent;
-	unsigned long		ip;
-	unsigned long		parent_ip;
-	unsigned long long	calltime;
-	unsigned long long	rettime;
-	unsigned long		overrun;
+struct ftrace_graph_ret_entry {
+	struct trace_entry			ent;
+	struct ftrace_graph_ret		ret;
 };
 extern struct tracer boot_tracer;
 
@@ -264,7 +267,10 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
 		IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
 		IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
-		IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\
+		IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry,	\
+			  TRACE_GRAPH_ENT);		\
+		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
+			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\
 		__ftrace_bad_type();					\
 	} while (0)
@@ -397,9 +403,9 @@ void trace_function(struct trace_array *tr,
 		    unsigned long ip,
 		    unsigned long parent_ip,
 		    unsigned long flags, int pc);
-void
-trace_function_graph(struct ftrace_graph_ret *trace);
 
+void trace_graph_return(struct ftrace_graph_ret *trace);
+void trace_graph_entry(struct ftrace_graph_ent *trace);
 void trace_bts(struct trace_array *tr,
 	       unsigned long from,
 	       unsigned long to);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index f5bad4624d2b..b6f0cc2a00cb 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -13,6 +13,7 @@
 
 #include "trace.h"
 
+#define TRACE_GRAPH_INDENT	2
 
 #define TRACE_GRAPH_PRINT_OVERRUN	0x1
 static struct tracer_opt trace_opts[] = {
@@ -26,6 +27,8 @@ static struct tracer_flags tracer_flags = {
 	.opts = trace_opts
 };
 
+/* pid on the last trace processed */
+static pid_t last_pid = -1;
 
 static int graph_trace_init(struct trace_array *tr)
 {
@@ -33,7 +36,8 @@ static int graph_trace_init(struct trace_array *tr)
 	for_each_online_cpu(cpu)
 		tracing_reset(tr, cpu);
 
-	return register_ftrace_graph(&trace_function_graph);
+	return register_ftrace_graph(&trace_graph_return,
+					&trace_graph_entry);
 }
 
 static void graph_trace_reset(struct trace_array *tr)
@@ -41,45 +45,97 @@ static void graph_trace_reset(struct trace_array *tr)
 		unregister_ftrace_graph();
 }
 
+/* If the pid changed since the last trace, output this event */
+static int verif_pid(struct trace_seq *s, pid_t pid)
+{
+	if (last_pid != -1 && last_pid == pid)
+		return 1;
 
-enum print_line_t
-print_graph_function(struct trace_iterator *iter)
+	last_pid = pid;
+	return trace_seq_printf(s, "\n------------8<---------- thread %d"
+				    " ------------8<----------\n\n",
+				  pid);
+}
+
+static enum print_line_t
+print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s,
+		struct trace_entry *ent)
 {
-	struct trace_seq *s = &iter->seq;
-	struct trace_entry *entry = iter->ent;
-	struct ftrace_graph_entry *field;
+	int i;
 	int ret;
 
-	if (entry->type == TRACE_FN_RET) {
-		trace_assign_type(field, entry);
-		ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
+	if (!verif_pid(s, ent->pid))
+		return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = seq_print_ip_sym(s, field->ip,
-					trace_flags & TRACE_ITER_SYM_MASK);
+	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
+		ret = trace_seq_printf(s, " ");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	ret = seq_print_ip_sym(s, call->func, 0);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	ret = trace_seq_printf(s, "() {\n");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
+		   struct trace_entry *ent)
+{
+	int i;
+	int ret;
+
+	if (!verif_pid(s, ent->pid))
+		return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = trace_seq_printf(s, " (%llu ns)",
-					field->rettime - field->calltime);
+	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
+		ret = trace_seq_printf(s, " ");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	ret = trace_seq_printf(s, "} ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
 
-		if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
-			ret = trace_seq_printf(s, " (Overruns: %lu)",
-						field->overrun);
-			if (!ret)
-				return TRACE_TYPE_PARTIAL_LINE;
-		}
+	ret = trace_seq_printf(s, "%llu\n", trace->rettime - trace->calltime);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = trace_seq_printf(s, "\n");
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
+		ret = trace_seq_printf(s, " (Overruns: %lu)\n",
+					trace->overrun);
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
+	}
+	return TRACE_TYPE_HANDLED;
+}
+
+enum print_line_t
+print_graph_function(struct trace_iterator *iter)
+{
+	struct trace_seq *s = &iter->seq;
+	struct trace_entry *entry = iter->ent;
 
-		return TRACE_TYPE_HANDLED;
+	switch (entry->type) {
+	case TRACE_GRAPH_ENT: {
+		struct ftrace_graph_ent_entry *field;
+		trace_assign_type(field, entry);
+		return print_graph_entry(&field->graph_ent, s, entry);
+	}
+	case TRACE_GRAPH_RET: {
+		struct ftrace_graph_ret_entry *field;
+		trace_assign_type(field, entry);
+		return print_graph_return(&field->ret, s, entry);
+	}
+	default:
+		return TRACE_TYPE_UNHANDLED;
 	}
-	return TRACE_TYPE_UNHANDLED;
 }
 
 static struct tracer graph_trace __read_mostly = {
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
deleted file mode 100644
index e00d64509c9c..000000000000
--- a/kernel/trace/trace_functions_return.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- *
- * Function return tracer.
- * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- * Mostly borrowed from function tracer which
- * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/ftrace.h>
-#include <linux/fs.h>
-
-#include "trace.h"
-
-
-#define TRACE_RETURN_PRINT_OVERRUN	0x1
-static struct tracer_opt trace_opts[] = {
-	/* Display overruns or not */
-	{ TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) },
-	{ } /* Empty entry */
-};
-
-static struct tracer_flags tracer_flags = {
-	.val = 0, /* Don't display overruns by default */
-	.opts = trace_opts
-};
-
-
-static int return_trace_init(struct trace_array *tr)
-{
-	int cpu;
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
-
-	return register_ftrace_return(&trace_function_return);
-}
-
-static void return_trace_reset(struct trace_array *tr)
-{
-		unregister_ftrace_return();
-}
-
-
-enum print_line_t
-print_return_function(struct trace_iterator *iter)
-{
-	struct trace_seq *s = &iter->seq;
-	struct trace_entry *entry = iter->ent;
-	struct ftrace_ret_entry *field;
-	int ret;
-
-	if (entry->type == TRACE_FN_RET) {
-		trace_assign_type(field, entry);
-		ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-
-		ret = seq_print_ip_sym(s, field->ip,
-					trace_flags & TRACE_ITER_SYM_MASK);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-
-		ret = trace_seq_printf(s, " (%llu ns)",
-					field->rettime - field->calltime);
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-
-		if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) {
-			ret = trace_seq_printf(s, " (Overruns: %lu)",
-						field->overrun);
-			if (!ret)
-				return TRACE_TYPE_PARTIAL_LINE;
-		}
-
-		ret = trace_seq_printf(s, "\n");
-		if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
-
-		return TRACE_TYPE_HANDLED;
-	}
-	return TRACE_TYPE_UNHANDLED;
-}
-
-static struct tracer return_trace __read_mostly = {
-	.name	     = "return",
-	.init	     = return_trace_init,
-	.reset	     = return_trace_reset,
-	.print_line = print_return_function,
-	.flags		= &tracer_flags,
-};
-
-static __init int init_return_trace(void)
-{
-	return register_tracer(&return_trace);
-}
-
-device_initcall(init_return_trace);
-- 
cgit v1.2.3


From d62ddc21b674b5ac1466091ff3fbf7baa53bc92c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:14:31 -0800
Subject: netns xfrm: add netns boilerplate

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  4 ++++
 include/net/netns/xfrm.h    |  7 +++++++
 include/net/xfrm.h          |  3 ++-
 net/xfrm/xfrm_policy.c      | 45 ++++++++++++++++++++++++++++++++++++++++-----
 net/xfrm/xfrm_state.c       |  7 ++++++-
 5 files changed, 59 insertions(+), 7 deletions(-)
 create mode 100644 include/net/netns/xfrm.h

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 319557789a40..6fc13d905c5f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -19,6 +19,7 @@
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
 #endif
+#include <net/netns/xfrm.h>
 
 struct proc_dir_entry;
 struct net_device;
@@ -73,6 +74,9 @@ struct net {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct netns_ct		ct;
 #endif
+#endif
+#ifdef CONFIG_XFRM
+	struct netns_xfrm	xfrm;
 #endif
 	struct net_generic	*gen;
 };
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
new file mode 100644
index 000000000000..1cb0024a3b47
--- /dev/null
+++ b/include/net/netns/xfrm.h
@@ -0,0 +1,7 @@
+#ifndef __NETNS_XFRM_H
+#define __NETNS_XFRM_H
+
+struct netns_xfrm {
+};
+
+#endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 45e11b3631e4..9107d6f5c297 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1269,7 +1269,8 @@ struct xfrm6_tunnel {
 
 extern void xfrm_init(void);
 extern void xfrm4_init(void);
-extern void xfrm_state_init(void);
+extern int xfrm_state_init(struct net *net);
+extern void xfrm_state_fini(struct net *net);
 extern void xfrm4_state_init(void);
 #ifdef CONFIG_XFRM
 extern int xfrm6_init(void);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ea3456daa9cb..8e7671b9e76e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2394,12 +2394,13 @@ static int __init xfrm_statistics_init(void)
 }
 #endif
 
-static void __init xfrm_policy_init(void)
+static int __net_init xfrm_policy_init(struct net *net)
 {
 	unsigned int hmask, sz;
 	int dir;
 
-	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
+	if (net_eq(net, &init_net))
+		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 					   sizeof(struct xfrm_dst),
 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL);
@@ -2425,16 +2426,50 @@ static void __init xfrm_policy_init(void)
 	}
 
 	INIT_LIST_HEAD(&xfrm_policy_all);
-	register_netdevice_notifier(&xfrm_dev_notifier);
+	if (net_eq(net, &init_net))
+		register_netdevice_notifier(&xfrm_dev_notifier);
+	return 0;
+}
+
+static void xfrm_policy_fini(struct net *net)
+{
 }
 
+static int __net_init xfrm_net_init(struct net *net)
+{
+	int rv;
+
+	rv = xfrm_state_init(net);
+	if (rv < 0)
+		goto out_state;
+	rv = xfrm_policy_init(net);
+	if (rv < 0)
+		goto out_policy;
+	return 0;
+
+out_policy:
+	xfrm_state_fini(net);
+out_state:
+	return rv;
+}
+
+static void __net_exit xfrm_net_exit(struct net *net)
+{
+	xfrm_policy_fini(net);
+	xfrm_state_fini(net);
+}
+
+static struct pernet_operations __net_initdata xfrm_net_ops = {
+	.init = xfrm_net_init,
+	.exit = xfrm_net_exit,
+};
+
 void __init xfrm_init(void)
 {
+	register_pernet_subsys(&xfrm_net_ops);
 #ifdef CONFIG_XFRM_STATISTICS
 	xfrm_statistics_init();
 #endif
-	xfrm_state_init();
-	xfrm_policy_init();
 	xfrm_input_init();
 #ifdef CONFIG_XFRM_STATISTICS
 	xfrm_proc_init();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index cd9d9171ded7..268fe3f9e498 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2080,7 +2080,7 @@ error:
 
 EXPORT_SYMBOL(xfrm_init_state);
 
-void __init xfrm_state_init(void)
+int __net_init xfrm_state_init(struct net *net)
 {
 	unsigned int sz;
 
@@ -2094,6 +2094,11 @@ void __init xfrm_state_init(void)
 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
+	return 0;
+}
+
+void xfrm_state_fini(struct net *net)
+{
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-- 
cgit v1.2.3


From 673c09be457bb23aa0eaaa79804cbb342210d195 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:15:16 -0800
Subject: netns xfrm: add struct xfrm_state::xs_net

To avoid unnecessary complications with passing netns around.

* set once, very early after allocating
* once set, never changes

For a while create every xfrm_state in init_net.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    | 10 +++++++++-
 net/ipv4/ipcomp.c     |  2 +-
 net/ipv6/ipcomp6.c    |  2 +-
 net/key/af_key.c      |  2 +-
 net/xfrm/xfrm_state.c |  9 +++++----
 net/xfrm/xfrm_user.c  |  4 ++--
 6 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9107d6f5c297..9da89039832c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -130,6 +130,9 @@ struct xfrm_state_walk {
 /* Full description of state of transformer. */
 struct xfrm_state
 {
+#ifdef CONFIG_NET_NS
+	struct net		*xs_net;
+#endif
 	union {
 		struct hlist_node	gclist;
 		struct hlist_node	bydst;
@@ -223,6 +226,11 @@ struct xfrm_state
 	void			*data;
 };
 
+static inline struct net *xs_net(struct xfrm_state *x)
+{
+	return read_pnet(&x->xs_net);
+}
+
 /* xflags - make enum if more show up */
 #define XFRM_TIME_DEFER	1
 
@@ -1296,7 +1304,7 @@ extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
 extern int xfrm_state_walk(struct xfrm_state_walk *walk,
 			   int (*func)(struct xfrm_state *, int, void*), void *);
 extern void xfrm_state_walk_done(struct xfrm_state_walk *walk);
-extern struct xfrm_state *xfrm_state_alloc(void);
+extern struct xfrm_state *xfrm_state_alloc(struct net *net);
 extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
 					  struct flowi *fl, struct xfrm_tmpl *tmpl,
 					  struct xfrm_policy *pol, int *err,
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index ec8264ae45c2..0a35f1b6f22c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -49,7 +49,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 {
 	struct xfrm_state *t;
 
-	t = xfrm_state_alloc();
+	t = xfrm_state_alloc(&init_net);
 	if (t == NULL)
 		goto out;
 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index d4576a9c154f..c369638e208a 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -76,7 +76,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 {
 	struct xfrm_state *t = NULL;
 
-	t = xfrm_state_alloc();
+	t = xfrm_state_alloc(&init_net);
 	if (!t)
 		goto out;
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5b22e011653b..bde8aad4cc93 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1122,7 +1122,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
 		return ERR_PTR(-EINVAL);
 
-	x = xfrm_state_alloc();
+	x = xfrm_state_alloc(&init_net);
 	if (x == NULL)
 		return ERR_PTR(-ENOBUFS);
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 268fe3f9e498..81bde76d049c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -504,13 +504,14 @@ out:
 
 static void xfrm_replay_timer_handler(unsigned long data);
 
-struct xfrm_state *xfrm_state_alloc(void)
+struct xfrm_state *xfrm_state_alloc(struct net *net)
 {
 	struct xfrm_state *x;
 
 	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
 
 	if (x) {
+		write_pnet(&x->xs_net, net);
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
 		INIT_LIST_HEAD(&x->km.all);
@@ -835,7 +836,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			error = -EEXIST;
 			goto out;
 		}
-		x = xfrm_state_alloc();
+		x = xfrm_state_alloc(&init_net);
 		if (x == NULL) {
 			error = -ENOMEM;
 			goto out;
@@ -1017,7 +1018,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 	if (!create)
 		return NULL;
 
-	x = xfrm_state_alloc();
+	x = xfrm_state_alloc(&init_net);
 	if (likely(x)) {
 		switch (family) {
 		case AF_INET:
@@ -1125,7 +1126,7 @@ EXPORT_SYMBOL(xfrm_state_add);
 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 {
 	int err = -ENOMEM;
-	struct xfrm_state *x = xfrm_state_alloc();
+	struct xfrm_state *x = xfrm_state_alloc(&init_net);
 	if (!x)
 		goto error;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ee15d5dd6544..65cdaa5c2280 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -320,7 +320,7 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
 					       struct nlattr **attrs,
 					       int *errp)
 {
-	struct xfrm_state *x = xfrm_state_alloc();
+	struct xfrm_state *x = xfrm_state_alloc(&init_net);
 	int err = -ENOMEM;
 
 	if (!x)
@@ -1663,7 +1663,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct nlattr *rt = attrs[XFRMA_TMPL];
 
 	struct xfrm_user_acquire *ua = nlmsg_data(nlh);
-	struct xfrm_state *x = xfrm_state_alloc();
+	struct xfrm_state *x = xfrm_state_alloc(&init_net);
 	int err = -ENOMEM;
 
 	if (!x)
-- 
cgit v1.2.3


From 9d4139c76905833afcb77fe8ccc17f302a0eb9ab Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:16:11 -0800
Subject: netns xfrm: per-netns xfrm_state_all list

This is done to get
a) simple "something leaked" check
b) cover possible DoSes when other netns puts many, many xfrm_states
   onto a list.
c) not miss "alien xfrm_state" check in some of list iterators in future.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  3 +++
 net/xfrm/xfrm_state.c    | 14 ++++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 1cb0024a3b47..6ae234a16517 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -1,7 +1,10 @@
 #ifndef __NETNS_XFRM_H
 #define __NETNS_XFRM_H
 
+#include <linux/list.h>
+
 struct netns_xfrm {
+	struct list_head	state_all;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 81bde76d049c..85bb85484b70 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,7 +50,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
  * Main use is finding SA after policy selected tunnel or transport mode.
  * Also, it can be used by ah/esp icmp error handler to find offending SA.
  */
-static LIST_HEAD(xfrm_state_all);
 static struct hlist_head *xfrm_state_bydst __read_mostly;
 static struct hlist_head *xfrm_state_bysrc __read_mostly;
 static struct hlist_head *xfrm_state_byspi __read_mostly;
@@ -855,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add(&x->km.all, &xfrm_state_all);
+			list_add(&x->km.all, &init_net.xfrm.state_all);
 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 			h = xfrm_src_hash(daddr, saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -924,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	x->genid = ++xfrm_state_genid;
 
-	list_add(&x->km.all, &xfrm_state_all);
+	list_add(&x->km.all, &init_net.xfrm.state_all);
 
 	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
@@ -1053,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		xfrm_state_hold(x);
 		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 		add_timer(&x->timer);
-		list_add(&x->km.all, &xfrm_state_all);
+		list_add(&x->km.all, &init_net.xfrm.state_all);
 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -1559,10 +1558,10 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 
 	spin_lock_bh(&xfrm_state_lock);
 	if (list_empty(&walk->all))
-		x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all);
+		x = list_first_entry(&init_net.xfrm.state_all, struct xfrm_state_walk, all);
 	else
 		x = list_entry(&walk->all, struct xfrm_state_walk, all);
-	list_for_each_entry_from(x, &xfrm_state_all, all) {
+	list_for_each_entry_from(x, &init_net.xfrm.state_all, all) {
 		if (x->state == XFRM_STATE_DEAD)
 			continue;
 		state = container_of(x, struct xfrm_state, km);
@@ -2085,6 +2084,8 @@ int __net_init xfrm_state_init(struct net *net)
 {
 	unsigned int sz;
 
+	INIT_LIST_HEAD(&net->xfrm.state_all);
+
 	sz = sizeof(struct hlist_head) * 8;
 
 	xfrm_state_bydst = xfrm_hash_alloc(sz);
@@ -2100,6 +2101,7 @@ int __net_init xfrm_state_init(struct net *net)
 
 void xfrm_state_fini(struct net *net)
 {
+	WARN_ON(!list_empty(&net->xfrm.state_all));
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-- 
cgit v1.2.3


From 73d189dce486cd6693fa29169b1aac0872efbcea Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:16:58 -0800
Subject: netns xfrm: per-netns xfrm_state_bydst hash

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  9 +++++++++
 net/xfrm/xfrm_state.c    | 50 +++++++++++++++++++++++++-----------------------
 2 files changed, 35 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 6ae234a16517..02487b39ce53 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -5,6 +5,15 @@
 
 struct netns_xfrm {
 	struct list_head	state_all;
+	/*
+	 * Hash table to find appropriate SA towards given target (endpoint of
+	 * tunnel or destination of transport mode) allowed by selector.
+	 *
+	 * Main use is finding SA after policy selected tunnel or transport
+	 * mode. Also, it can be used by ah/esp icmp error handler to find
+	 * offending SA.
+	 */
+	struct hlist_head	*state_bydst;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 85bb85484b70..08b78895ffbc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -44,13 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30;
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
-/* Hash table to find appropriate SA towards given target (endpoint
- * of tunnel or destination of transport mode) allowed by selector.
- *
- * Main use is finding SA after policy selected tunnel or transport mode.
- * Also, it can be used by ah/esp icmp error handler to find offending SA.
- */
-static struct hlist_head *xfrm_state_bydst __read_mostly;
 static struct hlist_head *xfrm_state_bysrc __read_mostly;
 static struct hlist_head *xfrm_state_byspi __read_mostly;
 static unsigned int xfrm_state_hmask __read_mostly;
@@ -157,15 +150,15 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 	for (i = xfrm_state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
+		xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi,
 				   nhashmask);
 
-	odst = xfrm_state_bydst;
+	odst = init_net.xfrm.state_bydst;
 	osrc = xfrm_state_bysrc;
 	ospi = xfrm_state_byspi;
 	ohashmask = xfrm_state_hmask;
 
-	xfrm_state_bydst = ndst;
+	init_net.xfrm.state_bydst = ndst;
 	xfrm_state_bysrc = nsrc;
 	xfrm_state_byspi = nspi;
 	xfrm_state_hmask = nhashmask;
@@ -595,7 +588,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
-		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto) &&
 			   (err = security_xfrm_state_delete(x)) != 0) {
 				xfrm_audit_state_delete(x, 0,
@@ -630,7 +623,7 @@ int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 restart:
-		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
 			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
@@ -785,7 +778,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 	spin_lock_bh(&xfrm_state_lock);
 	h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
-	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -855,7 +848,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &init_net.xfrm.state_all);
-			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+			hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 			h = xfrm_src_hash(daddr, saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 			if (x->id.spi) {
@@ -895,7 +888,7 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 	spin_lock(&xfrm_state_lock);
 	h = xfrm_dst_hash(daddr, saddr, reqid, family);
-	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -927,7 +920,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
-	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+	hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 
 	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
 	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -960,7 +953,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	unsigned int h;
 
 	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
-	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
 		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
@@ -985,7 +978,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 	struct hlist_node *entry;
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
 		if (x->props.reqid  != reqid ||
 		    x->props.mode   != mode ||
 		    x->props.family != family ||
@@ -1053,7 +1046,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 		add_timer(&x->timer);
 		list_add(&x->km.all, &init_net.xfrm.state_all);
-		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+		hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 
@@ -1208,7 +1201,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
 	if (m->reqid) {
 		h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
 				  m->reqid, m->old_family);
-		hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
 			if (x->props.mode != m->mode ||
 			    x->id.proto != m->proto)
 				continue;
@@ -1457,7 +1450,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
-		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
 			if (x->km.seq == seq &&
 			    x->km.state == XFRM_STATE_ACQ) {
 				xfrm_state_hold(x);
@@ -2088,20 +2081,29 @@ int __net_init xfrm_state_init(struct net *net)
 
 	sz = sizeof(struct hlist_head) * 8;
 
-	xfrm_state_bydst = xfrm_hash_alloc(sz);
+	net->xfrm.state_bydst = xfrm_hash_alloc(sz);
+	if (!net->xfrm.state_bydst)
+		goto out_bydst;
 	xfrm_state_bysrc = xfrm_hash_alloc(sz);
 	xfrm_state_byspi = xfrm_hash_alloc(sz);
-	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
-		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
+
+out_bydst:
+	return -ENOMEM;
 }
 
 void xfrm_state_fini(struct net *net)
 {
+	unsigned int sz;
+
 	WARN_ON(!list_empty(&net->xfrm.state_all));
+
+	sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head);
+	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
+	xfrm_hash_free(net->xfrm.state_bydst, sz);
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-- 
cgit v1.2.3


From d320bbb306f2085892bc958781e8fcaf5d491589 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:17:24 -0800
Subject: netns xfrm: per-netns xfrm_state_bysrc hash

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_state.c    | 23 ++++++++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 02487b39ce53..bfcd5a3e25b6 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -14,6 +14,7 @@ struct netns_xfrm {
 	 * offending SA.
 	 */
 	struct hlist_head	*state_bydst;
+	struct hlist_head	*state_bysrc;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 08b78895ffbc..52d828bdf3d7 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -44,7 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30;
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
-static struct hlist_head *xfrm_state_bysrc __read_mostly;
 static struct hlist_head *xfrm_state_byspi __read_mostly;
 static unsigned int xfrm_state_hmask __read_mostly;
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
@@ -154,12 +153,12 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 				   nhashmask);
 
 	odst = init_net.xfrm.state_bydst;
-	osrc = xfrm_state_bysrc;
+	osrc = init_net.xfrm.state_bysrc;
 	ospi = xfrm_state_byspi;
 	ohashmask = xfrm_state_hmask;
 
 	init_net.xfrm.state_bydst = ndst;
-	xfrm_state_bysrc = nsrc;
+	init_net.xfrm.state_bysrc = nsrc;
 	xfrm_state_byspi = nspi;
 	xfrm_state_hmask = nhashmask;
 
@@ -712,7 +711,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
 	struct xfrm_state *x;
 	struct hlist_node *entry;
 
-	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
+	hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto)
 			continue;
@@ -850,7 +849,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			list_add(&x->km.all, &init_net.xfrm.state_all);
 			hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 			h = xfrm_src_hash(daddr, saddr, family);
-			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+			hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
 				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
@@ -923,7 +922,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 
 	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
-	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+	hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 
 	if (x->id.spi) {
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
@@ -1048,7 +1047,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		list_add(&x->km.all, &init_net.xfrm.state_all);
 		hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 		h = xfrm_src_hash(daddr, saddr, family);
-		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+		hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 
 		xfrm_state_num++;
 
@@ -1218,7 +1217,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
 	} else {
 		h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
 				  m->old_family);
-		hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
+		hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
 			if (x->props.mode != m->mode ||
 			    x->id.proto != m->proto)
 				continue;
@@ -2084,13 +2083,17 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_bydst = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_bydst)
 		goto out_bydst;
-	xfrm_state_bysrc = xfrm_hash_alloc(sz);
+	net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
+	if (!net->xfrm.state_bysrc)
+		goto out_bysrc;
 	xfrm_state_byspi = xfrm_hash_alloc(sz);
 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
 
+out_bysrc:
+	xfrm_hash_free(net->xfrm.state_bydst, sz);
 out_bydst:
 	return -ENOMEM;
 }
@@ -2102,6 +2105,8 @@ void xfrm_state_fini(struct net *net)
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
 	sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head);
+	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
+	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
 	xfrm_hash_free(net->xfrm.state_bydst, sz);
 }
-- 
cgit v1.2.3


From b754a4fd8f58d245c9b5e92914cce09c4309cb67 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:17:47 -0800
Subject: netns xfrm: per-netns xfrm_state_byspi hash

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_state.c    | 21 +++++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index bfcd5a3e25b6..b05ca3f366aa 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -15,6 +15,7 @@ struct netns_xfrm {
 	 */
 	struct hlist_head	*state_bydst;
 	struct hlist_head	*state_bysrc;
+	struct hlist_head	*state_byspi;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 52d828bdf3d7..66ca1ef7f8eb 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -44,7 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30;
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
-static struct hlist_head *xfrm_state_byspi __read_mostly;
 static unsigned int xfrm_state_hmask __read_mostly;
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
@@ -154,12 +153,12 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 
 	odst = init_net.xfrm.state_bydst;
 	osrc = init_net.xfrm.state_bysrc;
-	ospi = xfrm_state_byspi;
+	ospi = init_net.xfrm.state_byspi;
 	ohashmask = xfrm_state_hmask;
 
 	init_net.xfrm.state_bydst = ndst;
 	init_net.xfrm.state_bysrc = nsrc;
-	xfrm_state_byspi = nspi;
+	init_net.xfrm.state_byspi = nspi;
 	xfrm_state_hmask = nhashmask;
 
 	spin_unlock_bh(&xfrm_state_lock);
@@ -679,7 +678,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 	struct xfrm_state *x;
 	struct hlist_node *entry;
 
-	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
+	hlist_for_each_entry(x, entry, init_net.xfrm.state_byspi+h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto)
@@ -852,7 +851,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
-				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+				hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
 			}
 			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
 			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
@@ -928,7 +927,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
 				  x->props.family);
 
-		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+		hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
 	}
 
 	mod_timer(&x->timer, jiffies + HZ);
@@ -1524,7 +1523,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
 	if (x->id.spi) {
 		spin_lock_bh(&xfrm_state_lock);
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+		hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
 		spin_unlock_bh(&xfrm_state_lock);
 
 		err = 0;
@@ -2086,12 +2085,16 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_bysrc)
 		goto out_bysrc;
-	xfrm_state_byspi = xfrm_hash_alloc(sz);
+	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
+	if (!net->xfrm.state_byspi)
+		goto out_byspi;
 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
 
+out_byspi:
+	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 out_bysrc:
 	xfrm_hash_free(net->xfrm.state_bydst, sz);
 out_bydst:
@@ -2105,6 +2108,8 @@ void xfrm_state_fini(struct net *net)
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
 	sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head);
+	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
+	xfrm_hash_free(net->xfrm.state_byspi, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
 	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
-- 
cgit v1.2.3


From 529983ecabeae3d8e61c9e27079154b1b8544dcd Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:18:12 -0800
Subject: netns xfrm: per-netns xfrm_state_hmask

Since hashtables are per-netns, they can be independently resized.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_state.c    | 31 +++++++++++++++----------------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index b05ca3f366aa..dbbc0e972273 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -16,6 +16,7 @@ struct netns_xfrm {
 	struct hlist_head	*state_bydst;
 	struct hlist_head	*state_bysrc;
 	struct hlist_head	*state_byspi;
+	unsigned int		state_hmask;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 66ca1ef7f8eb..de08ed9a4775 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -44,7 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30;
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
-static unsigned int xfrm_state_hmask __read_mostly;
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
@@ -64,20 +63,20 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
 					 u32 reqid,
 					 unsigned short family)
 {
-	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
+	return __xfrm_dst_hash(daddr, saddr, reqid, family, init_net.xfrm.state_hmask);
 }
 
 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
 					 xfrm_address_t *saddr,
 					 unsigned short family)
 {
-	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
+	return __xfrm_src_hash(daddr, saddr, family, init_net.xfrm.state_hmask);
 }
 
 static inline unsigned int
 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
 {
-	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
+	return __xfrm_spi_hash(daddr, spi, proto, family, init_net.xfrm.state_hmask);
 }
 
 static void xfrm_hash_transfer(struct hlist_head *list,
@@ -113,7 +112,7 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 
 static unsigned long xfrm_hash_new_size(void)
 {
-	return ((xfrm_state_hmask + 1) << 1) *
+	return ((init_net.xfrm.state_hmask + 1) << 1) *
 		sizeof(struct hlist_head);
 }
 
@@ -147,19 +146,19 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 	spin_lock_bh(&xfrm_state_lock);
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
-	for (i = xfrm_state_hmask; i >= 0; i--)
+	for (i = init_net.xfrm.state_hmask; i >= 0; i--)
 		xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi,
 				   nhashmask);
 
 	odst = init_net.xfrm.state_bydst;
 	osrc = init_net.xfrm.state_bysrc;
 	ospi = init_net.xfrm.state_byspi;
-	ohashmask = xfrm_state_hmask;
+	ohashmask = init_net.xfrm.state_hmask;
 
 	init_net.xfrm.state_bydst = ndst;
 	init_net.xfrm.state_bysrc = nsrc;
 	init_net.xfrm.state_byspi = nspi;
-	xfrm_state_hmask = nhashmask;
+	init_net.xfrm.state_hmask = nhashmask;
 
 	spin_unlock_bh(&xfrm_state_lock);
 
@@ -582,7 +581,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
 {
 	int i, err = 0;
 
-	for (i = 0; i <= xfrm_state_hmask; i++) {
+	for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
@@ -617,7 +616,7 @@ int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
 	if (err)
 		goto out;
 
-	for (i = 0; i <= xfrm_state_hmask; i++) {
+	for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 restart:
@@ -652,7 +651,7 @@ void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
 {
 	spin_lock_bh(&xfrm_state_lock);
 	si->sadcnt = xfrm_state_num;
-	si->sadhcnt = xfrm_state_hmask;
+	si->sadhcnt = init_net.xfrm.state_hmask;
 	si->sadhmcnt = xfrm_state_hashmax;
 	spin_unlock_bh(&xfrm_state_lock);
 }
@@ -754,8 +753,8 @@ __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
 static void xfrm_hash_grow_check(int have_hash_collision)
 {
 	if (have_hash_collision &&
-	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
-	    xfrm_state_num > xfrm_state_hmask)
+	    (init_net.xfrm.state_hmask + 1) < xfrm_state_hashmax &&
+	    xfrm_state_num > init_net.xfrm.state_hmask)
 		schedule_work(&xfrm_hash_work);
 }
 
@@ -1444,7 +1443,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 {
 	int i;
 
-	for (i = 0; i <= xfrm_state_hmask; i++) {
+	for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
@@ -2088,7 +2087,7 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_byspi)
 		goto out_byspi;
-	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
+	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
@@ -2107,7 +2106,7 @@ void xfrm_state_fini(struct net *net)
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
-	sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head);
+	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
 	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
 	xfrm_hash_free(net->xfrm.state_byspi, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
-- 
cgit v1.2.3


From 0bf7c5b019518d3fe9cb96b9c97bf44d251472c3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:18:39 -0800
Subject: netns xfrm: per-netns xfrm_state counts

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_state.c    | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index dbbc0e972273..492b471d2a7f 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -17,6 +17,7 @@ struct netns_xfrm {
 	struct hlist_head	*state_bysrc;
 	struct hlist_head	*state_byspi;
 	unsigned int		state_hmask;
+	unsigned int		state_num;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index de08ed9a4775..7ecf6eeff84a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -45,7 +45,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30;
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
@@ -548,7 +547,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		hlist_del(&x->bysrc);
 		if (x->id.spi)
 			hlist_del(&x->byspi);
-		xfrm_state_num--;
+		init_net.xfrm.state_num--;
 		spin_unlock(&xfrm_state_lock);
 
 		/* All xfrm_state objects are created by xfrm_state_alloc.
@@ -650,7 +649,7 @@ EXPORT_SYMBOL(xfrm_state_flush);
 void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
 {
 	spin_lock_bh(&xfrm_state_lock);
-	si->sadcnt = xfrm_state_num;
+	si->sadcnt = init_net.xfrm.state_num;
 	si->sadhcnt = init_net.xfrm.state_hmask;
 	si->sadhmcnt = xfrm_state_hashmax;
 	spin_unlock_bh(&xfrm_state_lock);
@@ -754,7 +753,7 @@ static void xfrm_hash_grow_check(int have_hash_collision)
 {
 	if (have_hash_collision &&
 	    (init_net.xfrm.state_hmask + 1) < xfrm_state_hashmax &&
-	    xfrm_state_num > init_net.xfrm.state_hmask)
+	    init_net.xfrm.state_num > init_net.xfrm.state_hmask)
 		schedule_work(&xfrm_hash_work);
 }
 
@@ -855,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
 			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 			add_timer(&x->timer);
-			xfrm_state_num++;
+			init_net.xfrm.state_num++;
 			xfrm_hash_grow_check(x->bydst.next != NULL);
 		} else {
 			x->km.state = XFRM_STATE_DEAD;
@@ -935,7 +934,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	wake_up(&km_waitq);
 
-	xfrm_state_num++;
+	init_net.xfrm.state_num++;
 
 	xfrm_hash_grow_check(x->bydst.next != NULL);
 }
@@ -1047,7 +1046,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 
-		xfrm_state_num++;
+		init_net.xfrm.state_num++;
 
 		xfrm_hash_grow_check(x->bydst.next != NULL);
 	}
@@ -2089,6 +2088,7 @@ int __net_init xfrm_state_init(struct net *net)
 		goto out_byspi;
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
+	net->xfrm.state_num = 0;
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
 
-- 
cgit v1.2.3


From 630827338585022b851ec0a6335df8e436c900e4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:19:07 -0800
Subject: netns xfrm: per-netns xfrm_hash_work

All of this is implicit passing which netns's hashes should be resized.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  2 ++
 net/xfrm/xfrm_state.c    | 35 +++++++++++++++++------------------
 2 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 492b471d2a7f..bd688021395a 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -2,6 +2,7 @@
 #define __NETNS_XFRM_H
 
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 struct netns_xfrm {
 	struct list_head	state_all;
@@ -18,6 +19,7 @@ struct netns_xfrm {
 	struct hlist_head	*state_byspi;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
+	struct work_struct	state_hash_work;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7ecf6eeff84a..1b2a72c8429c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -109,16 +109,16 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 	}
 }
 
-static unsigned long xfrm_hash_new_size(void)
+static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
 {
-	return ((init_net.xfrm.state_hmask + 1) << 1) *
-		sizeof(struct hlist_head);
+	return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
 }
 
 static DEFINE_MUTEX(hash_resize_mutex);
 
-static void xfrm_hash_resize(struct work_struct *__unused)
+static void xfrm_hash_resize(struct work_struct *work)
 {
+	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
 	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
 	unsigned long nsize, osize;
 	unsigned int nhashmask, ohashmask;
@@ -126,7 +126,7 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 
 	mutex_lock(&hash_resize_mutex);
 
-	nsize = xfrm_hash_new_size();
+	nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
 	ndst = xfrm_hash_alloc(nsize);
 	if (!ndst)
 		goto out_unlock;
@@ -145,19 +145,19 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 	spin_lock_bh(&xfrm_state_lock);
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
-	for (i = init_net.xfrm.state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi,
+	for (i = net->xfrm.state_hmask; i >= 0; i--)
+		xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
 				   nhashmask);
 
-	odst = init_net.xfrm.state_bydst;
-	osrc = init_net.xfrm.state_bysrc;
-	ospi = init_net.xfrm.state_byspi;
-	ohashmask = init_net.xfrm.state_hmask;
+	odst = net->xfrm.state_bydst;
+	osrc = net->xfrm.state_bysrc;
+	ospi = net->xfrm.state_byspi;
+	ohashmask = net->xfrm.state_hmask;
 
-	init_net.xfrm.state_bydst = ndst;
-	init_net.xfrm.state_bysrc = nsrc;
-	init_net.xfrm.state_byspi = nspi;
-	init_net.xfrm.state_hmask = nhashmask;
+	net->xfrm.state_bydst = ndst;
+	net->xfrm.state_bysrc = nsrc;
+	net->xfrm.state_byspi = nspi;
+	net->xfrm.state_hmask = nhashmask;
 
 	spin_unlock_bh(&xfrm_state_lock);
 
@@ -170,8 +170,6 @@ out_unlock:
 	mutex_unlock(&hash_resize_mutex);
 }
 
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
-
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
 
@@ -754,7 +752,7 @@ static void xfrm_hash_grow_check(int have_hash_collision)
 	if (have_hash_collision &&
 	    (init_net.xfrm.state_hmask + 1) < xfrm_state_hashmax &&
 	    init_net.xfrm.state_num > init_net.xfrm.state_hmask)
-		schedule_work(&xfrm_hash_work);
+		schedule_work(&init_net.xfrm.state_hash_work);
 }
 
 struct xfrm_state *
@@ -2089,6 +2087,7 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	net->xfrm.state_num = 0;
+	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
 
-- 
cgit v1.2.3


From b8a0ae20b0eecd4b86a113d2abe2fa5a582b30a6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:20:11 -0800
Subject: netns xfrm: per-netns state GC list

km_waitq is going to be made per-netns to disallow spurious wakeups
in __xfrm_lookup().

To not wakeup after every garbage-collected xfrm_state (which potentially
can be from different netns) make state GC list per-netns.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h | 1 +
 net/xfrm/xfrm_state.c    | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index bd688021395a..8ceb76568852 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -20,6 +20,7 @@ struct netns_xfrm {
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
+	struct hlist_head	state_gc_list;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1b2a72c8429c..864a97477ae5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -177,7 +177,6 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
@@ -394,7 +393,7 @@ static void xfrm_state_gc_task(struct work_struct *data)
 	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_move_list(&xfrm_state_gc_list, &gc_list);
+	hlist_move_list(&init_net.xfrm.state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
 	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
@@ -527,7 +526,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+	hlist_add_head(&x->gclist, &init_net.xfrm.state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -2088,6 +2087,7 @@ int __net_init xfrm_state_init(struct net *net)
 
 	net->xfrm.state_num = 0;
 	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
+	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
 	return 0;
 
-- 
cgit v1.2.3


From c78371441c0d957f54c9f8a35b3ee5a378d14808 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:20:36 -0800
Subject: netns xfrm: per-netns state GC work

State GC is per-netns, and this is part of it.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_state.c    | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 8ceb76568852..80555351fe5e 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -21,6 +21,7 @@ struct netns_xfrm {
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
 	struct hlist_head	state_gc_list;
+	struct work_struct	state_gc_work;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 864a97477ae5..69c0b06bf756 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -176,7 +176,6 @@ EXPORT_SYMBOL(km_waitq);
 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
-static struct work_struct xfrm_state_gc_work;
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
@@ -386,14 +385,15 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 	kfree(x);
 }
 
-static void xfrm_state_gc_task(struct work_struct *data)
+static void xfrm_state_gc_task(struct work_struct *work)
 {
+	struct net *net = container_of(work, struct net, xfrm.state_gc_work);
 	struct xfrm_state *x;
 	struct hlist_node *entry, *tmp;
 	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_move_list(&init_net.xfrm.state_gc_list, &gc_list);
+	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
 	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
@@ -528,7 +528,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 	spin_lock_bh(&xfrm_state_gc_lock);
 	hlist_add_head(&x->gclist, &init_net.xfrm.state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
-	schedule_work(&xfrm_state_gc_work);
+	schedule_work(&init_net.xfrm.state_gc_work);
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -2088,7 +2088,7 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_num = 0;
 	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
 	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
-	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
+	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
 	return 0;
 
 out_byspi:
-- 
cgit v1.2.3


From 50a30657fd7ee77a94a6bf0ad86eba7c37c3032e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:21:01 -0800
Subject: netns xfrm: per-netns km_waitq

Disallow spurious wakeups in __xfrm_lookup().

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  3 +++
 include/net/xfrm.h       |  1 -
 net/key/af_key.c         |  2 +-
 net/xfrm/xfrm_policy.c   |  4 ++--
 net/xfrm/xfrm_state.c    | 16 +++++++---------
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 80555351fe5e..2a383c8ba1af 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -2,6 +2,7 @@
 #define __NETNS_XFRM_H
 
 #include <linux/list.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 
 struct netns_xfrm {
@@ -22,6 +23,8 @@ struct netns_xfrm {
 	struct work_struct	state_hash_work;
 	struct hlist_head	state_gc_list;
 	struct work_struct	state_gc_work;
+
+	wait_queue_head_t	km_waitq;
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9da89039832c..0d4353c11093 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1459,7 +1459,6 @@ extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 			struct xfrm_kmaddress *k);
 #endif
 
-extern wait_queue_head_t km_waitq;
 extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid);
 extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index bde8aad4cc93..f202ba6c8dcb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1411,7 +1411,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 	spin_lock_bh(&x->lock);
 	if (x->km.state == XFRM_STATE_ACQ) {
 		x->km.state = XFRM_STATE_ERROR;
-		wake_up(&km_waitq);
+		wake_up(&init_net.xfrm.km_waitq);
 	}
 	spin_unlock_bh(&x->lock);
 	xfrm_state_put(x);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8e7671b9e76e..cf2bf3aa7ab4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1691,11 +1691,11 @@ restart:
 			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
 				DECLARE_WAITQUEUE(wait, current);
 
-				add_wait_queue(&km_waitq, &wait);
+				add_wait_queue(&init_net.xfrm.km_waitq, &wait);
 				set_current_state(TASK_INTERRUPTIBLE);
 				schedule();
 				set_current_state(TASK_RUNNING);
-				remove_wait_queue(&km_waitq, &wait);
+				remove_wait_queue(&init_net.xfrm.km_waitq, &wait);
 
 				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 69c0b06bf756..24bd89e76236 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -170,9 +170,6 @@ out_unlock:
 	mutex_unlock(&hash_resize_mutex);
 }
 
-DECLARE_WAIT_QUEUE_HEAD(km_waitq);
-EXPORT_SYMBOL(km_waitq);
-
 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
@@ -399,7 +396,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
 	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
 
-	wake_up(&km_waitq);
+	wake_up(&net->xfrm.km_waitq);
 }
 
 static inline unsigned long make_jiffies(long secs)
@@ -470,7 +467,7 @@ resched:
 expired:
 	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		wake_up(&km_waitq);
+		wake_up(&init_net.xfrm.km_waitq);
 		next = 2;
 		goto resched;
 	}
@@ -638,7 +635,7 @@ restart:
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	wake_up(&km_waitq);
+	wake_up(&init_net.xfrm.km_waitq);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
@@ -929,7 +926,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	if (x->replay_maxage)
 		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
-	wake_up(&km_waitq);
+	wake_up(&init_net.xfrm.km_waitq);
 
 	init_net.xfrm.state_num++;
 
@@ -1743,7 +1740,7 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
 	km_state_notify(x, &c);
 
 	if (hard)
-		wake_up(&km_waitq);
+		wake_up(&init_net.xfrm.km_waitq);
 }
 
 EXPORT_SYMBOL(km_state_expired);
@@ -1794,7 +1791,7 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
 	km_policy_notify(pol, dir, &c);
 
 	if (hard)
-		wake_up(&km_waitq);
+		wake_up(&init_net.xfrm.km_waitq);
 }
 EXPORT_SYMBOL(km_policy_expired);
 
@@ -2089,6 +2086,7 @@ int __net_init xfrm_state_init(struct net *net)
 	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
 	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
 	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
+	init_waitqueue_head(&net->xfrm.km_waitq);
 	return 0;
 
 out_byspi:
-- 
cgit v1.2.3


From 0331b1f383e1fa4049f8e75cafeea8f006171c64 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:21:45 -0800
Subject: netns xfrm: add struct xfrm_policy::xp_net

Again, to avoid complications with passing netns when not necessary.
Again, ->xp_net is set-once field, once set it never changes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 10 +++++++++-
 net/key/af_key.c       |  4 ++--
 net/xfrm/xfrm_policy.c |  5 +++--
 net/xfrm/xfrm_user.c   |  4 ++--
 4 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0d4353c11093..1ab17565f01c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -475,6 +475,9 @@ struct xfrm_policy_walk {
 
 struct xfrm_policy
 {
+#ifdef CONFIG_NET_NS
+	struct net		*xp_net;
+#endif
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
@@ -499,6 +502,11 @@ struct xfrm_policy
 	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
 };
 
+static inline struct net *xp_net(struct xfrm_policy *xp)
+{
+	return read_pnet(&xp->xp_net);
+}
+
 struct xfrm_kmaddress {
 	xfrm_address_t          local;
 	xfrm_address_t          remote;
@@ -1425,7 +1433,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 }
 #endif
 
-struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp);
+struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
 
 extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type);
 extern int xfrm_policy_walk(struct xfrm_policy_walk *walk,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f202ba6c8dcb..036315d6b665 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2174,7 +2174,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
 		return -EINVAL;
 
-	xp = xfrm_policy_alloc(GFP_KERNEL);
+	xp = xfrm_policy_alloc(&init_net, GFP_KERNEL);
 	if (xp == NULL)
 		return -ENOBUFS;
 
@@ -3141,7 +3141,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 	    (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND))
 		return NULL;
 
-	xp = xfrm_policy_alloc(GFP_ATOMIC);
+	xp = xfrm_policy_alloc(&init_net, GFP_ATOMIC);
 	if (xp == NULL) {
 		*dir = -ENOBUFS;
 		return NULL;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cf2bf3aa7ab4..3eccefae2c8a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -228,13 +228,14 @@ expired:
  * SPD calls.
  */
 
-struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
+struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 {
 	struct xfrm_policy *policy;
 
 	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 
 	if (policy) {
+		write_pnet(&policy->xp_net, net);
 		INIT_LIST_HEAD(&policy->walk.all);
 		INIT_HLIST_NODE(&policy->bydst);
 		INIT_HLIST_NODE(&policy->byidx);
@@ -1153,7 +1154,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 
 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 {
-	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
+	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
 
 	if (newp) {
 		newp->selector = old->selector;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 65cdaa5c2280..765c01e784e9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1080,7 +1080,7 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i
 
 static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp)
 {
-	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
+	struct xfrm_policy *xp = xfrm_policy_alloc(&init_net, GFP_KERNEL);
 	int err;
 
 	if (!xp) {
@@ -2291,7 +2291,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 	if (p->dir > XFRM_POLICY_OUT)
 		return NULL;
 
-	xp = xfrm_policy_alloc(GFP_KERNEL);
+	xp = xfrm_policy_alloc(&init_net, GFP_KERNEL);
 	if (xp == NULL) {
 		*dir = -ENOBUFS;
 		return NULL;
-- 
cgit v1.2.3


From adfcf0b27e87d16a6a8c364daa724653d4d8930b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:22:11 -0800
Subject: netns xfrm: per-netns policy list

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  2 ++
 net/xfrm/xfrm_policy.c   | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 2a383c8ba1af..66489249cd1a 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -25,6 +25,8 @@ struct netns_xfrm {
 	struct work_struct	state_gc_work;
 
 	wait_queue_head_t	km_waitq;
+
+	struct list_head	policy_all;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3eccefae2c8a..aabc7f28e9c0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,7 +46,6 @@ EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
-static struct list_head xfrm_policy_all;
 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
 EXPORT_SYMBOL(xfrm_policy_count);
 
@@ -615,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
-	list_add(&policy->walk.all, &xfrm_policy_all);
+	list_add(&policy->walk.all, &init_net.xfrm.policy_all);
 	write_unlock_bh(&xfrm_policy_lock);
 
 	if (delpol)
@@ -881,10 +880,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk,
 
 	write_lock_bh(&xfrm_policy_lock);
 	if (list_empty(&walk->walk.all))
-		x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all);
+		x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all);
 	else
 		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
-	list_for_each_entry_from(x, &xfrm_policy_all, all) {
+	list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) {
 		if (x->dead)
 			continue;
 		pol = container_of(x, struct xfrm_policy, walk);
@@ -1086,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
 						     pol->family, dir);
 
-	list_add(&pol->walk.all, &xfrm_policy_all);
+	list_add(&pol->walk.all, &init_net.xfrm.policy_all);
 	hlist_add_head(&pol->bydst, chain);
 	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
 	xfrm_policy_count[dir]++;
@@ -2426,7 +2425,7 @@ static int __net_init xfrm_policy_init(struct net *net)
 			panic("XFRM: failed to allocate bydst hash\n");
 	}
 
-	INIT_LIST_HEAD(&xfrm_policy_all);
+	INIT_LIST_HEAD(&net->xfrm.policy_all);
 	if (net_eq(net, &init_net))
 		register_netdevice_notifier(&xfrm_dev_notifier);
 	return 0;
@@ -2434,6 +2433,7 @@ static int __net_init xfrm_policy_init(struct net *net)
 
 static void xfrm_policy_fini(struct net *net)
 {
+	WARN_ON(!list_empty(&net->xfrm.policy_all));
 }
 
 static int __net_init xfrm_net_init(struct net *net)
-- 
cgit v1.2.3


From 93b851c1c93c7d5cd8d94cd3f3a268b2d5460e9e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:22:35 -0800
Subject: netns xfrm: per-netns xfrm_policy_byidx hash

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_policy.c   | 28 ++++++++++++++++++----------
 2 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 66489249cd1a..5cd7d06c692b 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -27,6 +27,7 @@ struct netns_xfrm {
 	wait_queue_head_t	km_waitq;
 
 	struct list_head	policy_all;
+	struct hlist_head	*policy_byidx;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index aabc7f28e9c0..700cdd7564e4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -329,7 +329,6 @@ struct xfrm_policy_hash {
 
 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
-static struct hlist_head *xfrm_policy_byidx __read_mostly;
 static unsigned int xfrm_idx_hmask __read_mostly;
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 
@@ -438,7 +437,7 @@ static void xfrm_byidx_resize(int total)
 	unsigned int hmask = xfrm_idx_hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-	struct hlist_head *oidx = xfrm_policy_byidx;
+	struct hlist_head *oidx = init_net.xfrm.policy_byidx;
 	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
 	int i;
 
@@ -450,7 +449,7 @@ static void xfrm_byidx_resize(int total)
 	for (i = hmask; i >= 0; i--)
 		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
 
-	xfrm_policy_byidx = nidx;
+	init_net.xfrm.policy_byidx = nidx;
 	xfrm_idx_hmask = nhashmask;
 
 	write_unlock_bh(&xfrm_policy_lock);
@@ -536,7 +535,7 @@ static u32 xfrm_gen_index(int dir)
 		idx_generator += 8;
 		if (idx == 0)
 			idx = 8;
-		list = xfrm_policy_byidx + idx_hash(idx);
+		list = init_net.xfrm.policy_byidx + idx_hash(idx);
 		found = 0;
 		hlist_for_each_entry(p, entry, list, byidx) {
 			if (p->index == idx) {
@@ -609,7 +608,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 		xfrm_policy_count[dir]--;
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
-	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
+	hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index));
 	policy->curlft.add_time = get_seconds();
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -711,7 +710,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 
 	*err = 0;
 	write_lock_bh(&xfrm_policy_lock);
-	chain = xfrm_policy_byidx + idx_hash(id);
+	chain = init_net.xfrm.policy_byidx + idx_hash(id);
 	ret = NULL;
 	hlist_for_each_entry(pol, entry, chain, byidx) {
 		if (pol->type == type && pol->index == id) {
@@ -1087,7 +1086,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 
 	list_add(&pol->walk.all, &init_net.xfrm.policy_all);
 	hlist_add_head(&pol->bydst, chain);
-	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
+	hlist_add_head(&pol->byidx, init_net.xfrm.policy_byidx+idx_hash(pol->index));
 	xfrm_policy_count[dir]++;
 	xfrm_pol_hold(pol);
 
@@ -2408,10 +2407,10 @@ static int __net_init xfrm_policy_init(struct net *net)
 	hmask = 8 - 1;
 	sz = (hmask+1) * sizeof(struct hlist_head);
 
-	xfrm_policy_byidx = xfrm_hash_alloc(sz);
+	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
+	if (!net->xfrm.policy_byidx)
+		goto out_byidx;
 	xfrm_idx_hmask = hmask;
-	if (!xfrm_policy_byidx)
-		panic("XFRM: failed to allocate byidx hash\n");
 
 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
 		struct xfrm_policy_hash *htab;
@@ -2429,11 +2428,20 @@ static int __net_init xfrm_policy_init(struct net *net)
 	if (net_eq(net, &init_net))
 		register_netdevice_notifier(&xfrm_dev_notifier);
 	return 0;
+
+out_byidx:
+	return -ENOMEM;
 }
 
 static void xfrm_policy_fini(struct net *net)
 {
+	unsigned int sz;
+
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
+
+	sz = (xfrm_idx_hmask + 1) * sizeof(struct hlist_head);
+	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
+	xfrm_hash_free(net->xfrm.policy_byidx, sz);
 }
 
 static int __net_init xfrm_net_init(struct net *net)
-- 
cgit v1.2.3


From 8100bea7d619e8496ad8e545d1b41f536e076cd5 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:22:58 -0800
Subject: netns xfrm: per-netns xfrm_policy_byidx hashmask

Per-netns hashes are independently resizeable.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_policy.c   | 15 +++++++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 5cd7d06c692b..42dc318fe3de 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -28,6 +28,7 @@ struct netns_xfrm {
 
 	struct list_head	policy_all;
 	struct hlist_head	*policy_byidx;
+	unsigned int		policy_idx_hmask;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 700cdd7564e4..9e37a44f148e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -329,12 +329,11 @@ struct xfrm_policy_hash {
 
 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
-static unsigned int xfrm_idx_hmask __read_mostly;
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 
 static inline unsigned int idx_hash(u32 index)
 {
-	return __idx_hash(index, xfrm_idx_hmask);
+	return __idx_hash(index, init_net.xfrm.policy_idx_hmask);
 }
 
 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
@@ -434,7 +433,7 @@ static void xfrm_bydst_resize(int dir)
 
 static void xfrm_byidx_resize(int total)
 {
-	unsigned int hmask = xfrm_idx_hmask;
+	unsigned int hmask = init_net.xfrm.policy_idx_hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 	struct hlist_head *oidx = init_net.xfrm.policy_byidx;
@@ -450,7 +449,7 @@ static void xfrm_byidx_resize(int total)
 		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
 
 	init_net.xfrm.policy_byidx = nidx;
-	xfrm_idx_hmask = nhashmask;
+	init_net.xfrm.policy_idx_hmask = nhashmask;
 
 	write_unlock_bh(&xfrm_policy_lock);
 
@@ -474,7 +473,7 @@ static inline int xfrm_bydst_should_resize(int dir, int *total)
 
 static inline int xfrm_byidx_should_resize(int total)
 {
-	unsigned int hmask = xfrm_idx_hmask;
+	unsigned int hmask = init_net.xfrm.policy_idx_hmask;
 
 	if ((hmask + 1) < xfrm_policy_hashmax &&
 	    total > hmask)
@@ -492,7 +491,7 @@ void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
 	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
 	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
 	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
-	si->spdhcnt = xfrm_idx_hmask;
+	si->spdhcnt = init_net.xfrm.policy_idx_hmask;
 	si->spdhmcnt = xfrm_policy_hashmax;
 	read_unlock_bh(&xfrm_policy_lock);
 }
@@ -2410,7 +2409,7 @@ static int __net_init xfrm_policy_init(struct net *net)
 	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
 	if (!net->xfrm.policy_byidx)
 		goto out_byidx;
-	xfrm_idx_hmask = hmask;
+	net->xfrm.policy_idx_hmask = hmask;
 
 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
 		struct xfrm_policy_hash *htab;
@@ -2439,7 +2438,7 @@ static void xfrm_policy_fini(struct net *net)
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
-	sz = (xfrm_idx_hmask + 1) * sizeof(struct hlist_head);
+	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
 	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
 	xfrm_hash_free(net->xfrm.policy_byidx, sz);
 }
-- 
cgit v1.2.3


From 8b18f8eaf9207d53ba3e69f2b98d7290f4dec227 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:23:26 -0800
Subject: netns xfrm: per-netns inexact policies

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  2 ++
 net/xfrm/xfrm_policy.c   | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 42dc318fe3de..c7568315f16c 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -4,6 +4,7 @@
 #include <linux/list.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/xfrm.h>
 
 struct netns_xfrm {
 	struct list_head	state_all;
@@ -29,6 +30,7 @@ struct netns_xfrm {
 	struct list_head	policy_all;
 	struct hlist_head	*policy_byidx;
 	unsigned int		policy_idx_hmask;
+	struct hlist_head	policy_inexact[XFRM_POLICY_MAX * 2];
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9e37a44f148e..ba4e95b8e24e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -327,7 +327,6 @@ struct xfrm_policy_hash {
 	unsigned int		hmask;
 };
 
-static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 
@@ -342,7 +341,7 @@ static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned
 	unsigned int hash = __sel_hash(sel, family, hmask);
 
 	return (hash == hmask + 1 ?
-		&xfrm_policy_inexact[dir] :
+		&init_net.xfrm.policy_inexact[dir] :
 		xfrm_policy_bydst[dir].table + hash);
 }
 
@@ -752,7 +751,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
 		int i;
 
 		hlist_for_each_entry(pol, entry,
-				     &xfrm_policy_inexact[dir], bydst) {
+				     &init_net.xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
 			err = security_xfrm_policy_delete(pol->security);
@@ -810,7 +809,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 		killed = 0;
 	again1:
 		hlist_for_each_entry(pol, entry,
-				     &xfrm_policy_inexact[dir], bydst) {
+				     &init_net.xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
 			hlist_del(&pol->bydst);
@@ -983,7 +982,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
 			break;
 		}
 	}
-	chain = &xfrm_policy_inexact[dir];
+	chain = &init_net.xfrm.policy_inexact[dir];
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
@@ -2152,7 +2151,7 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 		int i;
 
 		hlist_for_each_entry(pol, entry,
-				     &xfrm_policy_inexact[dir], bydst)
+				     &init_net.xfrm.policy_inexact[dir], bydst)
 			prune_one_bundle(pol, func, &gc_list);
 
 		table = xfrm_policy_bydst[dir].table;
@@ -2414,7 +2413,7 @@ static int __net_init xfrm_policy_init(struct net *net)
 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
 		struct xfrm_policy_hash *htab;
 
-		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
+		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
 
 		htab = &xfrm_policy_bydst[dir];
 		htab->table = xfrm_hash_alloc(sz);
@@ -2435,9 +2434,14 @@ out_byidx:
 static void xfrm_policy_fini(struct net *net)
 {
 	unsigned int sz;
+	int dir;
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
+	}
+
 	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
 	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
 	xfrm_hash_free(net->xfrm.policy_byidx, sz);
@@ -2590,7 +2594,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
 			break;
 		}
 	}
-	chain = &xfrm_policy_inexact[dir];
+	chain = &init_net.xfrm.policy_inexact[dir];
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
 		    pol->type == type &&
-- 
cgit v1.2.3


From a35f6c5de32664d82c072a7e2c7d5c5234de4158 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:23:48 -0800
Subject: netns xfrm: per-netns xfrm_policy_bydst hash

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  6 +++++
 net/xfrm/xfrm_policy.c   | 57 ++++++++++++++++++++++++++++--------------------
 2 files changed, 39 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index c7568315f16c..39cfa799fa90 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,6 +6,11 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 
+struct xfrm_policy_hash {
+	struct hlist_head	*table;
+	unsigned int		hmask;
+};
+
 struct netns_xfrm {
 	struct list_head	state_all;
 	/*
@@ -31,6 +36,7 @@ struct netns_xfrm {
 	struct hlist_head	*policy_byidx;
 	unsigned int		policy_idx_hmask;
 	struct hlist_head	policy_inexact[XFRM_POLICY_MAX * 2];
+	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ba4e95b8e24e..929b2fdaa2ef 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -322,12 +322,6 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 	schedule_work(&xfrm_policy_gc_work);
 }
 
-struct xfrm_policy_hash {
-	struct hlist_head	*table;
-	unsigned int		hmask;
-};
-
-static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 
 static inline unsigned int idx_hash(u32 index)
@@ -337,20 +331,20 @@ static inline unsigned int idx_hash(u32 index)
 
 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
 {
-	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
 	unsigned int hash = __sel_hash(sel, family, hmask);
 
 	return (hash == hmask + 1 ?
 		&init_net.xfrm.policy_inexact[dir] :
-		xfrm_policy_bydst[dir].table + hash);
+		init_net.xfrm.policy_bydst[dir].table + hash);
 }
 
 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
 {
-	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
 	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
 
-	return xfrm_policy_bydst[dir].table + hash;
+	return init_net.xfrm.policy_bydst[dir].table + hash;
 }
 
 static void xfrm_dst_hash_transfer(struct hlist_head *list,
@@ -407,10 +401,10 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
 
 static void xfrm_bydst_resize(int dir)
 {
-	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
+	struct hlist_head *odst = init_net.xfrm.policy_bydst[dir].table;
 	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 	int i;
 
@@ -422,8 +416,8 @@ static void xfrm_bydst_resize(int dir)
 	for (i = hmask; i >= 0; i--)
 		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
 
-	xfrm_policy_bydst[dir].table = ndst;
-	xfrm_policy_bydst[dir].hmask = nhashmask;
+	init_net.xfrm.policy_bydst[dir].table = ndst;
+	init_net.xfrm.policy_bydst[dir].hmask = nhashmask;
 
 	write_unlock_bh(&xfrm_policy_lock);
 
@@ -458,7 +452,7 @@ static void xfrm_byidx_resize(int total)
 static inline int xfrm_bydst_should_resize(int dir, int *total)
 {
 	unsigned int cnt = xfrm_policy_count[dir];
-	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
 
 	if (total)
 		*total += cnt;
@@ -763,9 +757,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
 				return err;
 			}
 		}
-		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 			hlist_for_each_entry(pol, entry,
-					     xfrm_policy_bydst[dir].table + i,
+					     init_net.xfrm.policy_bydst[dir].table + i,
 					     bydst) {
 				if (pol->type != type)
 					continue;
@@ -827,10 +821,10 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 			goto again1;
 		}
 
-		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 	again2:
 			hlist_for_each_entry(pol, entry,
-					     xfrm_policy_bydst[dir].table + i,
+					     init_net.xfrm.policy_bydst[dir].table + i,
 					     bydst) {
 				if (pol->type != type)
 					continue;
@@ -2154,8 +2148,8 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 				     &init_net.xfrm.policy_inexact[dir], bydst)
 			prune_one_bundle(pol, func, &gc_list);
 
-		table = xfrm_policy_bydst[dir].table;
-		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+		table = init_net.xfrm.policy_bydst[dir].table;
+		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 			hlist_for_each_entry(pol, entry, table + i, bydst)
 				prune_one_bundle(pol, func, &gc_list);
 		}
@@ -2415,11 +2409,11 @@ static int __net_init xfrm_policy_init(struct net *net)
 
 		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
 
-		htab = &xfrm_policy_bydst[dir];
+		htab = &net->xfrm.policy_bydst[dir];
 		htab->table = xfrm_hash_alloc(sz);
-		htab->hmask = hmask;
 		if (!htab->table)
-			panic("XFRM: failed to allocate bydst hash\n");
+			goto out_bydst;
+		htab->hmask = hmask;
 	}
 
 	INIT_LIST_HEAD(&net->xfrm.policy_all);
@@ -2427,6 +2421,14 @@ static int __net_init xfrm_policy_init(struct net *net)
 		register_netdevice_notifier(&xfrm_dev_notifier);
 	return 0;
 
+out_bydst:
+	for (dir--; dir >= 0; dir--) {
+		struct xfrm_policy_hash *htab;
+
+		htab = &net->xfrm.policy_bydst[dir];
+		xfrm_hash_free(htab->table, sz);
+	}
+	xfrm_hash_free(net->xfrm.policy_byidx, sz);
 out_byidx:
 	return -ENOMEM;
 }
@@ -2439,7 +2441,14 @@ static void xfrm_policy_fini(struct net *net)
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy_hash *htab;
+
 		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
+
+		htab = &net->xfrm.policy_bydst[dir];
+		sz = (htab->hmask + 1);
+		WARN_ON(!hlist_empty(htab->table));
+		xfrm_hash_free(htab->table, sz);
 	}
 
 	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
-- 
cgit v1.2.3


From dc2caba7b321289e7d02e63d7216961ccecfa103 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:24:15 -0800
Subject: netns xfrm: per-netns policy counts

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 include/net/xfrm.h       |  6 ++----
 net/xfrm/xfrm_policy.c   | 34 ++++++++++++++++------------------
 3 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 39cfa799fa90..d5aadf06be46 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -37,6 +37,7 @@ struct netns_xfrm {
 	unsigned int		policy_idx_hmask;
 	struct hlist_head	policy_inexact[XFRM_POLICY_MAX * 2];
 	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
+	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1ab17565f01c..8699620f8c2d 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -559,8 +559,6 @@ struct xfrm_mgr
 extern int xfrm_register_km(struct xfrm_mgr *km);
 extern int xfrm_unregister_km(struct xfrm_mgr *km);
 
-extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
-
 /*
  * This structure is used for the duration where packets are being
  * transformed by IPsec.  As soon as the packet leaves IPsec the
@@ -999,7 +997,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
 	if (sk && sk->sk_policy[XFRM_POLICY_IN])
 		return __xfrm_policy_check(sk, ndir, skb, family);
 
-	return	(!xfrm_policy_count[dir] && !skb->sp) ||
+	return	(!init_net.xfrm.policy_count[dir] && !skb->sp) ||
 		(skb->dst->flags & DST_NOPOLICY) ||
 		__xfrm_policy_check(sk, ndir, skb, family);
 }
@@ -1051,7 +1049,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
 
 static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
-	return	!xfrm_policy_count[XFRM_POLICY_OUT] ||
+	return	!init_net.xfrm.policy_count[XFRM_POLICY_OUT] ||
 		(skb->dst->flags & DST_NOXFRM) ||
 		__xfrm_route_forward(skb, family);
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 929b2fdaa2ef..630ec048a0d3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,9 +46,6 @@ EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
-unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
-EXPORT_SYMBOL(xfrm_policy_count);
-
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
@@ -451,7 +448,7 @@ static void xfrm_byidx_resize(int total)
 
 static inline int xfrm_bydst_should_resize(int dir, int *total)
 {
-	unsigned int cnt = xfrm_policy_count[dir];
+	unsigned int cnt = init_net.xfrm.policy_count[dir];
 	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
 
 	if (total)
@@ -478,12 +475,12 @@ static inline int xfrm_byidx_should_resize(int total)
 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
 {
 	read_lock_bh(&xfrm_policy_lock);
-	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
-	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
-	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
-	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
-	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
-	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
+	si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN];
+	si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT];
+	si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD];
+	si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
+	si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
+	si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
 	si->spdhcnt = init_net.xfrm.policy_idx_hmask;
 	si->spdhmcnt = xfrm_policy_hashmax;
 	read_unlock_bh(&xfrm_policy_lock);
@@ -591,13 +588,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else
 		hlist_add_head(&policy->bydst, chain);
 	xfrm_pol_hold(policy);
-	xfrm_policy_count[dir]++;
+	init_net.xfrm.policy_count[dir]++;
 	atomic_inc(&flow_cache_genid);
 	if (delpol) {
 		hlist_del(&delpol->bydst);
 		hlist_del(&delpol->byidx);
 		list_del(&delpol->walk.all);
-		xfrm_policy_count[dir]--;
+		init_net.xfrm.policy_count[dir]--;
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
 	hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index));
@@ -673,7 +670,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
 				list_del(&pol->walk.all);
-				xfrm_policy_count[dir]--;
+				init_net.xfrm.policy_count[dir]--;
 			}
 			ret = pol;
 			break;
@@ -717,7 +714,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
 				list_del(&pol->walk.all);
-				xfrm_policy_count[dir]--;
+				init_net.xfrm.policy_count[dir]--;
 			}
 			ret = pol;
 			break;
@@ -845,7 +842,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 			}
 		}
 
-		xfrm_policy_count[dir] -= killed;
+		init_net.xfrm.policy_count[dir] -= killed;
 	}
 	atomic_inc(&flow_cache_genid);
 out:
@@ -1079,7 +1076,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 	list_add(&pol->walk.all, &init_net.xfrm.policy_all);
 	hlist_add_head(&pol->bydst, chain);
 	hlist_add_head(&pol->byidx, init_net.xfrm.policy_byidx+idx_hash(pol->index));
-	xfrm_policy_count[dir]++;
+	init_net.xfrm.policy_count[dir]++;
 	xfrm_pol_hold(pol);
 
 	if (xfrm_bydst_should_resize(dir, NULL))
@@ -1095,7 +1092,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 	hlist_del(&pol->bydst);
 	hlist_del(&pol->byidx);
 	list_del(&pol->walk.all);
-	xfrm_policy_count[dir]--;
+	init_net.xfrm.policy_count[dir]--;
 
 	return pol;
 }
@@ -1574,7 +1571,7 @@ restart:
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
-		    !xfrm_policy_count[XFRM_POLICY_OUT])
+		    !init_net.xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
 		policy = flow_cache_lookup(fl, dst_orig->ops->family,
@@ -2407,6 +2404,7 @@ static int __net_init xfrm_policy_init(struct net *net)
 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
 		struct xfrm_policy_hash *htab;
 
+		net->xfrm.policy_count[dir] = 0;
 		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
 
 		htab = &net->xfrm.policy_bydst[dir];
-- 
cgit v1.2.3


From 66caf628c3b634c57b14a1a104dcd57e4fab2e3b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:28:57 -0800
Subject: netns xfrm: per-netns policy hash resizing work

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_policy.c   | 52 ++++++++++++++++++++++++------------------------
 2 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index d5aadf06be46..c53d17357a49 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -38,6 +38,7 @@ struct netns_xfrm {
 	struct hlist_head	policy_inexact[XFRM_POLICY_MAX * 2];
 	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
 	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
+	struct work_struct	policy_hash_work;
 };
 
 #endif
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 630ec048a0d3..1d300862dc04 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -396,12 +396,12 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
 	return ((old_hmask + 1) << 1) - 1;
 }
 
-static void xfrm_bydst_resize(int dir)
+static void xfrm_bydst_resize(struct net *net, int dir)
 {
-	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
+	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-	struct hlist_head *odst = init_net.xfrm.policy_bydst[dir].table;
+	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
 	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 	int i;
 
@@ -413,20 +413,20 @@ static void xfrm_bydst_resize(int dir)
 	for (i = hmask; i >= 0; i--)
 		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
 
-	init_net.xfrm.policy_bydst[dir].table = ndst;
-	init_net.xfrm.policy_bydst[dir].hmask = nhashmask;
+	net->xfrm.policy_bydst[dir].table = ndst;
+	net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
 	write_unlock_bh(&xfrm_policy_lock);
 
 	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 }
 
-static void xfrm_byidx_resize(int total)
+static void xfrm_byidx_resize(struct net *net, int total)
 {
-	unsigned int hmask = init_net.xfrm.policy_idx_hmask;
+	unsigned int hmask = net->xfrm.policy_idx_hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-	struct hlist_head *oidx = init_net.xfrm.policy_byidx;
+	struct hlist_head *oidx = net->xfrm.policy_byidx;
 	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
 	int i;
 
@@ -438,18 +438,18 @@ static void xfrm_byidx_resize(int total)
 	for (i = hmask; i >= 0; i--)
 		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
 
-	init_net.xfrm.policy_byidx = nidx;
-	init_net.xfrm.policy_idx_hmask = nhashmask;
+	net->xfrm.policy_byidx = nidx;
+	net->xfrm.policy_idx_hmask = nhashmask;
 
 	write_unlock_bh(&xfrm_policy_lock);
 
 	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 }
 
-static inline int xfrm_bydst_should_resize(int dir, int *total)
+static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
 {
-	unsigned int cnt = init_net.xfrm.policy_count[dir];
-	unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask;
+	unsigned int cnt = net->xfrm.policy_count[dir];
+	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 
 	if (total)
 		*total += cnt;
@@ -461,9 +461,9 @@ static inline int xfrm_bydst_should_resize(int dir, int *total)
 	return 0;
 }
 
-static inline int xfrm_byidx_should_resize(int total)
+static inline int xfrm_byidx_should_resize(struct net *net, int total)
 {
-	unsigned int hmask = init_net.xfrm.policy_idx_hmask;
+	unsigned int hmask = net->xfrm.policy_idx_hmask;
 
 	if ((hmask + 1) < xfrm_policy_hashmax &&
 	    total > hmask)
@@ -488,25 +488,24 @@ void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
 EXPORT_SYMBOL(xfrm_spd_getinfo);
 
 static DEFINE_MUTEX(hash_resize_mutex);
-static void xfrm_hash_resize(struct work_struct *__unused)
+static void xfrm_hash_resize(struct work_struct *work)
 {
+	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
 	int dir, total;
 
 	mutex_lock(&hash_resize_mutex);
 
 	total = 0;
 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
-		if (xfrm_bydst_should_resize(dir, &total))
-			xfrm_bydst_resize(dir);
+		if (xfrm_bydst_should_resize(net, dir, &total))
+			xfrm_bydst_resize(net, dir);
 	}
-	if (xfrm_byidx_should_resize(total))
-		xfrm_byidx_resize(total);
+	if (xfrm_byidx_should_resize(net, total))
+		xfrm_byidx_resize(net, total);
 
 	mutex_unlock(&hash_resize_mutex);
 }
 
-static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
-
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
 static u32 xfrm_gen_index(int dir)
@@ -607,8 +606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 
 	if (delpol)
 		xfrm_policy_kill(delpol);
-	else if (xfrm_bydst_should_resize(dir, NULL))
-		schedule_work(&xfrm_hash_work);
+	else if (xfrm_bydst_should_resize(&init_net, dir, NULL))
+		schedule_work(&init_net.xfrm.policy_hash_work);
 
 	read_lock_bh(&xfrm_policy_lock);
 	gc_list = NULL;
@@ -1079,8 +1078,8 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 	init_net.xfrm.policy_count[dir]++;
 	xfrm_pol_hold(pol);
 
-	if (xfrm_bydst_should_resize(dir, NULL))
-		schedule_work(&xfrm_hash_work);
+	if (xfrm_bydst_should_resize(&init_net, dir, NULL))
+		schedule_work(&init_net.xfrm.policy_hash_work);
 }
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -2415,6 +2414,7 @@ static int __net_init xfrm_policy_init(struct net *net)
 	}
 
 	INIT_LIST_HEAD(&net->xfrm.policy_all);
+	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
 	if (net_eq(net, &init_net))
 		register_netdevice_notifier(&xfrm_dev_notifier);
 	return 0;
-- 
cgit v1.2.3


From 0e6024519b4da2d9413b97be1de8122d5709ccc1 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:30:18 -0800
Subject: netns xfrm: state flush in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    |  2 +-
 net/key/af_key.c      |  2 +-
 net/xfrm/xfrm_state.c | 18 +++++++++---------
 net/xfrm/xfrm_user.c  |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8699620f8c2d..e4bb67225610 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1363,7 +1363,7 @@ struct xfrmk_spdinfo {
 
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
-extern int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
+extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info);
 extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si);
 extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si);
 extern int xfrm_replay_check(struct xfrm_state *x,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 036315d6b665..e5d595a60921 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1732,7 +1732,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
 	audit_info.secid = 0;
-	err = xfrm_state_flush(proto, &audit_info);
+	err = xfrm_state_flush(&init_net, proto, &audit_info);
 	if (err)
 		return err;
 	c.data.proto = proto;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f3f635d4ee66..5f4c5340ba30 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -576,15 +576,15 @@ EXPORT_SYMBOL(xfrm_state_delete);
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int
-xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 {
 	int i, err = 0;
 
-	for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
+	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
-		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto) &&
 			   (err = security_xfrm_state_delete(x)) != 0) {
 				xfrm_audit_state_delete(x, 0,
@@ -600,26 +600,26 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
 }
 #else
 static inline int
-xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 {
 	return 0;
 }
 #endif
 
-int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
+int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 {
 	int i, err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
-	err = xfrm_state_flush_secctx_check(proto, audit_info);
+	err = xfrm_state_flush_secctx_check(net, proto, audit_info);
 	if (err)
 		goto out;
 
-	for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
+	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 restart:
-		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
 			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
@@ -641,7 +641,7 @@ restart:
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	wake_up(&init_net.xfrm.km_waitq);
+	wake_up(&net->xfrm.km_waitq);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 765c01e784e9..49a7e897ba96 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1398,7 +1398,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	audit_info.loginuid = NETLINK_CB(skb).loginuid;
 	audit_info.sessionid = NETLINK_CB(skb).sessionid;
 	audit_info.secid = NETLINK_CB(skb).sid;
-	err = xfrm_state_flush(p->proto, &audit_info);
+	err = xfrm_state_flush(&init_net, p->proto, &audit_info);
 	if (err)
 		return err;
 	c.data.proto = p->proto;
-- 
cgit v1.2.3


From 221df1ed33c9284fc7a6f6e47ca7f8d5f3665d43 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:30:50 -0800
Subject: netns xfrm: state lookup in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  4 ++--
 net/ipv4/ah4.c         |  2 +-
 net/ipv4/esp4.c        |  2 +-
 net/ipv4/ipcomp.c      |  4 ++--
 net/ipv6/ah6.c         |  2 +-
 net/ipv6/esp6.c        |  2 +-
 net/ipv6/ipcomp6.c     |  4 ++--
 net/ipv6/xfrm6_input.c |  2 +-
 net/key/af_key.c       |  2 +-
 net/xfrm/xfrm_input.c  |  2 +-
 net/xfrm/xfrm_state.c  | 34 +++++++++++++++++++---------------
 net/xfrm/xfrm_user.c   | 12 ++++++------
 12 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e4bb67225610..15136c5e2622 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1323,8 +1323,8 @@ extern int xfrm_state_check_expire(struct xfrm_state *x);
 extern void xfrm_state_insert(struct xfrm_state *x);
 extern int xfrm_state_add(struct xfrm_state *x);
 extern int xfrm_state_update(struct xfrm_state *x);
-extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family);
-extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family);
+extern struct xfrm_state *xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family);
+extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family);
 #ifdef CONFIG_XFRM_SUB_POLICY
 extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
 			  int n, unsigned short family);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 992ecd8662e2..750426b0a276 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -209,7 +209,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+	x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
 	if (!x)
 		return;
 	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 95a9c65003f8..35950128aa94 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -421,7 +421,7 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+	x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
 	if (!x)
 		return;
 	NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 0a35f1b6f22c..3262ce06294c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -35,7 +35,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 		return;
 
 	spi = htonl(ntohs(ipch->cpi));
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
+	x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr,
 			      spi, IPPROTO_COMP, AF_INET);
 	if (!x)
 		return;
@@ -85,7 +85,7 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x)
 	int err = 0;
 	struct xfrm_state *t;
 
-	t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
+	t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4,
 			      x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
 	if (!t) {
 		t = ipcomp_tunnel_create(x);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 13e330d89178..6ae014b86b69 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -415,7 +415,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	    type != ICMPV6_PKT_TOOBIG)
 		return;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
+	x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
 	if (!x)
 		return;
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index c02a6308defe..68f2af8c15c0 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -364,7 +364,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	    type != ICMPV6_PKT_TOOBIG)
 		return;
 
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
+	x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
 	if (!x)
 		return;
 	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n",
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index c369638e208a..3a0b3be7ece5 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -63,7 +63,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		return;
 
 	spi = htonl(ntohs(ipcomph->cpi));
-	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
+	x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
 	if (!x)
 		return;
 
@@ -114,7 +114,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x)
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
 	if (spi)
-		t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
+		t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr,
 					      spi, IPPROTO_IPV6, AF_INET6);
 	if (!t) {
 		t = ipcomp6_tunnel_create(x);
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index a71c7ddcb41e..b69766a77743 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -100,7 +100,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 			break;
 		}
 
-		x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6);
+		x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6);
 		if (!x)
 			continue;
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index e5d595a60921..449a5d03e283 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -683,7 +683,7 @@ static struct  xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **
 	if (!xaddr)
 		return NULL;
 
-	return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family);
+	return xfrm_state_lookup(&init_net, xaddr, sa->sadb_sa_spi, proto, family);
 }
 
 #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 75279402ccf4..c08a93e98a36 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -151,7 +151,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 		}
 
-		x = xfrm_state_lookup(daddr, spi, nexthdr, family);
+		x = xfrm_state_lookup(&init_net, daddr, spi, nexthdr, family);
 		if (x == NULL) {
 			XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
 			xfrm_audit_state_notfound(skb, family, spi, seq);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5f4c5340ba30..afde47498cdc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -670,13 +670,13 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	return 0;
 }
 
-static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
 {
-	unsigned int h = xfrm_spi_hash(&init_net, daddr, spi, proto, family);
+	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
 	struct hlist_node *entry;
 
-	hlist_for_each_entry(x, entry, init_net.xfrm.state_byspi+h, byspi) {
+	hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto)
@@ -702,13 +702,13 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 	return NULL;
 }
 
-static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
+static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
 {
-	unsigned int h = xfrm_src_hash(&init_net, daddr, saddr, family);
+	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
 	struct hlist_node *entry;
 
-	hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
+	hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto)
 			continue;
@@ -740,11 +740,13 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
 static inline struct xfrm_state *
 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
 {
+	struct net *net = xs_net(x);
+
 	if (use_spi)
-		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
+		return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi,
 					   x->id.proto, family);
 	else
-		return __xfrm_state_lookup_byaddr(&x->id.daddr,
+		return __xfrm_state_lookup_byaddr(net, &x->id.daddr,
 						  &x->props.saddr,
 						  x->id.proto, family);
 }
@@ -818,7 +820,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
+		    (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi,
 					      tmpl->id.proto, family)) != NULL) {
 			to_put = x0;
 			error = -EEXIST;
@@ -1361,26 +1363,27 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_check_expire);
 
 struct xfrm_state *
-xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
+xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto,
 		  unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = __xfrm_state_lookup(daddr, spi, proto, family);
+	x = __xfrm_state_lookup(net, daddr, spi, proto, family);
 	spin_unlock_bh(&xfrm_state_lock);
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
-xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+xfrm_state_lookup_byaddr(struct net *net,
+			 xfrm_address_t *daddr, xfrm_address_t *saddr,
 			 u8 proto, unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
+	x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family);
 	spin_unlock_bh(&xfrm_state_lock);
 	return x;
 }
@@ -1486,6 +1489,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq);
 
 int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
 {
+	struct net *net = xs_net(x);
 	unsigned int h;
 	struct xfrm_state *x0;
 	int err = -ENOENT;
@@ -1503,7 +1507,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
 	err = -ENOENT;
 
 	if (minspi == maxspi) {
-		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
+		x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family);
 		if (x0) {
 			xfrm_state_put(x0);
 			goto unlock;
@@ -1513,7 +1517,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
 		u32 spi = 0;
 		for (h=0; h<high-low+1; h++) {
 			spi = low + net_random()%(high-low+1);
-			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
+			x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
 			if (x0 == NULL) {
 				x->id.spi = htonl(spi);
 				break;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 49a7e897ba96..e02ef3361190 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -440,7 +440,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
 
 	if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) {
 		err = -ESRCH;
-		x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+		x = xfrm_state_lookup(&init_net, &p->daddr, p->spi, p->proto, p->family);
 	} else {
 		xfrm_address_t *saddr = NULL;
 
@@ -451,8 +451,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
 		}
 
 		err = -ESRCH;
-		x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto,
-					     p->family);
+		x = xfrm_state_lookup_byaddr(&init_net, &p->daddr, saddr,
+					     p->proto, p->family);
 	}
 
  out:
@@ -1468,7 +1468,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family);
+	x = xfrm_state_lookup(&init_net, &id->daddr, id->spi, id->proto, id->family);
 	if (x == NULL) {
 		kfree_skb(r_skb);
 		return -ESRCH;
@@ -1509,7 +1509,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!(nlh->nlmsg_flags&NLM_F_REPLACE))
 		return err;
 
-	x = xfrm_state_lookup(&p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family);
+	x = xfrm_state_lookup(&init_net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family);
 	if (x == NULL)
 		return -ESRCH;
 
@@ -1628,7 +1628,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_user_expire *ue = nlmsg_data(nlh);
 	struct xfrm_usersa_info *p = &ue->state;
 
-	x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family);
+	x = xfrm_state_lookup(&init_net, &p->id.daddr, p->id.spi, p->id.proto, p->family);
 
 	err = -ENOENT;
 	if (x == NULL)
-- 
cgit v1.2.3


From 5447c5e401c49aba0c36bb1066f2d25b152553b7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:31:51 -0800
Subject: netns xfrm: finding states in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    |  7 ++---
 net/core/pktgen.c     |  3 ++-
 net/key/af_key.c      |  6 ++---
 net/xfrm/xfrm_state.c | 73 +++++++++++++++++++++++++++------------------------
 net/xfrm/xfrm_user.c  |  4 +--
 5 files changed, 49 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 15136c5e2622..4cbd0557c698 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1315,7 +1315,8 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t
 					  struct flowi *fl, struct xfrm_tmpl *tmpl,
 					  struct xfrm_policy *pol, int *err,
 					  unsigned short family);
-extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr,
+extern struct xfrm_state * xfrm_stateonly_find(struct net *net,
+					       xfrm_address_t *daddr,
 					       xfrm_address_t *saddr,
 					       unsigned short family,
 					       u8 mode, u8 proto, u32 reqid);
@@ -1361,7 +1362,7 @@ struct xfrmk_spdinfo {
 	u32 spdhmcnt;
 };
 
-extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
+extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
 extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info);
 extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si);
@@ -1446,7 +1447,7 @@ struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
 u32 xfrm_get_acqseq(void);
 extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
-struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
+struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto,
 				  xfrm_address_t *daddr, xfrm_address_t *saddr,
 				  int create, unsigned short family);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 15e0c2c7aacf..65498483325a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,8 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 	struct xfrm_state *x = pkt_dev->flows[flow].x;
 	if (!x) {
 		/*slow path: we dont already have xfrm_state*/
-		x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr,
+		x = xfrm_stateonly_find(&init_net,
+					(xfrm_address_t *)&pkt_dev->cur_daddr,
 					(xfrm_address_t *)&pkt_dev->cur_saddr,
 					AF_INET,
 					pkt_dev->ipsmode,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 449a5d03e283..4ef0827009e9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1348,7 +1348,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	}
 
 	if (hdr->sadb_msg_seq) {
-		x = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
+		x = xfrm_find_acq_byseq(&init_net, hdr->sadb_msg_seq);
 		if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
@@ -1356,7 +1356,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	}
 
 	if (!x)
-		x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family);
+		x = xfrm_find_acq(&init_net, mode, reqid, proto, xdaddr, xsaddr, 1, family);
 
 	if (x == NULL)
 		return -ENOENT;
@@ -1404,7 +1404,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
 		return 0;
 
-	x = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
+	x = xfrm_find_acq_byseq(&init_net, hdr->sadb_msg_seq);
 	if (x == NULL)
 		return 0;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index cd51e4e3d023..0d974fc9dd6c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -765,6 +765,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
+	struct net *net = xp_net(pol);
 	unsigned int h;
 	struct hlist_node *entry;
 	struct xfrm_state *x, *x0, *to_put;
@@ -775,8 +776,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	to_put = NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	h = xfrm_dst_hash(&init_net, daddr, saddr, tmpl->reqid, family);
-	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
+	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family);
+	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -820,13 +821,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi,
+		    (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi,
 					      tmpl->id.proto, family)) != NULL) {
 			to_put = x0;
 			error = -EEXIST;
 			goto out;
 		}
-		x = xfrm_state_alloc(&init_net);
+		x = xfrm_state_alloc(net);
 		if (x == NULL) {
 			error = -ENOMEM;
 			goto out;
@@ -845,19 +846,19 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add(&x->km.all, &init_net.xfrm.state_all);
-			hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
-			h = xfrm_src_hash(&init_net, daddr, saddr, family);
-			hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
+			list_add(&x->km.all, &net->xfrm.state_all);
+			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+			h = xfrm_src_hash(net, daddr, saddr, family);
+			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
 			if (x->id.spi) {
-				h = xfrm_spi_hash(&init_net, &x->id.daddr, x->id.spi, x->id.proto, family);
-				hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
+				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family);
+				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
 			}
 			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
 			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 			add_timer(&x->timer);
-			init_net.xfrm.state_num++;
-			xfrm_hash_grow_check(&init_net, x->bydst.next != NULL);
+			net->xfrm.state_num++;
+			xfrm_hash_grow_check(net, x->bydst.next != NULL);
 		} else {
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
@@ -877,7 +878,8 @@ out:
 }
 
 struct xfrm_state *
-xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
+xfrm_stateonly_find(struct net *net,
+		    xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    unsigned short family, u8 mode, u8 proto, u32 reqid)
 {
 	unsigned int h;
@@ -885,8 +887,8 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	struct hlist_node *entry;
 
 	spin_lock(&xfrm_state_lock);
-	h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family);
-	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
+	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
+	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -972,13 +974,13 @@ void xfrm_state_insert(struct xfrm_state *x)
 EXPORT_SYMBOL(xfrm_state_insert);
 
 /* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
 {
-	unsigned int h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family);
+	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
 	struct hlist_node *entry;
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.reqid  != reqid ||
 		    x->props.mode   != mode ||
 		    x->props.family != family ||
@@ -1010,7 +1012,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 	if (!create)
 		return NULL;
 
-	x = xfrm_state_alloc(&init_net);
+	x = xfrm_state_alloc(net);
 	if (likely(x)) {
 		switch (family) {
 		case AF_INET:
@@ -1045,23 +1047,24 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		xfrm_state_hold(x);
 		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 		add_timer(&x->timer);
-		list_add(&x->km.all, &init_net.xfrm.state_all);
-		hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
-		h = xfrm_src_hash(&init_net, daddr, saddr, family);
-		hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
+		list_add(&x->km.all, &net->xfrm.state_all);
+		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+		h = xfrm_src_hash(net, daddr, saddr, family);
+		hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
 
-		init_net.xfrm.state_num++;
+		net->xfrm.state_num++;
 
-		xfrm_hash_grow_check(&init_net, x->bydst.next != NULL);
+		xfrm_hash_grow_check(net, x->bydst.next != NULL);
 	}
 
 	return x;
 }
 
-static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
+	struct net *net = xs_net(x);
 	struct xfrm_state *x1, *to_put;
 	int family;
 	int err;
@@ -1082,7 +1085,7 @@ int xfrm_state_add(struct xfrm_state *x)
 	}
 
 	if (use_spi && x->km.seq) {
-		x1 = __xfrm_find_acq_byseq(x->km.seq);
+		x1 = __xfrm_find_acq_byseq(net, x->km.seq);
 		if (x1 && ((x1->id.proto != x->id.proto) ||
 		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
 			to_put = x1;
@@ -1091,7 +1094,7 @@ int xfrm_state_add(struct xfrm_state *x)
 	}
 
 	if (use_spi && !x1)
-		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
+		x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid,
 				     x->id.proto,
 				     &x->id.daddr, &x->props.saddr, 0);
 
@@ -1390,14 +1393,14 @@ xfrm_state_lookup_byaddr(struct net *net,
 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
-xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
+xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto,
 	      xfrm_address_t *daddr, xfrm_address_t *saddr,
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
+	x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create);
 	spin_unlock_bh(&xfrm_state_lock);
 
 	return x;
@@ -1444,15 +1447,15 @@ EXPORT_SYMBOL(xfrm_state_sort);
 
 /* Silly enough, but I'm lazy to build resolution list */
 
-static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq)
 {
 	int i;
 
-	for (i = 0; i <= init_net.xfrm.state_hmask; i++) {
+	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
 
-		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
 			if (x->km.seq == seq &&
 			    x->km.state == XFRM_STATE_ACQ) {
 				xfrm_state_hold(x);
@@ -1463,12 +1466,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 	return NULL;
 }
 
-struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = __xfrm_find_acq_byseq(seq);
+	x = __xfrm_find_acq_byseq(net, seq);
 	spin_unlock_bh(&xfrm_state_lock);
 	return x;
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e02ef3361190..3d577440b673 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -837,7 +837,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	x = NULL;
 	if (p->info.seq) {
-		x = xfrm_find_acq_byseq(p->info.seq);
+		x = xfrm_find_acq_byseq(&init_net, p->info.seq);
 		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
@@ -845,7 +845,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	if (!x)
-		x = xfrm_find_acq(p->info.mode, p->info.reqid,
+		x = xfrm_find_acq(&init_net, p->info.mode, p->info.reqid,
 				  p->info.id.proto, daddr,
 				  &p->info.saddr, 1,
 				  family);
-- 
cgit v1.2.3


From 284fa7da300adcb700b44df2f64a536b434d4650 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:32:14 -0800
Subject: netns xfrm: state walking in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    | 2 +-
 net/key/af_key.c      | 2 +-
 net/xfrm/xfrm_state.c | 6 +++---
 net/xfrm/xfrm_user.c  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4cbd0557c698..40ed4878bc12 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1307,7 +1307,7 @@ extern int xfrm_proc_init(void);
 #endif
 
 extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
-extern int xfrm_state_walk(struct xfrm_state_walk *walk,
+extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 			   int (*func)(struct xfrm_state *, int, void*), void *);
 extern void xfrm_state_walk_done(struct xfrm_state_walk *walk);
 extern struct xfrm_state *xfrm_state_alloc(struct net *net);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 4ef0827009e9..b74d939e2eed 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1776,7 +1776,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
 
 static int pfkey_dump_sa(struct pfkey_sock *pfk)
 {
-	return xfrm_state_walk(&pfk->dump.u.state, dump_sa, (void *) pfk);
+	return xfrm_state_walk(&init_net, &pfk->dump.u.state, dump_sa, (void *) pfk);
 }
 
 static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0d974fc9dd6c..ea340bbbcc64 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1544,7 +1544,7 @@ unlock:
 }
 EXPORT_SYMBOL(xfrm_alloc_spi);
 
-int xfrm_state_walk(struct xfrm_state_walk *walk,
+int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*),
 		    void *data)
 {
@@ -1557,10 +1557,10 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 
 	spin_lock_bh(&xfrm_state_lock);
 	if (list_empty(&walk->all))
-		x = list_first_entry(&init_net.xfrm.state_all, struct xfrm_state_walk, all);
+		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
 	else
 		x = list_entry(&walk->all, struct xfrm_state_walk, all);
-	list_for_each_entry_from(x, &init_net.xfrm.state_all, all) {
+	list_for_each_entry_from(x, &net->xfrm.state_all, all) {
 		if (x->state == XFRM_STATE_DEAD)
 			continue;
 		state = container_of(x, struct xfrm_state, km);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3d577440b673..787b0ee65034 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -631,7 +631,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 		xfrm_state_walk_init(walk, 0);
 	}
 
-	(void) xfrm_state_walk(walk, dump_one_state, &info);
+	(void) xfrm_state_walk(&init_net, walk, dump_one_state, &info);
 
 	return skb->len;
 }
-- 
cgit v1.2.3


From 33ffbbd52c327225a3e28485c39dc5746d81be03 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:33:32 -0800
Subject: netns xfrm: policy flushing in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  2 +-
 net/key/af_key.c       |  2 +-
 net/xfrm/xfrm_policy.c | 22 +++++++++++-----------
 net/xfrm/xfrm_user.c   |  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 40ed4878bc12..766cc71e96d4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1444,7 +1444,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
 struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err);
-int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
+int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info);
 u32 xfrm_get_acqseq(void);
 extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b74d939e2eed..0f44856c1f12 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2686,7 +2686,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
 	audit_info.secid = 0;
-	err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info);
+	err = xfrm_policy_flush(&init_net, XFRM_POLICY_TYPE_MAIN, &audit_info);
 	if (err)
 		return err;
 	c.data.type = XFRM_POLICY_TYPE_MAIN;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 11fee87a0cc1..7c264a74edc0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -732,7 +732,7 @@ EXPORT_SYMBOL(xfrm_policy_byid);
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int
-xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
 {
 	int dir, err = 0;
 
@@ -742,7 +742,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
 		int i;
 
 		hlist_for_each_entry(pol, entry,
-				     &init_net.xfrm.policy_inexact[dir], bydst) {
+				     &net->xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
 			err = security_xfrm_policy_delete(pol->security);
@@ -754,9 +754,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
 				return err;
 			}
 		}
-		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
+		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 			hlist_for_each_entry(pol, entry,
-					     init_net.xfrm.policy_bydst[dir].table + i,
+					     net->xfrm.policy_bydst[dir].table + i,
 					     bydst) {
 				if (pol->type != type)
 					continue;
@@ -776,19 +776,19 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
 }
 #else
 static inline int
-xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
 {
 	return 0;
 }
 #endif
 
-int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
+int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 {
 	int dir, err = 0;
 
 	write_lock_bh(&xfrm_policy_lock);
 
-	err = xfrm_policy_flush_secctx_check(type, audit_info);
+	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
 	if (err)
 		goto out;
 
@@ -800,7 +800,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 		killed = 0;
 	again1:
 		hlist_for_each_entry(pol, entry,
-				     &init_net.xfrm.policy_inexact[dir], bydst) {
+				     &net->xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
 			hlist_del(&pol->bydst);
@@ -818,10 +818,10 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 			goto again1;
 		}
 
-		for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
+		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 	again2:
 			hlist_for_each_entry(pol, entry,
-					     init_net.xfrm.policy_bydst[dir].table + i,
+					     net->xfrm.policy_bydst[dir].table + i,
 					     bydst) {
 				if (pol->type != type)
 					continue;
@@ -842,7 +842,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 			}
 		}
 
-		init_net.xfrm.policy_count[dir] -= killed;
+		net->xfrm.policy_count[dir] -= killed;
 	}
 	atomic_inc(&flow_cache_genid);
 out:
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 787b0ee65034..d4983e831c34 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1546,7 +1546,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	audit_info.loginuid = NETLINK_CB(skb).loginuid;
 	audit_info.sessionid = NETLINK_CB(skb).sessionid;
 	audit_info.secid = NETLINK_CB(skb).sid;
-	err = xfrm_policy_flush(type, &audit_info);
+	err = xfrm_policy_flush(&init_net, type, &audit_info);
 	if (err)
 		return err;
 	c.data.type = type;
-- 
cgit v1.2.3


From 8d1211a6aaea43ea36151c17b0193eb763ff2d7e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:34:20 -0800
Subject: netns xfrm: finding policy in netns

Add netns parameter to xfrm_policy_bysel_ctx(), xfrm_policy_byidx().

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  4 ++--
 net/key/af_key.c       |  6 +++---
 net/xfrm/xfrm_policy.c | 14 +++++++-------
 net/xfrm/xfrm_user.c   |  8 ++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 766cc71e96d4..ec2b7a9b3aa9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1439,11 +1439,11 @@ extern int xfrm_policy_walk(struct xfrm_policy_walk *walk,
 	int (*func)(struct xfrm_policy *, int, int, void*), void *);
 extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
-struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u8, int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info);
 u32 xfrm_get_acqseq(void);
 extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0f44856c1f12..ca268116ac11 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2324,7 +2324,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 			return err;
 	}
 
-	xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN,
+	xp = xfrm_policy_bysel_ctx(&init_net, XFRM_POLICY_TYPE_MAIN,
 				   pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
 				   1, &err);
 	security_xfrm_policy_free(pol_ctx);
@@ -2571,8 +2571,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 		return -EINVAL;
 
 	delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
-	xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id,
-			      delete, &err);
+	xp = xfrm_policy_byid(&init_net, XFRM_POLICY_TYPE_MAIN, dir,
+			      pol->sadb_x_policy_id, delete, &err);
 	if (xp == NULL)
 		return -ENOENT;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7c264a74edc0..96895ef61858 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -642,7 +642,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err)
@@ -653,7 +653,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 
 	*err = 0;
 	write_lock_bh(&xfrm_policy_lock);
-	chain = policy_hash_bysel(&init_net, sel, sel->family, dir);
+	chain = policy_hash_bysel(net, sel, sel->family, dir);
 	ret = NULL;
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		if (pol->type == type &&
@@ -670,7 +670,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
 				list_del(&pol->walk.all);
-				init_net.xfrm.policy_count[dir]--;
+				net->xfrm.policy_count[dir]--;
 			}
 			ret = pol;
 			break;
@@ -686,8 +686,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
-				     int *err)
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id,
+				     int delete, int *err)
 {
 	struct xfrm_policy *pol, *ret;
 	struct hlist_head *chain;
@@ -699,7 +699,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 
 	*err = 0;
 	write_lock_bh(&xfrm_policy_lock);
-	chain = init_net.xfrm.policy_byidx + idx_hash(&init_net, id);
+	chain = net->xfrm.policy_byidx + idx_hash(net, id);
 	ret = NULL;
 	hlist_for_each_entry(pol, entry, chain, byidx) {
 		if (pol->type == type && pol->index == id) {
@@ -714,7 +714,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 				hlist_del(&pol->bydst);
 				hlist_del(&pol->byidx);
 				list_del(&pol->walk.all);
-				init_net.xfrm.policy_count[dir]--;
+				net->xfrm.policy_count[dir]--;
 			}
 			ret = pol;
 			break;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d4983e831c34..efd6ab5c0aca 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1330,7 +1330,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return err;
 
 	if (p->index)
-		xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err);
+		xp = xfrm_policy_byid(&init_net, type, p->dir, p->index, delete, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -1347,7 +1347,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx,
+		xp = xfrm_policy_bysel_ctx(&init_net, type, p->dir, &p->sel, ctx,
 					   delete, &err);
 		security_xfrm_policy_free(ctx);
 	}
@@ -1571,7 +1571,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return err;
 
 	if (p->index)
-		xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err);
+		xp = xfrm_policy_byid(&init_net, type, p->dir, p->index, 0, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -1588,7 +1588,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, 0, &err);
+		xp = xfrm_policy_bysel_ctx(&init_net, type, p->dir, &p->sel, ctx, 0, &err);
 		security_xfrm_policy_free(ctx);
 	}
 	if (xp == NULL)
-- 
cgit v1.2.3


From cdcbca7c1f1946758cfacb69bc1c7eeaccb11e2d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:34:49 -0800
Subject: netns xfrm: policy walking in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 2 +-
 net/key/af_key.c       | 4 ++--
 net/xfrm/xfrm_policy.c | 6 +++---
 net/xfrm/xfrm_user.c   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index ec2b7a9b3aa9..1dc4ff0f4851 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1435,7 +1435,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
 
 extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type);
-extern int xfrm_policy_walk(struct xfrm_policy_walk *walk,
+extern int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 	int (*func)(struct xfrm_policy *, int, int, void*), void *);
 extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ca268116ac11..a0d849848ddd 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1846,7 +1846,7 @@ static u32 gen_reqid(void)
 		if (reqid == 0)
 			reqid = IPSEC_MANUAL_REQID_MAX+1;
 		xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN);
-		rc = xfrm_policy_walk(&walk, check_reqid, (void*)&reqid);
+		rc = xfrm_policy_walk(&init_net, &walk, check_reqid, (void*)&reqid);
 		xfrm_policy_walk_done(&walk);
 		if (rc != -EEXIST)
 			return reqid;
@@ -2633,7 +2633,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
 
 static int pfkey_dump_sp(struct pfkey_sock *pfk)
 {
-	return xfrm_policy_walk(&pfk->dump.u.policy, dump_sp, (void *) pfk);
+	return xfrm_policy_walk(&init_net, &pfk->dump.u.policy, dump_sp, (void *) pfk);
 }
 
 static void pfkey_dump_sp_done(struct pfkey_sock *pfk)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 96895ef61858..6165218fd7c2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -851,7 +851,7 @@ out:
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
 
-int xfrm_policy_walk(struct xfrm_policy_walk *walk,
+int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 		     int (*func)(struct xfrm_policy *, int, int, void*),
 		     void *data)
 {
@@ -868,10 +868,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk,
 
 	write_lock_bh(&xfrm_policy_lock);
 	if (list_empty(&walk->walk.all))
-		x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all);
+		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
 	else
 		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
-	list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) {
+	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
 		if (x->dead)
 			continue;
 		pol = container_of(x, struct xfrm_policy, walk);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index efd6ab5c0aca..f6e02726cf1b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1279,7 +1279,7 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
 		xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
 	}
 
-	(void) xfrm_policy_walk(walk, dump_one_policy, &info);
+	(void) xfrm_policy_walk(&init_net, walk, dump_one_policy, &info);
 
 	return skb->len;
 }
-- 
cgit v1.2.3


From 52479b623d3d41df84c499325b6a8c7915413032 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:35:18 -0800
Subject: netns xfrm: lookup in netns

Pass netns to xfrm_lookup()/__xfrm_lookup(). For that pass netns
to flow_cache_lookup() and resolver callback.

Take it from socket or netdevice. Stub DECnet to init_net.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h                | 16 ++++++++--------
 include/net/flow.h               |  9 +++++----
 net/core/flow.c                  |  4 ++--
 net/dccp/ipv6.c                  | 10 +++++-----
 net/decnet/dn_route.c            |  6 +++---
 net/ipv4/icmp.c                  |  4 ++--
 net/ipv4/netfilter.c             |  4 ++--
 net/ipv4/route.c                 |  2 +-
 net/ipv6/af_inet6.c              |  2 +-
 net/ipv6/datagram.c              |  3 ++-
 net/ipv6/icmp.c                  |  6 +++---
 net/ipv6/inet6_connection_sock.c |  2 +-
 net/ipv6/ip6_tunnel.c            |  5 +++--
 net/ipv6/mcast.c                 |  4 ++--
 net/ipv6/ndisc.c                 |  4 ++--
 net/ipv6/netfilter.c             |  2 +-
 net/ipv6/netfilter/ip6t_REJECT.c |  2 +-
 net/ipv6/raw.c                   |  3 ++-
 net/ipv6/syncookies.c            |  2 +-
 net/ipv6/tcp_ipv6.c              | 11 ++++++-----
 net/ipv6/udp.c                   |  3 ++-
 net/xfrm/xfrm_policy.c           | 38 ++++++++++++++++++++------------------
 22 files changed, 75 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 6c778799bf10..6be3b082a070 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -291,21 +291,21 @@ enum {
 
 struct flowi;
 #ifndef CONFIG_XFRM
-static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
-		       struct sock *sk, int flags)
+static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
+			      struct flowi *fl, struct sock *sk, int flags)
 {
 	return 0;
 } 
-static inline int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
-				struct sock *sk, int flags)
+static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
+				struct flowi *fl, struct sock *sk, int flags)
 {
 	return 0;
 }
 #else
-extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
-		       struct sock *sk, int flags);
-extern int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
-			 struct sock *sk, int flags);
+extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p,
+		       struct flowi *fl, struct sock *sk, int flags);
+extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p,
+			 struct flowi *fl, struct sock *sk, int flags);
 #endif
 #endif
 
diff --git a/include/net/flow.h b/include/net/flow.h
index b45a5e4fcadd..809970b7dfee 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -84,12 +84,13 @@ struct flowi {
 #define FLOW_DIR_OUT	1
 #define FLOW_DIR_FWD	2
 
+struct net;
 struct sock;
-typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
-			       void **objp, atomic_t **obj_refp);
+typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
+			      u8 dir, void **objp, atomic_t **obj_refp);
 
-extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
-	 		       flow_resolve_t resolver);
+extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
+			       u8 dir, flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
 
diff --git a/net/core/flow.c b/net/core/flow.c
index d323388dd1ba..96015871ecea 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -225,7 +225,7 @@ nocache:
 		void *obj;
 		atomic_t *obj_ref;
 
-		err = resolver(key, family, dir, &obj, &obj_ref);
+		err = resolver(net, key, family, dir, &obj, &obj_ref);
 
 		if (fle && !err) {
 			fle->genid = atomic_read(&flow_cache_genid);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index f033e845bb07..b963f35c65f6 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 				goto out;
 			}
 
-			err = xfrm_lookup(&dst, &fl, sk, 0);
+			err = xfrm_lookup(net, &dst, &fl, sk, 0);
 			if (err < 0) {
 				sk->sk_err_soft = -err;
 				goto out;
@@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	err = xfrm_lookup(&dst, &fl, sk, 0);
+	err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
 	if (err < 0)
 		goto done;
 
@@ -343,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) {
-		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
+		if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) {
 			ip6_xmit(ctl_sk, skb, &fl, NULL, 0);
 			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
 			DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
@@ -569,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		if (final_p)
 			ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
 			goto out;
 	}
 
@@ -1004,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT);
+	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
 	if (err < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 768df000523b..eeaa3d819f9c 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1184,7 +1184,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f
 
 	err = __dn_route_output_key(pprt, flp, flags);
 	if (err == 0 && flp->proto) {
-		err = xfrm_lookup(pprt, flp, NULL, 0);
+		err = xfrm_lookup(&init_net, pprt, flp, NULL, 0);
 	}
 	return err;
 }
@@ -1195,8 +1195,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock
 
 	err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
 	if (err == 0 && fl->proto) {
-		err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ?
-						0 : XFRM_LOOKUP_WAIT);
+		err = xfrm_lookup(&init_net, pprt, fl, sk,
+				 (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT);
 	}
 	return err;
 }
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7b88be9803b1..705b33b184a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		/* No need to clone since we're just using its address. */
 		rt2 = rt;
 
-		err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
+		err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
 		switch (err) {
 		case 0:
 			if (rt != rt2)
@@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		if (err)
 			goto relookup_failed;
 
-		err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
+		err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL,
 				  XFRM_LOOKUP_ICMP);
 		switch (err) {
 		case 0:
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6efdb70b3eb2..c99eecf89da5 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 #ifdef CONFIG_XFRM
 	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
 	    xfrm_decode_session(skb, &fl, AF_INET) == 0)
-		if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
+		if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0))
 			return -1;
 #endif
 
@@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0)
+	if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0)
 		return -1;
 
 	dst_release(skb->dst);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4e6959c29819..77bfba975959 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2761,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
 			flp->fl4_src = (*rp)->rt_src;
 		if (!flp->fl4_dst)
 			flp->fl4_dst = (*rp)->rt_dst;
-		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
+		err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
 				    flags ? XFRM_LOOKUP_WAIT : 0);
 		if (err == -EREMOTE)
 			err = ipv4_dst_blackhole(net, rp, flp);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 01edac888510..437b750b98fd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		if (final_p)
 			ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+		if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
 			sk->sk_err_soft = -err;
 			return err;
 		}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e44deb8d4df2..e2bdc6d83a43 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -175,7 +175,8 @@ ipv4_connected:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
+	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
+	if (err < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a77b8d103804..4f433847d95f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	/* No need to clone since we're just using its address. */
 	dst2 = dst;
 
-	err = xfrm_lookup(&dst, &fl, sk, 0);
+	err = xfrm_lookup(net, &dst, &fl, sk, 0);
 	switch (err) {
 	case 0:
 		if (dst != dst2)
@@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	if (ip6_dst_lookup(sk, &dst2, &fl))
 		goto relookup_failed;
 
-	err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
+	err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP);
 	switch (err) {
 	case 0:
 		dst_release(dst);
@@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto out;
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0)
 		goto out;
 
 	if (ipv6_addr_is_multicast(&fl.fl6_dst))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 16d43f20b32f..3c3732d50c1a 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 		if (final_p)
 			ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+		if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) {
 			sk->sk_route_caps = 0;
 			kfree_skb(skb);
 			return err;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ef249ab5c93c..58e2b0d93758 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 			 int encap_limit,
 			 __u32 *pmtu)
 {
+	struct net *net = dev_net(dev);
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->dev->stats;
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	if ((dst = ip6_tnl_dst_check(t)) != NULL)
 		dst_hold(dst);
 	else {
-		dst = ip6_route_output(dev_net(dev), NULL, fl);
+		dst = ip6_route_output(net, NULL, fl);
 
-		if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
+		if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
 			goto tx_err_link_failure;
 	}
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 870a1d64605a..0f3896032830 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1466,7 +1466,7 @@ static void mld_sendpack(struct sk_buff *skb)
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
 
-	err = xfrm_lookup(&skb->dst, &fl, NULL, 0);
+	err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0);
 	if (err)
 		goto err_out;
 
@@ -1831,7 +1831,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
 
-	err = xfrm_lookup(&skb->dst, &fl, NULL, 0);
+	err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0);
 	if (err)
 		goto err_out;
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index af6705f03b5c..e4acc212345e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -524,7 +524,7 @@ void ndisc_send_skb(struct sk_buff *skb,
 		return;
 	}
 
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
 	if (err < 0) {
 		kfree_skb(skb);
 		return;
@@ -1524,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	if (dst == NULL)
 		return;
 
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
+	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
 	if (err)
 		return;
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index fd5b3a4e3329..627e21db65df 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
 #ifdef CONFIG_XFRM
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
 	    xfrm_decode_session(skb, &fl, AF_INET6) == 0)
-		if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
+		if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0))
 			return -1;
 #endif
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 0981b4ccb8b1..5a2d0a41694a 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	dst = ip6_route_output(net, NULL, &fl);
 	if (dst == NULL)
 		return;
-	if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0))
+	if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0))
 		return;
 
 	hh_len = (dst->dev->hard_header_len + 15)&~15;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 2ba04d41dc25..61f6827e5906 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
+	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
+	if (err < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 676c80b5b14b..711175e0571f 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 		if (final_p)
 			ipv6_addr_copy(&fl.fl6_dst, final_p);
-		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
 			goto out_free;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a5d750acd793..f259c9671f3e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
+	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
+	if (err < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
@@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 				goto out;
 			}
 
-			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
 				sk->sk_err_soft = -err;
 				goto out;
 			}
@@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
 		goto done;
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
-	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+	if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
 		goto done;
 
 	skb = tcp_make_synack(sk, dst, req);
@@ -1018,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	 * namespace
 	 */
 	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
-		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
+		if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) {
 			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
 			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 			if (rst)
@@ -1316,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		if (final_p)
 			ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
 			goto out;
 	}
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fd2d9ad4a8a3..38390dd19636 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -849,7 +849,8 @@ do_udp_sendmsg:
 	if (final_p)
 		ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-	if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) {
+	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
+	if (err < 0) {
 		if (err == -EREMOTE)
 			err = ip6_dst_blackhole(sk, &dst, &fl);
 		if (err < 0)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6165218fd7c2..7c88a25c7af5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -940,7 +940,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
 	return ret;
 }
 
-static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
+static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
+						     struct flowi *fl,
 						     u16 family, u8 dir)
 {
 	int err;
@@ -956,7 +957,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
 		return NULL;
 
 	read_lock_bh(&xfrm_policy_lock);
-	chain = policy_hash_direct(&init_net, daddr, saddr, family, dir);
+	chain = policy_hash_direct(net, daddr, saddr, family, dir);
 	ret = NULL;
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
@@ -973,7 +974,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
 			break;
 		}
 	}
-	chain = &init_net.xfrm.policy_inexact[dir];
+	chain = &net->xfrm.policy_inexact[dir];
 	hlist_for_each_entry(pol, entry, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
@@ -996,14 +997,14 @@ fail:
 	return ret;
 }
 
-static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
-			       void **objp, atomic_t **obj_refp)
+static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+			      u8 dir, void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
 	int err = 0;
 
 #ifdef CONFIG_XFRM_SUB_POLICY
-	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
+	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
 	if (IS_ERR(pol)) {
 		err = PTR_ERR(pol);
 		pol = NULL;
@@ -1011,7 +1012,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 	if (pol || err)
 		goto end;
 #endif
-	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 	if (IS_ERR(pol)) {
 		err = PTR_ERR(pol);
 		pol = NULL;
@@ -1537,7 +1538,7 @@ static int stale_bundle(struct dst_entry *dst);
  * At the moment we eat a raw IP route. Mostly to speed up lookups
  * on interfaces with disabled IPsec.
  */
-int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
 		  struct sock *sk, int flags)
 {
 	struct xfrm_policy *policy;
@@ -1575,10 +1576,10 @@ restart:
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
-		    !init_net.xfrm.policy_count[XFRM_POLICY_OUT])
+		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		policy = flow_cache_lookup(fl, dst_orig->ops->family,
+		policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
 		err = PTR_ERR(policy);
 		if (IS_ERR(policy)) {
@@ -1635,7 +1636,8 @@ restart:
 
 #ifdef CONFIG_XFRM_SUB_POLICY
 		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+			pols[1] = xfrm_policy_lookup_bytype(net,
+							    XFRM_POLICY_TYPE_MAIN,
 							    fl, family,
 							    XFRM_POLICY_OUT);
 			if (pols[1]) {
@@ -1683,11 +1685,11 @@ restart:
 			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
 				DECLARE_WAITQUEUE(wait, current);
 
-				add_wait_queue(&init_net.xfrm.km_waitq, &wait);
+				add_wait_queue(&net->xfrm.km_waitq, &wait);
 				set_current_state(TASK_INTERRUPTIBLE);
 				schedule();
 				set_current_state(TASK_RUNNING);
-				remove_wait_queue(&init_net.xfrm.km_waitq, &wait);
+				remove_wait_queue(&net->xfrm.km_waitq, &wait);
 
 				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
@@ -1781,10 +1783,10 @@ nopol:
 }
 EXPORT_SYMBOL(__xfrm_lookup);
 
-int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
+int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
 		struct sock *sk, int flags)
 {
-	int err = __xfrm_lookup(dst_p, fl, sk, flags);
+	int err = __xfrm_lookup(net, dst_p, fl, sk, flags);
 
 	if (err == -EREMOTE) {
 		dst_release(*dst_p);
@@ -1936,7 +1938,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	}
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, family, fl_dir,
+		pol = flow_cache_lookup(&init_net, &fl, family, fl_dir,
 					xfrm_policy_lookup);
 
 	if (IS_ERR(pol)) {
@@ -1959,7 +1961,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	npols ++;
 #ifdef CONFIG_XFRM_SUB_POLICY
 	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+		pols[1] = xfrm_policy_lookup_bytype(&init_net, XFRM_POLICY_TYPE_MAIN,
 						    &fl, family,
 						    XFRM_POLICY_IN);
 		if (pols[1]) {
@@ -2049,7 +2051,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		return 0;
 	}
 
-	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
+	return xfrm_lookup(&init_net, &skb->dst, &fl, NULL, 0) == 0;
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
-- 
cgit v1.2.3


From f6e1e25d703c0a9ba1863384a16851dec52f8e3a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:35:44 -0800
Subject: netns xfrm: xfrm_policy_check in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 3 ++-
 net/xfrm/xfrm_policy.c | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1dc4ff0f4851..158848f55640 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -992,12 +992,13 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
 				       struct sk_buff *skb,
 				       unsigned int family, int reverse)
 {
+	struct net *net = dev_net(skb->dev);
 	int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
 
 	if (sk && sk->sk_policy[XFRM_POLICY_IN])
 		return __xfrm_policy_check(sk, ndir, skb, family);
 
-	return	(!init_net.xfrm.policy_count[dir] && !skb->sp) ||
+	return	(!net->xfrm.policy_count[dir] && !skb->sp) ||
 		(skb->dst->flags & DST_NOPOLICY) ||
 		__xfrm_policy_check(sk, ndir, skb, family);
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7c88a25c7af5..8097c9958cfc 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1894,6 +1894,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp
 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 			unsigned short family)
 {
+	struct net *net = dev_net(skb->dev);
 	struct xfrm_policy *pol;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int npols = 0;
@@ -1938,7 +1939,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	}
 
 	if (!pol)
-		pol = flow_cache_lookup(&init_net, &fl, family, fl_dir,
+		pol = flow_cache_lookup(net, &fl, family, fl_dir,
 					xfrm_policy_lookup);
 
 	if (IS_ERR(pol)) {
@@ -1961,7 +1962,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	npols ++;
 #ifdef CONFIG_XFRM_SUB_POLICY
 	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-		pols[1] = xfrm_policy_lookup_bytype(&init_net, XFRM_POLICY_TYPE_MAIN,
+		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
 						    &fl, family,
 						    XFRM_POLICY_IN);
 		if (pols[1]) {
-- 
cgit v1.2.3


From 99a66657b2f62ae8b2b1e6ffc6abed051e4561ca Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:36:13 -0800
Subject: netns xfrm: xfrm_route_forward() in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 4 +++-
 net/xfrm/xfrm_policy.c | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 158848f55640..36c8cffdf4e2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1050,7 +1050,9 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
 
 static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
-	return	!init_net.xfrm.policy_count[XFRM_POLICY_OUT] ||
+	struct net *net = dev_net(skb->dev);
+
+	return	!net->xfrm.policy_count[XFRM_POLICY_OUT] ||
 		(skb->dst->flags & DST_NOXFRM) ||
 		__xfrm_route_forward(skb, family);
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8097c9958cfc..54b50a20804f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2044,6 +2044,7 @@ EXPORT_SYMBOL(__xfrm_policy_check);
 
 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
+	struct net *net = dev_net(skb->dev);
 	struct flowi fl;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0) {
@@ -2052,7 +2053,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		return 0;
 	}
 
-	return xfrm_lookup(&init_net, &skb->dst, &fl, NULL, 0) == 0;
+	return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0;
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
-- 
cgit v1.2.3


From ddcfd79680c1dc74eb5f24aa70785c11bf7eec8f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:37:23 -0800
Subject: netns xfrm: dst garbage-collecting in netns

Pass netns pointer to struct xfrm_policy_afinfo::garbage_collect()

	[This needs more thoughts on what to do with dst_ops]
	[Currently stub to init_net]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 2 +-
 net/ipv4/xfrm4_policy.c | 2 +-
 net/ipv6/xfrm6_policy.c | 2 +-
 net/xfrm/xfrm_policy.c  | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 36c8cffdf4e2..bd2515005ae2 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -265,7 +265,7 @@ struct xfrm_dst;
 struct xfrm_policy_afinfo {
 	unsigned short		family;
 	struct dst_ops		*dst_ops;
-	void			(*garbage_collect)(void);
+	void			(*garbage_collect)(struct net *net);
 	struct dst_entry	*(*dst_lookup)(int tos, xfrm_address_t *saddr,
 					       xfrm_address_t *daddr);
 	int			(*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 84dbb5a03cc2..a881ca38dddb 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -187,7 +187,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 {
-	xfrm4_policy_afinfo.garbage_collect();
+	xfrm4_policy_afinfo.garbage_collect(&init_net);
 	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
 }
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 3b67ce7786ec..2df8a78fc3df 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -220,7 +220,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 static inline int xfrm6_garbage_collect(struct dst_ops *ops)
 {
-	xfrm6_policy_afinfo.garbage_collect();
+	xfrm6_policy_afinfo.garbage_collect(&init_net);
 	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 49e089826f45..7ebbdd63fca0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2173,9 +2173,9 @@ static int unused_bundle(struct dst_entry *dst)
 	return !atomic_read(&dst->__refcnt);
 }
 
-static void __xfrm_garbage_collect(void)
+static void __xfrm_garbage_collect(struct net *net)
 {
-	xfrm_prune_bundles(&init_net, unused_bundle);
+	xfrm_prune_bundles(net, unused_bundle);
 }
 
 static int xfrm_flush_bundles(struct net *net)
-- 
cgit v1.2.3


From a6483b790f8efcd8db190c1c0ff93f9d9efe919a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:38:20 -0800
Subject: netns xfrm: per-netns NETLINK_XFRM socket

Stub senders to init_net's one temporarily.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/xfrm.h |   2 +
 include/net/xfrm.h       |   7 +--
 net/xfrm/xfrm_output.c   |   3 +-
 net/xfrm/xfrm_state.c    |   7 +--
 net/xfrm/xfrm_user.c     | 108 ++++++++++++++++++++++++++++++++---------------
 5 files changed, 83 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index c53d17357a49..09f3060e9d18 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -39,6 +39,8 @@ struct netns_xfrm {
 	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
 	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
 	struct work_struct	policy_hash_work;
+
+	struct sock		*nlsk;
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index bd2515005ae2..e027179e8199 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -48,7 +48,6 @@ DECLARE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
 #define XFRM_INC_STATS_USER(field)
 #endif
 
-extern struct sock *xfrm_nl;
 extern u32 sysctl_xfrm_aevent_etime;
 extern u32 sysctl_xfrm_aevent_rseqth;
 extern int sysctl_xfrm_larval_drop;
@@ -1516,18 +1515,20 @@ static inline int xfrm_policy_id2dir(u32 index)
 	return index & 7;
 }
 
-static inline int xfrm_aevent_is_on(void)
+#ifdef CONFIG_XFRM
+static inline int xfrm_aevent_is_on(struct net *net)
 {
 	struct sock *nlsk;
 	int ret = 0;
 
 	rcu_read_lock();
-	nlsk = rcu_dereference(xfrm_nl);
+	nlsk = rcu_dereference(net->xfrm.nlsk);
 	if (nlsk)
 		ret = netlink_has_listeners(nlsk, XFRMNLGRP_AEVENTS);
 	rcu_read_unlock();
 	return ret;
 }
+#endif
 
 static inline int xfrm_alg_len(struct xfrm_algo *alg)
 {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index dc50f1e71f76..ba90e5e50ffc 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -41,6 +41,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 {
 	struct dst_entry *dst = skb->dst;
 	struct xfrm_state *x = dst->xfrm;
+	struct net *net = xs_net(x);
 
 	if (err <= 0)
 		goto resume;
@@ -74,7 +75,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 				err = -EOVERFLOW;
 				goto error;
 			}
-			if (xfrm_aevent_is_on())
+			if (xfrm_aevent_is_on(net))
 				xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 		}
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ea340bbbcc64..21db37ab0a2f 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -24,9 +24,6 @@
 
 #include "xfrm_hash.h"
 
-struct sock *xfrm_nl;
-EXPORT_SYMBOL(xfrm_nl);
-
 u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
 
@@ -1659,7 +1656,7 @@ static void xfrm_replay_timer_handler(unsigned long data)
 	spin_lock(&x->lock);
 
 	if (x->km.state == XFRM_STATE_VALID) {
-		if (xfrm_aevent_is_on())
+		if (xfrm_aevent_is_on(xs_net(x)))
 			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
 		else
 			x->xflags |= XFRM_TIME_DEFER;
@@ -1715,7 +1712,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 		x->replay.bitmap |= (1U << diff);
 	}
 
-	if (xfrm_aevent_is_on())
+	if (xfrm_aevent_is_on(xs_net(x)))
 		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f6e02726cf1b..8b5b01dfb77a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -703,6 +703,7 @@ nla_put_failure:
 static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
+	struct net *net = sock_net(skb->sk);
 	struct sk_buff *r_skb;
 	u32 *flags = nlmsg_data(nlh);
 	u32 spid = NETLINK_CB(skb).pid;
@@ -715,7 +716,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (build_spdinfo(r_skb, spid, seq, *flags) < 0)
 		BUG();
 
-	return nlmsg_unicast(xfrm_nl, r_skb, spid);
+	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
 }
 
 static inline size_t xfrm_sadinfo_msgsize(void)
@@ -756,6 +757,7 @@ nla_put_failure:
 static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
+	struct net *net = sock_net(skb->sk);
 	struct sk_buff *r_skb;
 	u32 *flags = nlmsg_data(nlh);
 	u32 spid = NETLINK_CB(skb).pid;
@@ -768,12 +770,13 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (build_sadinfo(r_skb, spid, seq, *flags) < 0)
 		BUG();
 
-	return nlmsg_unicast(xfrm_nl, r_skb, spid);
+	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
 }
 
 static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
+	struct net *net = &init_net;
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
 	struct xfrm_state *x;
 	struct sk_buff *resp_skb;
@@ -787,7 +790,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (IS_ERR(resp_skb)) {
 		err = PTR_ERR(resp_skb);
 	} else {
-		err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid);
+		err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid);
 	}
 	xfrm_state_put(x);
 out_noput:
@@ -820,6 +823,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
 static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
+	struct net *net = &init_net;
 	struct xfrm_state *x;
 	struct xfrm_userspi_info *p;
 	struct sk_buff *resp_skb;
@@ -837,7 +841,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	x = NULL;
 	if (p->info.seq) {
-		x = xfrm_find_acq_byseq(&init_net, p->info.seq);
+		x = xfrm_find_acq_byseq(net, p->info.seq);
 		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
 			xfrm_state_put(x);
 			x = NULL;
@@ -845,7 +849,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	if (!x)
-		x = xfrm_find_acq(&init_net, p->info.mode, p->info.reqid,
+		x = xfrm_find_acq(net, p->info.mode, p->info.reqid,
 				  p->info.id.proto, daddr,
 				  &p->info.saddr, 1,
 				  family);
@@ -863,7 +867,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto out;
 	}
 
-	err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid);
+	err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid);
 
 out:
 	xfrm_state_put(x);
@@ -1311,6 +1315,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
 static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
+	struct net *net = &init_net;
 	struct xfrm_policy *xp;
 	struct xfrm_userpolicy_id *p;
 	u8 type = XFRM_POLICY_TYPE_MAIN;
@@ -1330,7 +1335,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return err;
 
 	if (p->index)
-		xp = xfrm_policy_byid(&init_net, type, p->dir, p->index, delete, &err);
+		xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -1347,7 +1352,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(&init_net, type, p->dir, &p->sel, ctx,
+		xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx,
 					   delete, &err);
 		security_xfrm_policy_free(ctx);
 	}
@@ -1361,7 +1366,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 		if (IS_ERR(resp_skb)) {
 			err = PTR_ERR(resp_skb);
 		} else {
-			err = nlmsg_unicast(xfrm_nl, resp_skb,
+			err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
 					    NETLINK_CB(skb).pid);
 		}
 	} else {
@@ -1457,6 +1462,7 @@ nla_put_failure:
 static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
+	struct net *net = &init_net;
 	struct xfrm_state *x;
 	struct sk_buff *r_skb;
 	int err;
@@ -1468,7 +1474,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	x = xfrm_state_lookup(&init_net, &id->daddr, id->spi, id->proto, id->family);
+	x = xfrm_state_lookup(net, &id->daddr, id->spi, id->proto, id->family);
 	if (x == NULL) {
 		kfree_skb(r_skb);
 		return -ESRCH;
@@ -1486,7 +1492,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (build_aevent(r_skb, x, &c) < 0)
 		BUG();
-	err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid);
+	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid);
 	spin_unlock_bh(&x->lock);
 	xfrm_state_put(x);
 	return err;
@@ -1869,6 +1875,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 			     struct xfrm_migrate *m, int num_migrate,
 			     struct xfrm_kmaddress *k)
 {
+	struct net *net = &init_net;
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC);
@@ -1879,7 +1886,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 	if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC);
 }
 #else
 static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
@@ -1968,6 +1975,7 @@ static struct xfrm_link {
 
 static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+	struct net *net = sock_net(skb->sk);
 	struct nlattr *attrs[XFRMA_MAX+1];
 	struct xfrm_link *link;
 	int type, err;
@@ -1989,7 +1997,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (link->dump == NULL)
 			return -EINVAL;
 
-		return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, link->done);
+		return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done);
 	}
 
 	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX,
@@ -2033,6 +2041,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 
 static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC);
@@ -2042,11 +2051,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
 	if (build_expire(skb, x, c) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
 static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC);
@@ -2056,11 +2066,12 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
 	if (build_aevent(skb, x, c) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC);
 }
 
 static int xfrm_notify_sa_flush(struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct xfrm_usersa_flush *p;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
@@ -2081,7 +2092,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
 }
 
 static inline size_t xfrm_sa_len(struct xfrm_state *x)
@@ -2111,6 +2122,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 
 static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct xfrm_usersa_info *p;
 	struct xfrm_usersa_id *id;
 	struct nlmsghdr *nlh;
@@ -2155,7 +2167,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
 
 nla_put_failure:
 	/* Somebody screwed up with xfrm_sa_len! */
@@ -2235,6 +2247,7 @@ nlmsg_failure:
 static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 			     struct xfrm_policy *xp, int dir)
 {
+	struct net *net = xs_net(x);
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC);
@@ -2244,7 +2257,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 	if (build_acquire(skb, x, xt, xp, dir) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
 }
 
 /* User gives us xfrm_user_policy_info followed by an array of 0
@@ -2344,6 +2357,7 @@ nlmsg_failure:
 
 static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC);
@@ -2353,11 +2367,12 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 	if (build_polexpire(skb, xp, dir, c) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
 static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct xfrm_userpolicy_info *p;
 	struct xfrm_userpolicy_id *id;
 	struct nlmsghdr *nlh;
@@ -2408,7 +2423,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
 	kfree_skb(skb);
@@ -2417,6 +2432,7 @@ nlmsg_failure:
 
 static int xfrm_notify_policy_flush(struct km_event *c)
 {
+	struct net *net = &init_net;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
 
@@ -2432,7 +2448,7 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 
 	nlmsg_end(skb, nlh);
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
 	kfree_skb(skb);
@@ -2491,6 +2507,7 @@ nla_put_failure:
 static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
 			    xfrm_address_t *addr)
 {
+	struct net *net = &init_net;
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
@@ -2500,7 +2517,7 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
 	if (build_report(skb, proto, sel, addr) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
 }
 
 static inline size_t xfrm_mapping_msgsize(void)
@@ -2536,6 +2553,7 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x,
 static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 			     __be16 sport)
 {
+	struct net *net = xs_net(x);
 	struct sk_buff *skb;
 
 	if (x->id.proto != IPPROTO_ESP)
@@ -2551,7 +2569,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	if (build_mapping(skb, x, ipaddr, sport) < 0)
 		BUG();
 
-	return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
+	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
 }
 
 static struct xfrm_mgr netlink_mgr = {
@@ -2565,33 +2583,53 @@ static struct xfrm_mgr netlink_mgr = {
 	.new_mapping	= xfrm_send_mapping,
 };
 
-static int __init xfrm_user_init(void)
+static int __net_init xfrm_user_net_init(struct net *net)
 {
 	struct sock *nlsk;
 
-	printk(KERN_INFO "Initializing XFRM netlink socket\n");
-
-	nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX,
+	nlsk = netlink_kernel_create(net, NETLINK_XFRM, XFRMNLGRP_MAX,
 				     xfrm_netlink_rcv, NULL, THIS_MODULE);
 	if (nlsk == NULL)
 		return -ENOMEM;
-	rcu_assign_pointer(xfrm_nl, nlsk);
-
-	xfrm_register_km(&netlink_mgr);
-
+	rcu_assign_pointer(net->xfrm.nlsk, nlsk);
 	return 0;
 }
 
-static void __exit xfrm_user_exit(void)
+static void __net_exit xfrm_user_net_exit(struct net *net)
 {
-	struct sock *nlsk = xfrm_nl;
+	struct sock *nlsk = net->xfrm.nlsk;
 
-	xfrm_unregister_km(&netlink_mgr);
-	rcu_assign_pointer(xfrm_nl, NULL);
+	rcu_assign_pointer(net->xfrm.nlsk, NULL);
 	synchronize_rcu();
 	netlink_kernel_release(nlsk);
 }
 
+static struct pernet_operations xfrm_user_net_ops = {
+	.init = xfrm_user_net_init,
+	.exit = xfrm_user_net_exit,
+};
+
+static int __init xfrm_user_init(void)
+{
+	int rv;
+
+	printk(KERN_INFO "Initializing XFRM netlink socket\n");
+
+	rv = register_pernet_subsys(&xfrm_user_net_ops);
+	if (rv < 0)
+		return rv;
+	rv = xfrm_register_km(&netlink_mgr);
+	if (rv < 0)
+		unregister_pernet_subsys(&xfrm_user_net_ops);
+	return rv;
+}
+
+static void __exit xfrm_user_exit(void)
+{
+	xfrm_unregister_km(&netlink_mgr);
+	unregister_pernet_subsys(&xfrm_user_net_ops);
+}
+
 module_init(xfrm_user_init);
 module_exit(xfrm_user_exit);
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 7067802e262457a9737521e5669b622028b2283a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:50:36 -0800
Subject: netns xfrm: pass netns with KM notifications

SA and SPD flush are executed with NULL SA and SPD respectively, for
these cases pass netns explicitly from userspace socket.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h   | 1 +
 net/key/af_key.c     | 2 ++
 net/xfrm/xfrm_user.c | 6 ++++--
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e027179e8199..52e784fa2c50 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -256,6 +256,7 @@ struct km_event
 	u32	seq;
 	u32	pid;
 	u32	event;
+	struct net *net;
 };
 
 struct net_device;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a0d849848ddd..ea7755ab7e6a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1739,6 +1739,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
 	c.seq = hdr->sadb_msg_seq;
 	c.pid = hdr->sadb_msg_pid;
 	c.event = XFRM_MSG_FLUSHSA;
+	c.net = &init_net;
 	km_state_notify(NULL, &c);
 
 	return 0;
@@ -2693,6 +2694,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	c.event = XFRM_MSG_FLUSHPOLICY;
 	c.pid = hdr->sadb_msg_pid;
 	c.seq = hdr->sadb_msg_seq;
+	c.net = &init_net;
 	km_policy_notify(NULL, 0, &c);
 
 	return 0;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ab8b138e5e2f..3e32ec2ea1ad 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1418,6 +1418,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
 	c.pid = nlh->nlmsg_pid;
+	c.net = net;
 	km_state_notify(NULL, &c);
 
 	return 0;
@@ -1569,6 +1570,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
 	c.pid = nlh->nlmsg_pid;
+	c.net = net;
 	km_policy_notify(NULL, 0, &c);
 	return 0;
 }
@@ -2084,7 +2086,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c)
 
 static int xfrm_notify_sa_flush(struct km_event *c)
 {
-	struct net *net = &init_net;
+	struct net *net = c->net;
 	struct xfrm_usersa_flush *p;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
@@ -2446,7 +2448,7 @@ nlmsg_failure:
 
 static int xfrm_notify_policy_flush(struct km_event *c)
 {
-	struct net *net = &init_net;
+	struct net *net = c->net;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
 
-- 
cgit v1.2.3


From db983c1144884cab10d6397532f4bf05eb0c01d2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:51:01 -0800
Subject: netns xfrm: KM reporting in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    | 4 ++--
 net/ipv6/mip6.c       | 3 ++-
 net/xfrm/xfrm_state.c | 4 ++--
 net/xfrm/xfrm_user.c  | 5 ++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 52e784fa2c50..f3ea1607c595 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -552,7 +552,7 @@ struct xfrm_mgr
 	struct xfrm_policy	*(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
-	int			(*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
+	int			(*report)(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 	int			(*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k);
 };
 
@@ -1471,7 +1471,7 @@ extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 
 extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid);
-extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
+extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 
 extern void xfrm_input_init(void);
 extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 31295c8f6196..f995e19c87a9 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -205,6 +205,7 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
 
 static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
 {
+	struct net *net = xs_net(x);
 	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
 	struct ipv6_destopt_hao *hao = NULL;
 	struct xfrm_selector sel;
@@ -247,7 +248,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 		sel.sport_mask = htons(~0);
 	sel.ifindex = fl->oif;
 
-	err = km_report(IPPROTO_DSTOPTS, &sel,
+	err = km_report(net, IPPROTO_DSTOPTS, &sel,
 			(hao ? (xfrm_address_t *)&hao->addr : NULL));
 
  out:
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 21db37ab0a2f..d594b5af5f6b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1833,7 +1833,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 EXPORT_SYMBOL(km_migrate);
 #endif
 
-int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
+int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
 {
 	int err = -EINVAL;
 	int ret;
@@ -1842,7 +1842,7 @@ int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
 		if (km->report) {
-			ret = km->report(proto, sel, addr);
+			ret = km->report(net, proto, sel, addr);
 			if (!ret)
 				err = ret;
 		}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3e32ec2ea1ad..b7240d5b77ad 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2520,10 +2520,9 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
-			    xfrm_address_t *addr)
+static int xfrm_send_report(struct net *net, u8 proto,
+			    struct xfrm_selector *sel, xfrm_address_t *addr)
 {
-	struct net *net = &init_net;
 	struct sk_buff *skb;
 
 	skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
-- 
cgit v1.2.3


From c5b3cf46eabe6e7459125fc6e2033b4222665017 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:51:25 -0800
Subject: netns xfrm: ->dst_lookup in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 3 ++-
 net/ipv4/xfrm4_policy.c | 7 ++++---
 net/ipv6/xfrm6_policy.c | 7 ++++---
 net/xfrm/xfrm_policy.c  | 7 ++++---
 4 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f3ea1607c595..b16d4c0b16e0 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -266,7 +266,8 @@ struct xfrm_policy_afinfo {
 	unsigned short		family;
 	struct dst_ops		*dst_ops;
 	void			(*garbage_collect)(struct net *net);
-	struct dst_entry	*(*dst_lookup)(int tos, xfrm_address_t *saddr,
+	struct dst_entry	*(*dst_lookup)(struct net *net, int tos,
+					       xfrm_address_t *saddr,
 					       xfrm_address_t *daddr);
 	int			(*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr);
 	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a881ca38dddb..e1cf9ed07d49 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -18,7 +18,8 @@
 static struct dst_ops xfrm4_dst_ops;
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
-static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr,
+static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
+					  xfrm_address_t *saddr,
 					  xfrm_address_t *daddr)
 {
 	struct flowi fl = {
@@ -36,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr,
 	if (saddr)
 		fl.fl4_src = saddr->a4;
 
-	err = __ip_route_output_key(&init_net, &rt, &fl);
+	err = __ip_route_output_key(net, &rt, &fl);
 	dst = &rt->u.dst;
 	if (err)
 		dst = ERR_PTR(err);
@@ -48,7 +49,7 @@ static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
 	struct dst_entry *dst;
 	struct rtable *rt;
 
-	dst = xfrm4_dst_lookup(0, NULL, daddr);
+	dst = xfrm4_dst_lookup(&init_net, 0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 2df8a78fc3df..d7a5b8bc3770 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -27,7 +27,8 @@
 static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
 
-static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
+static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
+					  xfrm_address_t *saddr,
 					  xfrm_address_t *daddr)
 {
 	struct flowi fl = {};
@@ -38,7 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr,
 	if (saddr)
 		memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src));
 
-	dst = ip6_route_output(&init_net, NULL, &fl);
+	dst = ip6_route_output(net, NULL, &fl);
 
 	err = dst->error;
 	if (dst->error) {
@@ -54,7 +55,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
 	struct dst_entry *dst;
 	struct net_device *dev;
 
-	dst = xfrm6_dst_lookup(0, NULL, daddr);
+	dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7ebbdd63fca0..2b0a80b6259b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -92,7 +92,7 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
 	return 0;
 }
 
-static inline struct dst_entry *__xfrm_dst_lookup(int tos,
+static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
 						  xfrm_address_t *saddr,
 						  xfrm_address_t *daddr,
 						  int family)
@@ -104,7 +104,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(int tos,
 	if (unlikely(afinfo == NULL))
 		return ERR_PTR(-EAFNOSUPPORT);
 
-	dst = afinfo->dst_lookup(tos, saddr, daddr);
+	dst = afinfo->dst_lookup(net, tos, saddr, daddr);
 
 	xfrm_policy_put_afinfo(afinfo);
 
@@ -116,6 +116,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
 						xfrm_address_t *prev_daddr,
 						int family)
 {
+	struct net *net = xs_net(x);
 	xfrm_address_t *saddr = &x->props.saddr;
 	xfrm_address_t *daddr = &x->id.daddr;
 	struct dst_entry *dst;
@@ -129,7 +130,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
 		daddr = x->coaddr;
 	}
 
-	dst = __xfrm_dst_lookup(tos, saddr, daddr, family);
+	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
 
 	if (!IS_ERR(dst)) {
 		if (prev_saddr != saddr)
-- 
cgit v1.2.3


From fbda33b2b85941c1ae3a0d89522dec5c1b1bd98c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:56:49 -0800
Subject: netns xfrm: ->get_saddr in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 2 +-
 net/ipv4/xfrm4_policy.c | 5 +++--
 net/ipv6/xfrm6_policy.c | 5 +++--
 net/xfrm/xfrm_policy.c  | 7 ++++---
 4 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index b16d4c0b16e0..d076f3d34278 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -269,7 +269,7 @@ struct xfrm_policy_afinfo {
 	struct dst_entry	*(*dst_lookup)(struct net *net, int tos,
 					       xfrm_address_t *saddr,
 					       xfrm_address_t *daddr);
-	int			(*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr);
+	int			(*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
 	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
 	void			(*decode_session)(struct sk_buff *skb,
 						  struct flowi *fl,
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e1cf9ed07d49..2ad24ba31f9d 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -44,12 +44,13 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 	return dst;
 }
 
-static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+static int xfrm4_get_saddr(struct net *net,
+			   xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
 	struct dst_entry *dst;
 	struct rtable *rt;
 
-	dst = xfrm4_dst_lookup(&init_net, 0, NULL, daddr);
+	dst = xfrm4_dst_lookup(net, 0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d7a5b8bc3770..97ab068e8ccc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -50,12 +50,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
 	return dst;
 }
 
-static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+static int xfrm6_get_saddr(struct net *net,
+			   xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
 	struct dst_entry *dst;
 	struct net_device *dev;
 
-	dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr);
+	dst = xfrm6_dst_lookup(net, 0, NULL, daddr);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2b0a80b6259b..7c7bb54f2265 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1187,7 +1187,7 @@ int __xfrm_sk_clone_policy(struct sock *sk)
 }
 
 static int
-xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
+xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
 	       unsigned short family)
 {
 	int err;
@@ -1195,7 +1195,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
 
 	if (unlikely(afinfo == NULL))
 		return -EINVAL;
-	err = afinfo->get_saddr(local, remote);
+	err = afinfo->get_saddr(net, local, remote);
 	xfrm_policy_put_afinfo(afinfo);
 	return err;
 }
@@ -1207,6 +1207,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 		      struct xfrm_state **xfrm,
 		      unsigned short family)
 {
+	struct net *net = xp_net(policy);
 	int nx;
 	int i, error;
 	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
@@ -1225,7 +1226,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
 			local = &tmpl->saddr;
 			family = tmpl->encap_family;
 			if (xfrm_addr_any(local, family)) {
-				error = xfrm_get_saddr(&tmp, remote, family);
+				error = xfrm_get_saddr(net, &tmp, remote, family);
 				if (error)
 					goto fail;
 				local = &tmp;
-- 
cgit v1.2.3


From 59c9940ed0ef026673cac52f2eaed77af7d486da Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 17:59:52 -0800
Subject: netns xfrm: per-netns MIBs

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h |  3 ++
 include/net/xfrm.h      | 13 ++++----
 net/ipv6/xfrm6_input.c  |  9 +++---
 net/xfrm/xfrm_input.c   | 22 ++++++-------
 net/xfrm/xfrm_output.c  | 15 ++++-----
 net/xfrm/xfrm_policy.c  | 82 ++++++++++++++++++++++++++++---------------------
 net/xfrm/xfrm_proc.c    |  2 +-
 7 files changed, 81 insertions(+), 65 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 10cb7c336de5..0b44112e2366 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -20,6 +20,9 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 	DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics);
 #endif
+#ifdef CONFIG_XFRM_STATISTICS
+	DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
+#endif
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d076f3d34278..78ec3e8a95ed 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -38,14 +38,13 @@
 	MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
 
 #ifdef CONFIG_XFRM_STATISTICS
-DECLARE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
-#define XFRM_INC_STATS(field)		SNMP_INC_STATS(xfrm_statistics, field)
-#define XFRM_INC_STATS_BH(field)	SNMP_INC_STATS_BH(xfrm_statistics, field)
-#define XFRM_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(xfrm_statistics, field)
+#define XFRM_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
+#define XFRM_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field)
+#define XFRM_INC_STATS_USER(net, field)	SNMP_INC_STATS_USER((net)-mib.xfrm_statistics, field)
 #else
-#define XFRM_INC_STATS(field)
-#define XFRM_INC_STATS_BH(field)
-#define XFRM_INC_STATS_USER(field)
+#define XFRM_INC_STATS(net, field)	((void)(net))
+#define XFRM_INC_STATS_BH(net, field)	((void)(net))
+#define XFRM_INC_STATS_USER(net, field)	((void)(net))
 #endif
 
 extern u32 sysctl_xfrm_aevent_etime;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index b69766a77743..9084582d236b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -58,6 +58,7 @@ EXPORT_SYMBOL(xfrm6_rcv);
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto)
 {
+	struct net *net = dev_net(skb->dev);
 	struct xfrm_state *x = NULL;
 	int i = 0;
 
@@ -67,7 +68,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 
 		sp = secpath_dup(skb->sp);
 		if (!sp) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
 			goto drop;
 		}
 		if (skb->sp)
@@ -76,7 +77,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 	}
 
 	if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
-		XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 		goto drop;
 	}
 
@@ -100,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 			break;
 		}
 
-		x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6);
+		x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6);
 		if (!x)
 			continue;
 
@@ -122,7 +123,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 	}
 
 	if (!x) {
-		XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
 		xfrm_audit_state_notfound_simple(skb, AF_INET6);
 		goto drop;
 	}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index a714dce03dc4..b4a13178fb40 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -128,7 +128,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		sp = secpath_dup(skb->sp);
 		if (!sp) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
 			goto drop;
 		}
 		if (skb->sp)
@@ -142,19 +142,19 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
-		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 		goto drop;
 	}
 
 	do {
 		if (skb->sp->len == XFRM_MAX_DEPTH) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 			goto drop;
 		}
 
 		x = xfrm_state_lookup(net, daddr, spi, nexthdr, family);
 		if (x == NULL) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
 			xfrm_audit_state_notfound(skb, family, spi, seq);
 			goto drop;
 		}
@@ -163,22 +163,22 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		spin_lock(&x->lock);
 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID);
 			goto drop_unlock;
 		}
 
 		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
 			goto drop_unlock;
 		}
 
 		if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
 			goto drop_unlock;
 		}
 
 		if (xfrm_state_check_expire(x)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
 			goto drop_unlock;
 		}
 
@@ -199,7 +199,7 @@ resume:
 							 x->type->proto);
 				x->stats.integrity_failed++;
 			}
-			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
 			goto drop_unlock;
 		}
 
@@ -225,7 +225,7 @@ resume:
 		}
 
 		if (inner_mode->input(x, skb)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
 			goto drop;
 		}
 
@@ -243,7 +243,7 @@ resume:
 
 		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
 		if (err < 0) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 			goto drop;
 		}
 	} while (!err);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index ba90e5e50ffc..c235597ba8dd 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -49,27 +49,27 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 	do {
 		err = xfrm_state_check_space(x, skb);
 		if (err) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			goto error_nolock;
 		}
 
 		err = x->outer_mode->output(x, skb);
 		if (err) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
 			goto error_nolock;
 		}
 
 		spin_lock_bh(&x->lock);
 		err = xfrm_state_check_expire(x);
 		if (err) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
 			goto error;
 		}
 
 		if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
 			XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq;
 			if (unlikely(x->replay.oseq == 0)) {
-				XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
 				x->replay.oseq--;
 				xfrm_audit_state_replay_overflow(x, skb);
 				err = -EOVERFLOW;
@@ -90,12 +90,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 resume:
 		if (err) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR);
 			goto error_nolock;
 		}
 
 		if (!(skb->dst = dst_pop(dst))) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
@@ -179,6 +179,7 @@ static int xfrm_output_gso(struct sk_buff *skb)
 
 int xfrm_output(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dst->dev);
 	int err;
 
 	if (skb_is_gso(skb))
@@ -187,7 +188,7 @@ int xfrm_output(struct sk_buff *skb)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		err = skb_checksum_help(skb);
 		if (err) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			kfree_skb(skb);
 			return err;
 		}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index fcf8c928285a..e239a25e571c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -36,11 +36,6 @@
 
 int sysctl_xfrm_larval_drop __read_mostly = 1;
 
-#ifdef CONFIG_XFRM_STATISTICS
-DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
-EXPORT_SYMBOL(xfrm_statistics);
-#endif
-
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
@@ -1570,7 +1565,7 @@ restart:
 		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 		err = PTR_ERR(policy);
 		if (IS_ERR(policy)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 			goto dropdst;
 		}
 	}
@@ -1585,7 +1580,7 @@ restart:
 					   dir, xfrm_policy_lookup);
 		err = PTR_ERR(policy);
 		if (IS_ERR(policy)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 			goto dropdst;
 		}
 	}
@@ -1608,7 +1603,7 @@ restart:
 	default:
 	case XFRM_POLICY_BLOCK:
 		/* Prohibit the flow */
-		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
 		err = -EPERM;
 		goto error;
 
@@ -1628,7 +1623,7 @@ restart:
 		 */
 		dst = xfrm_find_bundle(fl, policy, family);
 		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			err = PTR_ERR(dst);
 			goto error;
 		}
@@ -1644,12 +1639,12 @@ restart:
 							    XFRM_POLICY_OUT);
 			if (pols[1]) {
 				if (IS_ERR(pols[1])) {
-					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 					err = PTR_ERR(pols[1]);
 					goto error;
 				}
 				if (pols[1]->action == XFRM_POLICY_BLOCK) {
-					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
 					err = -EPERM;
 					goto error;
 				}
@@ -1680,7 +1675,7 @@ restart:
 				/* EREMOTE tells the caller to generate
 				 * a one-shot blackhole route.
 				 */
-				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
 				xfrm_pol_put(policy);
 				return -EREMOTE;
 			}
@@ -1696,7 +1691,7 @@ restart:
 				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 				if (nx == -EAGAIN && signal_pending(current)) {
-					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
 					err = -ERESTART;
 					goto error;
 				}
@@ -1708,7 +1703,7 @@ restart:
 				err = nx;
 			}
 			if (err < 0) {
-				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
 				goto error;
 			}
 		}
@@ -1721,7 +1716,7 @@ restart:
 		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
 		err = PTR_ERR(dst);
 		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
 			goto error;
 		}
 
@@ -1742,9 +1737,9 @@ restart:
 			dst_free(dst);
 
 			if (pol_dead)
-				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
 			else
-				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			err = -EHOSTUNREACH;
 			goto error;
 		}
@@ -1756,7 +1751,7 @@ restart:
 		if (unlikely(err)) {
 			write_unlock_bh(&policy->lock);
 			dst_free(dst);
-			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
 			goto error;
 		}
 
@@ -1912,7 +1907,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	fl_dir = policy_to_flow_dir(dir);
 
 	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
-		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 		return 0;
 	}
 
@@ -1925,7 +1920,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		for (i=skb->sp->len-1; i>=0; i--) {
 			struct xfrm_state *x = skb->sp->xvec[i];
 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
-				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
 				return 0;
 			}
 		}
@@ -1935,7 +1930,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	if (sk && sk->sk_policy[dir]) {
 		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
 		if (IS_ERR(pol)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 			return 0;
 		}
 	}
@@ -1945,14 +1940,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 					xfrm_policy_lookup);
 
 	if (IS_ERR(pol)) {
-		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 		return 0;
 	}
 
 	if (!pol) {
 		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
 			xfrm_secpath_reject(xerr_idx, skb, &fl);
-			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
 			return 0;
 		}
 		return 1;
@@ -1969,7 +1964,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 						    XFRM_POLICY_IN);
 		if (pols[1]) {
 			if (IS_ERR(pols[1])) {
-				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 				return 0;
 			}
 			pols[1]->curlft.use_time = get_seconds();
@@ -1993,11 +1988,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		for (pi = 0; pi < npols; pi++) {
 			if (pols[pi] != pol &&
 			    pols[pi]->action != XFRM_POLICY_ALLOW) {
-				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
 				goto reject;
 			}
 			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
-				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 				goto reject_error;
 			}
 			for (i = 0; i < pols[pi]->xfrm_nr; i++)
@@ -2021,20 +2016,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 				if (k < -1)
 					/* "-2 - errored_index" returned */
 					xerr_idx = -(2+k);
-				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
+				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
 				goto reject;
 			}
 		}
 
 		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
-			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
 			goto reject;
 		}
 
 		xfrm_pols_put(pols, npols);
 		return 1;
 	}
-	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
+	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
 
 reject:
 	xfrm_secpath_reject(xerr_idx, skb, &fl);
@@ -2051,7 +2046,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 
 	if (xfrm_decode_session(skb, &fl, family) < 0) {
 		/* XXX: we should have something like FWDHDRERROR here. */
-		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 		return 0;
 	}
 
@@ -2380,13 +2375,27 @@ static struct notifier_block xfrm_dev_notifier = {
 };
 
 #ifdef CONFIG_XFRM_STATISTICS
-static int __init xfrm_statistics_init(void)
+static int __net_init xfrm_statistics_init(struct net *net)
 {
-	if (snmp_mib_init((void **)xfrm_statistics,
+	if (snmp_mib_init((void **)net->mib.xfrm_statistics,
 			  sizeof(struct linux_xfrm_mib)) < 0)
 		return -ENOMEM;
 	return 0;
 }
+
+static void xfrm_statistics_fini(struct net *net)
+{
+	snmp_mib_free((void **)net->mib.xfrm_statistics);
+}
+#else
+static int __net_init xfrm_statistics_init(struct net *net)
+{
+	return 0;
+}
+
+static void xfrm_statistics_fini(struct net *net)
+{
+}
 #endif
 
 static int __net_init xfrm_policy_init(struct net *net)
@@ -2480,6 +2489,9 @@ static int __net_init xfrm_net_init(struct net *net)
 {
 	int rv;
 
+	rv = xfrm_statistics_init(net);
+	if (rv < 0)
+		goto out_statistics;
 	rv = xfrm_state_init(net);
 	if (rv < 0)
 		goto out_state;
@@ -2491,6 +2503,8 @@ static int __net_init xfrm_net_init(struct net *net)
 out_policy:
 	xfrm_state_fini(net);
 out_state:
+	xfrm_statistics_fini(net);
+out_statistics:
 	return rv;
 }
 
@@ -2498,6 +2512,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
 {
 	xfrm_policy_fini(net);
 	xfrm_state_fini(net);
+	xfrm_statistics_fini(net);
 }
 
 static struct pernet_operations __net_initdata xfrm_net_ops = {
@@ -2508,9 +2523,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
-#ifdef CONFIG_XFRM_STATISTICS
-	xfrm_statistics_init();
-#endif
 	xfrm_input_init();
 #ifdef CONFIG_XFRM_STATISTICS
 	xfrm_proc_init();
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 2b0db13f0cda..27a2ab92d874 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -62,7 +62,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
 	int i;
 	for (i=0; xfrm_mib_list[i].name; i++)
 		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
-			   fold_field((void **)xfrm_statistics,
+			   fold_field((void **)init_net.mib.xfrm_statistics,
 				      xfrm_mib_list[i].entry));
 	return 0;
 }
-- 
cgit v1.2.3


From c68cd1a01ba56995d85a4a62b195b2b3f6415c64 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 18:00:14 -0800
Subject: netns xfrm: /proc/net/xfrm_stat in netns

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  3 ++-
 net/xfrm/xfrm_policy.c | 11 +++++++----
 net/xfrm/xfrm_proc.c   | 26 ++++++++++++--------------
 3 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 78ec3e8a95ed..1554ccd0c940 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1306,7 +1306,8 @@ static inline void xfrm6_fini(void)
 #endif
 
 #ifdef CONFIG_XFRM_STATISTICS
-extern int xfrm_proc_init(void);
+extern int xfrm_proc_init(struct net *net);
+extern void xfrm_proc_fini(struct net *net);
 #endif
 
 extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e239a25e571c..38822b34ba7d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2377,14 +2377,20 @@ static struct notifier_block xfrm_dev_notifier = {
 #ifdef CONFIG_XFRM_STATISTICS
 static int __net_init xfrm_statistics_init(struct net *net)
 {
+	int rv;
+
 	if (snmp_mib_init((void **)net->mib.xfrm_statistics,
 			  sizeof(struct linux_xfrm_mib)) < 0)
 		return -ENOMEM;
-	return 0;
+	rv = xfrm_proc_init(net);
+	if (rv < 0)
+		snmp_mib_free((void **)net->mib.xfrm_statistics);
+	return rv;
 }
 
 static void xfrm_statistics_fini(struct net *net)
 {
+	xfrm_proc_fini(net);
 	snmp_mib_free((void **)net->mib.xfrm_statistics);
 }
 #else
@@ -2524,9 +2530,6 @@ void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_input_init();
-#ifdef CONFIG_XFRM_STATISTICS
-	xfrm_proc_init();
-#endif
 }
 
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 27a2ab92d874..284eaef1dbf2 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -59,17 +59,18 @@ fold_field(void *mib[], int offt)
 
 static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
 {
+	struct net *net = seq->private;
 	int i;
 	for (i=0; xfrm_mib_list[i].name; i++)
 		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
-			   fold_field((void **)init_net.mib.xfrm_statistics,
+			   fold_field((void **)net->mib.xfrm_statistics,
 				      xfrm_mib_list[i].entry));
 	return 0;
 }
 
 static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, xfrm_statistics_seq_show, NULL);
+	return single_open_net(inode, file, xfrm_statistics_seq_show);
 }
 
 static struct file_operations xfrm_statistics_seq_fops = {
@@ -77,21 +78,18 @@ static struct file_operations xfrm_statistics_seq_fops = {
 	.open	 = xfrm_statistics_seq_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };
 
-int __init xfrm_proc_init(void)
+int __net_init xfrm_proc_init(struct net *net)
 {
-	int rc = 0;
-
-	if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO,
+	if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO,
 				  &xfrm_statistics_seq_fops))
-		goto stat_fail;
-
- out:
-	return rc;
+		return -ENOMEM;
+	return 0;
+}
 
- stat_fail:
-	rc = -ENOMEM;
-	goto out;
+void xfrm_proc_fini(struct net *net)
+{
+	proc_net_remove(net, "xfrm_stat");
 }
-- 
cgit v1.2.3


From b27aeadb5948d400df83db4d29590fb9862ba49d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 25 Nov 2008 18:00:48 -0800
Subject: netns xfrm: per-netns sysctls

Make
	net.core.xfrm_aevent_etime
	net.core.xfrm_acq_expires
	net.core.xfrm_aevent_rseqth
	net.core.xfrm_larval_drop

sysctls per-netns.

For that make net_core_path[] global, register it to prevent two
/proc/net/core antries and change initcall position -- xfrm_init() is called
from fs_initcall, so this one should be fs_initcall at least.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h           |  1 +
 include/net/netns/xfrm.h   | 10 ++++++
 include/net/xfrm.h         | 14 +++++---
 net/core/sysctl_net_core.c | 42 +++--------------------
 net/xfrm/Makefile          |  4 +--
 net/xfrm/xfrm_policy.c     | 10 ++++--
 net/xfrm/xfrm_state.c      | 16 +++------
 net/xfrm/xfrm_sysctl.c     | 85 ++++++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_user.c       |  4 +--
 9 files changed, 125 insertions(+), 61 deletions(-)
 create mode 100644 net/xfrm/xfrm_sysctl.c

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index ddef10c22e3a..10868139e656 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -187,6 +187,7 @@ extern void inet_get_local_port_range(int *low, int *high);
 extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
 
+extern struct ctl_path net_core_path[];
 extern struct ctl_path net_ipv4_ctl_path[];
 
 /* From inetpeer.c */
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 09f3060e9d18..1ba912749caa 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,6 +6,8 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 
+struct ctl_table_header;
+
 struct xfrm_policy_hash {
 	struct hlist_head	*table;
 	unsigned int		hmask;
@@ -41,6 +43,14 @@ struct netns_xfrm {
 	struct work_struct	policy_hash_work;
 
 	struct sock		*nlsk;
+
+	u32			sysctl_aevent_etime;
+	u32			sysctl_aevent_rseqth;
+	int			sysctl_larval_drop;
+	u32			sysctl_acq_expires;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header	*sysctl_hdr;
+#endif
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1554ccd0c940..2e9f5c0018ae 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -47,11 +47,6 @@
 #define XFRM_INC_STATS_USER(net, field)	((void)(net))
 #endif
 
-extern u32 sysctl_xfrm_aevent_etime;
-extern u32 sysctl_xfrm_aevent_rseqth;
-extern int sysctl_xfrm_larval_drop;
-extern u32 sysctl_xfrm_acq_expires;
-
 extern struct mutex xfrm_cfg_mutex;
 
 /* Organization of SPD aka "XFRM rules"
@@ -1310,6 +1305,15 @@ extern int xfrm_proc_init(struct net *net);
 extern void xfrm_proc_fini(struct net *net);
 #endif
 
+extern int xfrm_sysctl_init(struct net *net);
+#ifdef CONFIG_SYSCTL
+extern void xfrm_sysctl_fini(struct net *net);
+#else
+static inline void xfrm_sysctl_fini(struct net *net)
+{
+}
+#endif
+
 extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
 extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 			   int (*func)(struct xfrm_state *, int, void*), void *);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2bc0384b0448..83d3398559ea 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -12,7 +12,6 @@
 #include <linux/netdevice.h>
 #include <linux/init.h>
 #include <net/sock.h>
-#include <net/xfrm.h>
 
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
@@ -89,40 +88,6 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-#ifdef CONFIG_XFRM
-	{
-		.ctl_name	= NET_CORE_AEVENT_ETIME,
-		.procname	= "xfrm_aevent_etime",
-		.data		= &sysctl_xfrm_aevent_etime,
-		.maxlen		= sizeof(u32),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.ctl_name	= NET_CORE_AEVENT_RSEQTH,
-		.procname	= "xfrm_aevent_rseqth",
-		.data		= &sysctl_xfrm_aevent_rseqth,
-		.maxlen		= sizeof(u32),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "xfrm_larval_drop",
-		.data		= &sysctl_xfrm_larval_drop,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "xfrm_acq_expires",
-		.data		= &sysctl_xfrm_acq_expires,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-#endif /* CONFIG_XFRM */
 #endif /* CONFIG_NET */
 	{
 		.ctl_name	= NET_CORE_BUDGET,
@@ -155,7 +120,7 @@ static struct ctl_table netns_core_table[] = {
 	{ .ctl_name = 0 }
 };
 
-static __net_initdata struct ctl_path net_core_path[] = {
+__net_initdata struct ctl_path net_core_path[] = {
 	{ .procname = "net", .ctl_name = CTL_NET, },
 	{ .procname = "core", .ctl_name = NET_CORE, },
 	{ },
@@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
 
 static __init int sysctl_core_init(void)
 {
+	static struct ctl_table empty[1];
+
+	register_sysctl_paths(net_core_path, empty);
 	register_net_sysctl_rotable(net_core_path, net_core_table);
 	return register_pernet_subsys(&sysctl_core_ops);
 }
 
-__initcall(sysctl_core_init);
+fs_initcall(sysctl_core_init);
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 0f439a72ccab..c631047e1b27 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -3,8 +3,8 @@
 #
 
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
-		      xfrm_input.o xfrm_output.o xfrm_algo.o
+		      xfrm_input.o xfrm_output.o xfrm_algo.o \
+		      xfrm_sysctl.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
-
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 38822b34ba7d..393cc65dbfa4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -34,8 +34,6 @@
 
 #include "xfrm_hash.h"
 
-int sysctl_xfrm_larval_drop __read_mostly = 1;
-
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
@@ -1671,7 +1669,7 @@ restart:
 
 		if (unlikely(nx<0)) {
 			err = nx;
-			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
+			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
 				/* EREMOTE tells the caller to generate
 				 * a one-shot blackhole route.
 				 */
@@ -2504,8 +2502,13 @@ static int __net_init xfrm_net_init(struct net *net)
 	rv = xfrm_policy_init(net);
 	if (rv < 0)
 		goto out_policy;
+	rv = xfrm_sysctl_init(net);
+	if (rv < 0)
+		goto out_sysctl;
 	return 0;
 
+out_sysctl:
+	xfrm_policy_fini(net);
 out_policy:
 	xfrm_state_fini(net);
 out_state:
@@ -2516,6 +2519,7 @@ out_statistics:
 
 static void __net_exit xfrm_net_exit(struct net *net)
 {
+	xfrm_sysctl_fini(net);
 	xfrm_policy_fini(net);
 	xfrm_state_fini(net);
 	xfrm_statistics_fini(net);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 662e47b0bcc3..2fd57f8f77c1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -24,14 +24,6 @@
 
 #include "xfrm_hash.h"
 
-u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
-EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
-
-u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
-EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
-
-u32 sysctl_xfrm_acq_expires __read_mostly = 30;
-
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@ -851,8 +843,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family);
 				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
 			}
-			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
-			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
+			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
+			x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ;
 			add_timer(&x->timer);
 			net->xfrm.state_num++;
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
@@ -1040,9 +1032,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family
 		x->props.family = family;
 		x->props.mode = mode;
 		x->props.reqid = reqid;
-		x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
+		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 		xfrm_state_hold(x);
-		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
+		x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ;
 		add_timer(&x->timer);
 		list_add(&x->km.all, &net->xfrm.state_all);
 		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
new file mode 100644
index 000000000000..2e6ffb66f06f
--- /dev/null
+++ b/net/xfrm/xfrm_sysctl.c
@@ -0,0 +1,85 @@
+#include <linux/sysctl.h>
+#include <net/net_namespace.h>
+#include <net/xfrm.h>
+
+static void __xfrm_sysctl_init(struct net *net)
+{
+	net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME;
+	net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE;
+	net->xfrm.sysctl_larval_drop = 1;
+	net->xfrm.sysctl_acq_expires = 30;
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table xfrm_table[] = {
+	{
+		.ctl_name	= NET_CORE_AEVENT_ETIME,
+		.procname	= "xfrm_aevent_etime",
+		.maxlen		= sizeof(u32),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_AEVENT_RSEQTH,
+		.procname	= "xfrm_aevent_rseqth",
+		.maxlen		= sizeof(u32),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "xfrm_larval_drop",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "xfrm_acq_expires",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{}
+};
+
+int __net_init xfrm_sysctl_init(struct net *net)
+{
+	struct ctl_table *table;
+
+	__xfrm_sysctl_init(net);
+
+	table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL);
+	if (!table)
+		goto out_kmemdup;
+	table[0].data = &net->xfrm.sysctl_aevent_etime;
+	table[1].data = &net->xfrm.sysctl_aevent_rseqth;
+	table[2].data = &net->xfrm.sysctl_larval_drop;
+	table[3].data = &net->xfrm.sysctl_acq_expires;
+
+	net->xfrm.sysctl_hdr = register_net_sysctl_table(net, net_core_path, table);
+	if (!net->xfrm.sysctl_hdr)
+		goto out_register;
+	return 0;
+
+out_register:
+	kfree(table);
+out_kmemdup:
+	return -ENOMEM;
+}
+
+void xfrm_sysctl_fini(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->xfrm.sysctl_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->xfrm.sysctl_hdr);
+	kfree(table);
+}
+#else
+int __net_init xfrm_sysctl_init(struct net *net)
+{
+	__xfrm_sysctl_init(net);
+	return 0;
+}
+#endif
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b7240d5b77ad..38ffaf33312e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -368,9 +368,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 		goto error;
 
 	x->km.seq = p->seq;
-	x->replay_maxdiff = sysctl_xfrm_aevent_rseqth;
+	x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth;
 	/* sysctl_xfrm_aevent_etime is in 100ms units */
-	x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M;
+	x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M;
 	x->preplay.bitmap = 0;
 	x->preplay.seq = x->replay.seq+x->replay_maxdiff;
 	x->preplay.oseq = x->replay.oseq +x->replay_maxdiff;
-- 
cgit v1.2.3


From c1b56878fb68e9c14070939ea4537ad4db79ffae Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 25 Nov 2008 21:14:06 -0800
Subject: tc: policing requires a rate estimator

Found that while trying average rate policing, it was possible to
request average rate policing without a rate estimator. This results
in no policing which is harmless but incorrect.

Since policing could be setup in two steps, need to check
in the kernel.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h  |  1 +
 net/core/gen_estimator.c | 30 +++++++++++++++++++++++++++---
 net/sched/act_police.c   |  6 ++++++
 3 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 8cd8185fa2ed..dcf5bfa7d4f1 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -45,5 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats,
 extern int gen_replace_estimator(struct gnet_stats_basic *bstats,
 				 struct gnet_stats_rate_est *rate_est,
 				 spinlock_t *stats_lock, struct nlattr *opt);
+extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est);
 
 #endif
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 80aa160877e9..3885550f0187 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -242,6 +242,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
 
 	return 0;
 }
+EXPORT_SYMBOL(gen_new_estimator);
 
 static void __gen_kill_estimator(struct rcu_head *head)
 {
@@ -275,6 +276,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
 	}
 }
+EXPORT_SYMBOL(gen_kill_estimator);
 
 /**
  * gen_replace_estimator - replace rate estimator configuration
@@ -295,8 +297,30 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats,
 	gen_kill_estimator(bstats, rate_est);
 	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
 }
+EXPORT_SYMBOL(gen_replace_estimator);
+
+/**
+ * gen_estimator_active - test if estimator is currently in use
+ * @rate_est: rate estimator statistics
+ *
+ * Returns 1 if estimator is active, and 0 if not.
+ */
+int gen_estimator_active(const struct gnet_stats_rate_est *rate_est)
+{
+	int idx;
+	struct gen_estimator *e;
 
+	ASSERT_RTNL();
 
-EXPORT_SYMBOL(gen_kill_estimator);
-EXPORT_SYMBOL(gen_new_estimator);
-EXPORT_SYMBOL(gen_replace_estimator);
+	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
+		if (!elist[idx].timer.function)
+			continue;
+
+		list_for_each_entry(e, &elist[idx].list, list) {
+			if (e->rate_est == rate_est)
+				return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e19a0261144a..c39f60cea6ee 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -182,6 +182,12 @@ override:
 		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
 		if (R_tab == NULL)
 			goto failure;
+
+		if (!est && !gen_estimator_active(&police->tcf_rate_est)) {
+			err = -EINVAL;
+			goto failure;
+		}
+
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
 					       tb[TCA_POLICE_PEAKRATE]);
-- 
cgit v1.2.3


From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 25 Nov 2008 21:16:35 -0800
Subject: net: Use a percpu_counter for sockets_allocated

Instead of using one atomic_t per protocol, use a percpu_counter
for "sockets_allocated", to reduce cache line contention on
heavy duty network servers.

Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9
net: af_unix can make unix_nr_socks visbile in /proc),
since it is not anymore used after sock_prot_inuse_add() addition

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |  1 +
 include/net/sock.h      |  2 +-
 include/net/tcp.h       |  2 +-
 net/core/sock.c         | 10 +++++++---
 net/ipv4/proc.c         |  3 ++-
 net/ipv4/tcp.c          |  8 ++++++--
 net/ipv4/tcp_ipv4.c     |  4 ++--
 net/ipv6/tcp_ipv6.c     |  2 +-
 net/sctp/protocol.c     |  6 +++++-
 net/sctp/socket.c       |  6 +++---
 net/unix/af_unix.c      |  1 -
 11 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 23797506f593..bbb7742195b0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk);
 unsigned int sctp_poll(struct file *file, struct socket *sock,
 		poll_table *wait);
 void sctp_sock_rfree(struct sk_buff *skb);
+extern struct percpu_counter sctp_sockets_allocated;
 
 /*
  * sctp/primitive.c
diff --git a/include/net/sock.h b/include/net/sock.h
index 00cd486d362f..a2a3890959c4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -649,7 +649,7 @@ struct proto {
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
 	atomic_t		*memory_allocated;	/* Current allocated memory. */
-	atomic_t		*sockets_allocated;	/* Current number of sockets. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
 	/*
 	 * Pressure flag: try to collapse.
 	 * Technical note: it is used by multiple contexts non atomically.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e8ae90a8c35e..cbca3b8a133d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
 
 extern atomic_t tcp_memory_allocated;
-extern atomic_t tcp_sockets_allocated;
+extern struct percpu_counter tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
 /*
diff --git a/net/core/sock.c b/net/core/sock.c
index a4e840e5a053..7a081b647bf9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		newsk->sk_sleep	 = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
-			atomic_inc(newsk->sk_prot->sockets_allocated);
+			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
 	}
 out:
 	return newsk;
@@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	}
 
 	if (prot->memory_pressure) {
-		if (!*prot->memory_pressure ||
-		    prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+		int alloc;
+
+		if (!*prot->memory_pressure)
+			return 1;
+		alloc = percpu_counter_read_positive(prot->sockets_allocated);
+		if (prot->sysctl_mem[2] > alloc *
 		    sk_mem_pages(sk->sk_wmem_queued +
 				 atomic_read(&sk->sk_rmem_alloc) +
 				 sk->sk_forward_alloc))
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 731789bb499f..4944b47ad628 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -55,7 +55,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   sock_prot_inuse_get(net, &tcp_prot),
 		   atomic_read(&tcp_orphan_count),
-		   tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
+		   tcp_death_row.tw_count,
+		   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
 	seq_printf(seq, "UDP: inuse %d mem %d\n",
 		   sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 044224a341eb..e6fade9ebf62 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -290,9 +290,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
 atomic_t tcp_memory_allocated;	/* Current allocated memory. */
-atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
-
 EXPORT_SYMBOL(tcp_memory_allocated);
+
+/*
+ * Current number of TCP sockets.
+ */
+struct percpu_counter tcp_sockets_allocated;
 EXPORT_SYMBOL(tcp_sockets_allocated);
 
 /*
@@ -2685,6 +2688,7 @@ void __init tcp_init(void)
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
+	percpu_counter_init(&tcp_sockets_allocated, 0);
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cab2458f86fd..26b9030747cc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1797,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	percpu_counter_inc(&tcp_sockets_allocated);
 
 	return 0;
 }
@@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	atomic_dec(&tcp_sockets_allocated);
+	percpu_counter_dec(&tcp_sockets_allocated);
 }
 
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f259c9671f3e..8702b06cb60a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
-	atomic_inc(&tcp_sockets_allocated);
+	percpu_counter_inc(&tcp_sockets_allocated);
 
 	return 0;
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a8ca743241ee..d5ea232c9126 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void)
 /* Set up the proc fs entry for the SCTP protocol. */
 static __init int sctp_proc_init(void)
 {
+	if (percpu_counter_init(&sctp_sockets_allocated, 0))
+		goto out_nomem;
 #ifdef CONFIG_PROC_FS
 	if (!proc_net_sctp) {
 		struct proc_dir_entry *ent;
@@ -110,7 +112,7 @@ static __init int sctp_proc_init(void)
 			ent->owner = THIS_MODULE;
 			proc_net_sctp = ent;
 		} else
-			goto out_nomem;
+			goto out_free_percpu;
 	}
 
 	if (sctp_snmp_proc_init())
@@ -135,6 +137,8 @@ out_snmp_proc_init:
 		proc_net_sctp = NULL;
 		remove_proc_entry("sctp", init_net.proc_net);
 	}
+out_free_percpu:
+	percpu_counter_destroy(&sctp_sockets_allocated);
 out_nomem:
 	return -ENOMEM;
 #else
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ba81fe3ccab8..a2de585888d0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3];
 
 static int sctp_memory_pressure;
 static atomic_t sctp_memory_allocated;
-static atomic_t sctp_sockets_allocated;
+struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
 {
@@ -3613,7 +3613,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->hmac = NULL;
 
 	SCTP_DBG_OBJCNT_INC(sock);
-	atomic_inc(&sctp_sockets_allocated);
+	percpu_counter_inc(&sctp_sockets_allocated);
 
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3632,7 +3632,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
 	/* Release our hold on the endpoint. */
 	ep = sctp_sk(sk)->ep;
 	sctp_endpoint_free(ep);
-	atomic_dec(&sctp_sockets_allocated);
+	percpu_counter_dec(&sctp_sockets_allocated);
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	local_bh_enable();
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3a35a6e8bf91..5aaf23e43f1d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -571,7 +571,6 @@ static const struct proto_ops unix_seqpacket_ops = {
 static struct proto unix_proto = {
 	.name			= "UNIX",
 	.owner			= THIS_MODULE,
-	.sockets_allocated	= &unix_nr_socks,
 	.obj_size		= sizeof(struct unix_sock),
 };
 
-- 
cgit v1.2.3


From dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 25 Nov 2008 21:17:14 -0800
Subject: net: Use a percpu_counter for orphan_count

Instead of using one atomic_t per protocol, use a percpu_counter
for "orphan_count", to reduce cache line contention on
heavy duty network servers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h              |  2 +-
 include/net/tcp.h               |  2 +-
 net/dccp/dccp.h                 |  2 +-
 net/dccp/proto.c                | 16 ++++++++++------
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/proc.c                 |  2 +-
 net/ipv4/tcp.c                  | 12 +++++++-----
 net/ipv4/tcp_timer.c            |  2 +-
 8 files changed, 24 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index a2a3890959c4..5a3a151bd730 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -666,7 +666,7 @@ struct proto {
 	unsigned int		obj_size;
 	int			slab_flags;
 
-	atomic_t		*orphan_count;
+	struct percpu_counter	*orphan_count;
 
 	struct request_sock_ops	*rsk_prot;
 	struct timewait_sock_ops *twsk_prot;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cbca3b8a133d..de1e91d959b8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,7 +46,7 @@
 
 extern struct inet_hashinfo tcp_hashinfo;
 
-extern atomic_t tcp_orphan_count;
+extern struct percpu_counter tcp_orphan_count;
 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 031ce350d3c1..33a1127270c1 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -49,7 +49,7 @@ extern int dccp_debug;
 
 extern struct inet_hashinfo dccp_hashinfo;
 
-extern atomic_t dccp_orphan_count;
+extern struct percpu_counter dccp_orphan_count;
 
 extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ea85c423cdbd..db225f93cd5a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
 EXPORT_SYMBOL_GPL(dccp_statistics);
 
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
 EXPORT_SYMBOL_GPL(dccp_orphan_count);
 
 struct inet_hashinfo dccp_hashinfo;
@@ -1000,7 +999,7 @@ adjudge_to_death:
 	state = sk->sk_state;
 	sock_hold(sk);
 	sock_orphan(sk);
-	atomic_inc(sk->sk_prot->orphan_count);
+	percpu_counter_inc(sk->sk_prot->orphan_count);
 
 	/*
 	 * It is the last release_sock in its life. It will remove backlog.
@@ -1064,18 +1063,21 @@ static int __init dccp_init(void)
 {
 	unsigned long goal;
 	int ehash_order, bhash_order, i;
-	int rc = -ENOBUFS;
+	int rc;
 
 	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
 		     FIELD_SIZEOF(struct sk_buff, cb));
-
+	rc = percpu_counter_init(&dccp_orphan_count, 0);
+	if (rc)
+		goto out;
+	rc = -ENOBUFS;
 	inet_hashinfo_init(&dccp_hashinfo);
 	dccp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("dccp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
 				  SLAB_HWCACHE_ALIGN, NULL);
 	if (!dccp_hashinfo.bind_bucket_cachep)
-		goto out;
+		goto out_free_percpu;
 
 	/*
 	 * Size and allocate the main established and bind bucket
@@ -1168,6 +1170,8 @@ out_free_dccp_ehash:
 out_free_bind_bucket_cachep:
 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
 	dccp_hashinfo.bind_bucket_cachep = NULL;
+out_free_percpu:
+	percpu_counter_destroy(&dccp_orphan_count);
 	goto out;
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 05af807ca9b9..1ccdbba528be 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk)
 
 	sk_refcnt_debug_release(sk);
 
-	atomic_dec(sk->sk_prot->orphan_count);
+	percpu_counter_dec(sk->sk_prot->orphan_count);
 	sock_put(sk);
 }
 
@@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		sock_orphan(child);
 
-		atomic_inc(sk->sk_prot->orphan_count);
+		percpu_counter_inc(sk->sk_prot->orphan_count);
 
 		inet_csk_destroy_sock(child);
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4944b47ad628..614958b7c276 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 		   sock_prot_inuse_get(net, &tcp_prot),
-		   atomic_read(&tcp_orphan_count),
+		   (int)percpu_counter_sum_positive(&tcp_orphan_count),
 		   tcp_death_row.tw_count,
 		   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 		   atomic_read(&tcp_memory_allocated));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e6fade9ebf62..019243408623 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -277,8 +277,7 @@
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
-atomic_t tcp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
 int sysctl_tcp_mem[3] __read_mostly;
@@ -1837,7 +1836,7 @@ adjudge_to_death:
 	state = sk->sk_state;
 	sock_hold(sk);
 	sock_orphan(sk);
-	atomic_inc(sk->sk_prot->orphan_count);
+	percpu_counter_inc(sk->sk_prot->orphan_count);
 
 	/* It is the last release_sock in its life. It will remove backlog. */
 	release_sock(sk);
@@ -1888,9 +1887,11 @@ adjudge_to_death:
 		}
 	}
 	if (sk->sk_state != TCP_CLOSE) {
+		int orphan_count = percpu_counter_read_positive(
+						sk->sk_prot->orphan_count);
+
 		sk_mem_reclaim(sk);
-		if (tcp_too_many_orphans(sk,
-				atomic_read(sk->sk_prot->orphan_count))) {
+		if (tcp_too_many_orphans(sk, orphan_count)) {
 			if (net_ratelimit())
 				printk(KERN_INFO "TCP: too many of orphaned "
 				       "sockets\n");
@@ -2689,6 +2690,7 @@ void __init tcp_init(void)
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0);
+	percpu_counter_init(&tcp_orphan_count, 0);
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3df339e3e363..cc4e6d27dedc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk)
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orphans = atomic_read(&tcp_orphan_count);
+	int orphans = percpu_counter_read_positive(&tcp_orphan_count);
 
 	/* If peer does not open window for long time, or did not transmit
 	 * anything for long time, penalize it. */
-- 
cgit v1.2.3


From 5a45cfe1c64862e8cd3b0d79d7c4ba71c3118915 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 26 Nov 2008 00:16:24 -0500
Subject: ftrace: use code patching for ftrace graph tracer

Impact: more efficient code for ftrace graph tracer

This patch uses the dynamic patching, when available, to patch
the function graph code into the kernel.

This patch will ease the way for letting both function tracing
and function graph tracing run together.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S |  5 +++++
 arch/x86/kernel/ftrace.c   | 48 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/ftrace.h     |  5 +++++
 kernel/trace/ftrace.c      | 35 ++++++++++++++++-----------------
 4 files changed, 72 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7def9fd5c1e6..958af86186c4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1174,6 +1174,11 @@ ftrace_call:
 	popl %edx
 	popl %ecx
 	popl %eax
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+	jmp ftrace_stub
+#endif
 
 .globl ftrace_stub
 ftrace_stub:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 26b2d92d48b3..7ef914e6a2f6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -111,7 +111,6 @@ static void ftrace_mod_code(void)
 	 */
 	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
 					     MCOUNT_INSN_SIZE);
-
 }
 
 void ftrace_nmi_enter(void)
@@ -325,7 +324,51 @@ int __init ftrace_dyn_arch_init(void *data)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-#ifndef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+
+static int ftrace_mod_jmp(unsigned long ip,
+			  int old_offset, int new_offset)
+{
+	unsigned char code[MCOUNT_INSN_SIZE];
+
+	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
+		return -EINVAL;
+
+	*(int *)(&code[1]) = new_offset;
+
+	if (do_ftrace_mod_code(ip, &code))
+		return -EPERM;
+
+	return 0;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	int old_offset, new_offset;
+
+	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+
+	return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	int old_offset, new_offset;
+
+	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+
+	return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+#else /* CONFIG_DYNAMIC_FTRACE */
 
 /*
  * These functions are picked from those used on
@@ -343,6 +386,7 @@ void ftrace_nmi_exit(void)
 {
 	atomic_dec(&in_nmi);
 }
+
 #endif /* !CONFIG_DYNAMIC_FTRACE */
 
 /* Add a function return address to the trace stack on thread info.*/
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fc2d54987198..f9792c0d73f6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -117,6 +117,11 @@ extern void ftrace_call(void);
 extern void mcount_call(void);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern void ftrace_graph_caller(void);
+extern int ftrace_enable_ftrace_graph_caller(void);
+extern int ftrace_disable_ftrace_graph_caller(void);
+#else
+static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; }
+static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
 #endif
 
 /**
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 00d98c65fad0..5f7c8642d58b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -281,6 +281,8 @@ enum {
 	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
 	FTRACE_ENABLE_MCOUNT		= (1 << 3),
 	FTRACE_DISABLE_MCOUNT		= (1 << 4),
+	FTRACE_START_FUNC_RET		= (1 << 5),
+	FTRACE_STOP_FUNC_RET		= (1 << 6),
 };
 
 static int ftrace_filtered;
@@ -465,14 +467,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 	unsigned long ip, fl;
 	unsigned long ftrace_addr;
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
-		ftrace_addr = (unsigned long)ftrace_caller;
-	else
-		ftrace_addr = (unsigned long)ftrace_graph_caller;
-#else
 	ftrace_addr = (unsigned long)ftrace_caller;
-#endif
 
 	ip = rec->ip;
 
@@ -605,6 +600,11 @@ static int __ftrace_modify_code(void *data)
 	if (*command & FTRACE_UPDATE_TRACE_FUNC)
 		ftrace_update_ftrace_func(ftrace_trace_function);
 
+	if (*command & FTRACE_START_FUNC_RET)
+		ftrace_enable_ftrace_graph_caller();
+	else if (*command & FTRACE_STOP_FUNC_RET)
+		ftrace_disable_ftrace_graph_caller();
+
 	return 0;
 }
 
@@ -629,10 +629,8 @@ static void ftrace_startup_enable(int command)
 	ftrace_run_update_code(command);
 }
 
-static void ftrace_startup(void)
+static void ftrace_startup(int command)
 {
-	int command = 0;
-
 	if (unlikely(ftrace_disabled))
 		return;
 
@@ -645,10 +643,8 @@ static void ftrace_startup(void)
 	mutex_unlock(&ftrace_start_lock);
 }
 
-static void ftrace_shutdown(void)
+static void ftrace_shutdown(int command)
 {
-	int command = 0;
-
 	if (unlikely(ftrace_disabled))
 		return;
 
@@ -1453,8 +1449,9 @@ device_initcall(ftrace_nodyn_init);
 
 static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
-# define ftrace_startup()		do { } while (0)
-# define ftrace_shutdown()		do { } while (0)
+/* Keep as macros so we do not need to define the commands */
+# define ftrace_startup(command)	do { } while (0)
+# define ftrace_shutdown(command)	do { } while (0)
 # define ftrace_startup_sysctl()	do { } while (0)
 # define ftrace_shutdown_sysctl()	do { } while (0)
 #endif /* CONFIG_DYNAMIC_FTRACE */
@@ -1585,7 +1582,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
 	}
 
 	ret = __register_ftrace_function(ops);
-	ftrace_startup();
+	ftrace_startup(0);
 
 out:
 	mutex_unlock(&ftrace_sysctl_lock);
@@ -1604,7 +1601,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 
 	mutex_lock(&ftrace_sysctl_lock);
 	ret = __unregister_ftrace_function(ops);
-	ftrace_shutdown();
+	ftrace_shutdown(0);
 	mutex_unlock(&ftrace_sysctl_lock);
 
 	return ret;
@@ -1751,7 +1748,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 	ftrace_tracing_type = FTRACE_TYPE_RETURN;
 	ftrace_graph_return = retfunc;
 	ftrace_graph_entry = entryfunc;
-	ftrace_startup();
+	ftrace_startup(FTRACE_START_FUNC_RET);
 
 out:
 	mutex_unlock(&ftrace_sysctl_lock);
@@ -1765,7 +1762,7 @@ void unregister_ftrace_graph(void)
 	atomic_dec(&ftrace_graph_active);
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
 	ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub;
-	ftrace_shutdown();
+	ftrace_shutdown(FTRACE_STOP_FUNC_RET);
 	/* Restore normal tracing type */
 	ftrace_tracing_type = FTRACE_TYPE_ENTER;
 
-- 
cgit v1.2.3


From f3f47a6768a29448866da4422b6f6bee485c947f Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sun, 23 Nov 2008 16:49:58 -0800
Subject: tracing: add "power-tracer": C/P state tracer to help power
 optimization

Impact: new "power-tracer" ftrace plugin

This patch adds a C/P-state ftrace plugin that will generate
detailed statistics about the C/P-states that are being used,
so that we can look at detailed decisions that the C/P-state
code is making, rather than the too high level "average"
that we have today.

An example way of using this is:

 mount -t debugfs none /sys/kernel/debug
 echo cstate > /sys/kernel/debug/tracing/current_tracer
 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
 sleep 1
 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
 cat /sys/kernel/debug/tracing/trace | perl scripts/trace/cstate.pl > out.svg

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c |   4 +
 arch/x86/kernel/process.c                  |  16 +++
 include/linux/ftrace.h                     |  29 +++++
 kernel/trace/Kconfig                       |  11 ++
 kernel/trace/Makefile                      |   1 +
 kernel/trace/trace.h                       |   7 ++
 kernel/trace/trace_power.c                 | 179 +++++++++++++++++++++++++++++
 scripts/trace/power.pl                     | 108 +++++++++++++++++
 8 files changed, 355 insertions(+)
 create mode 100644 kernel/trace/trace_power.c
 create mode 100644 scripts/trace/power.pl

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/ftrace.h>
 
 #include <linux/acpi.h>
 #include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	unsigned int i;
 	int result = 0;
+	struct power_trace it;
 
 	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
+	trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..c27af49a4ede 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/ftrace.h>
 #include <asm/system.h>
 
 unsigned long idle_halt;
@@ -100,6 +101,9 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
 	if (hlt_use_halt()) {
+		struct power_trace it;
+
+		trace_power_start(&it, POWER_CSTATE, 1);
 		current_thread_info()->status &= ~TS_POLLING;
 		/*
 		 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +116,7 @@ void default_idle(void)
 		else
 			local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
+		trace_power_end(&it);
 	} else {
 		local_irq_enable();
 		/* loop is done by the caller */
@@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
+	struct power_trace it;
+
+	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
 			__mwait(ax, cx);
 	}
+	trace_power_end(&it);
 }
 
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
+	struct power_trace it;
 	if (!need_resched()) {
+		trace_power_start(&it, POWER_CSTATE, 1);
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
 		if (!need_resched())
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
+		trace_power_end(&it);
 	} else
 		local_irq_enable();
 }
@@ -183,9 +195,13 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
+	struct power_trace it;
+
+	trace_power_start(&it, POWER_CSTATE, 0);
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
+	trace_power_end(&it);
 }
 
 /*
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7854d87b97b2..0df288666201 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -311,6 +311,35 @@ ftrace_init_module(struct module *mod,
 		   unsigned long *start, unsigned long *end) { }
 #endif
 
+enum {
+	POWER_NONE = 0,
+	POWER_CSTATE = 1,
+	POWER_PSTATE = 2,
+};
+
+struct power_trace {
+#ifdef CONFIG_POWER_TRACER
+	ktime_t			stamp;
+	ktime_t			end;
+	int			type;
+	int			state;
+#endif
+};
+
+#ifdef CONFIG_POWER_TRACER
+extern void trace_power_start(struct power_trace *it, unsigned int type,
+					unsigned int state);
+extern void trace_power_mark(struct power_trace *it, unsigned int type,
+					unsigned int state);
+extern void trace_power_end(struct power_trace *it);
+#else
+static inline void trace_power_start(struct power_trace *it, unsigned int type,
+					unsigned int state) { }
+static inline void trace_power_mark(struct power_trace *it, unsigned int type,
+					unsigned int state) { }
+static inline void trace_power_end(struct power_trace *it) { }
+#endif
+
 
 /*
  * Structure that defines a return function trace.
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 620feadff67a..d151aab48ed6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -217,6 +217,17 @@ config BRANCH_TRACER
 
 	  Say N if unsure.
 
+config POWER_TRACER
+	bool "Trace power consumption behavior"
+	depends on DEBUG_KERNEL
+	depends on X86
+	select TRACING
+	help
+	  This tracer helps developers to analyze and optimize the kernels
+	  power management decisions, specifically the C-state and P-state
+	  behavior.
+
+
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cef4bcb4e822..acaa06553eca 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -32,5 +32,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_BTS_TRACER) += trace_bts.o
+obj-$(CONFIG_POWER_TRACER) += trace_power.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3abd645e8af2..4c453778a6ab 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -28,6 +28,7 @@ enum trace_type {
 	TRACE_FN_RET,
 	TRACE_USER_STACK,
 	TRACE_BTS,
+	TRACE_POWER,
 
 	__TRACE_LAST_TYPE
 };
@@ -160,6 +161,11 @@ struct bts_entry {
 	unsigned long		to;
 };
 
+struct trace_power {
+	struct trace_entry	ent;
+	struct power_trace	state_data;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -266,6 +272,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
 		IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
 		IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\
+ 		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
 		__ftrace_bad_type();					\
 	} while (0)
 
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 000000000000..a7172a352f62
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
+/*
+ * ring buffer based C-state tracer
+ *
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Copyright (C) 2008 Intel Corporation
+ *
+ * Much is borrowed from trace_boot.c which is
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+#include "trace.h"
+
+static struct trace_array *power_trace;
+static int __read_mostly trace_power_enabled;
+
+
+static void start_power_trace(struct trace_array *tr)
+{
+	trace_power_enabled = 1;
+}
+
+static void stop_power_trace(struct trace_array *tr)
+{
+	trace_power_enabled = 0;
+}
+
+
+static int power_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	power_trace = tr;
+
+	trace_power_enabled = 1;
+
+	for_each_cpu_mask(cpu, cpu_possible_map)
+		tracing_reset(tr, cpu);
+	return 0;
+}
+
+static enum print_line_t power_print_line(struct trace_iterator *iter)
+{
+	int ret = 0;
+	struct trace_entry *entry = iter->ent;
+	struct trace_power *field ;
+	struct power_trace *it;
+	struct trace_seq *s = &iter->seq;
+	struct timespec stamp;
+	struct timespec duration;
+
+	trace_assign_type(field, entry);
+	it = &field->state_data;
+	stamp = ktime_to_timespec(it->stamp);
+	duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
+
+	if (entry->type == TRACE_POWER) {
+		if (it->type == POWER_CSTATE)
+			ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
+					  stamp.tv_sec,
+					  stamp.tv_nsec,
+					  it->state, iter->cpu,
+					  duration.tv_sec,
+					  duration.tv_nsec);
+		if (it->type == POWER_PSTATE)
+			ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
+					  stamp.tv_sec,
+					  stamp.tv_nsec,
+					  it->state, iter->cpu);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+		return TRACE_TYPE_HANDLED;
+	}
+	return TRACE_TYPE_UNHANDLED;
+}
+
+static struct tracer power_tracer __read_mostly =
+{
+	.name		= "power",
+	.init		= power_trace_init,
+	.start		= start_power_trace,
+	.stop		= stop_power_trace,
+	.reset		= stop_power_trace,
+	.print_line	= power_print_line,
+};
+
+static int init_power_trace(void)
+{
+	return register_tracer(&power_tracer);
+}
+device_initcall(init_power_trace);
+
+void trace_power_start(struct power_trace *it, unsigned int type,
+			 unsigned int level)
+{
+	if (!trace_power_enabled)
+		return;
+
+	memset(it, 0, sizeof(struct power_trace));
+	it->state = level;
+	it->type = type;
+	it->stamp = ktime_get();
+}
+EXPORT_SYMBOL_GPL(trace_power_start);
+
+
+void trace_power_end(struct power_trace *it)
+{
+	struct ring_buffer_event *event;
+	struct trace_power *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = power_trace;
+
+	if (!trace_power_enabled)
+		return;
+
+	preempt_disable();
+	it->end = ktime_get();
+	data = tr->data[smp_processor_id()];
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+	entry->ent.type = TRACE_POWER;
+	entry->state_data = *it;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+
+ out:
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(trace_power_end);
+
+void trace_power_mark(struct power_trace *it, unsigned int type,
+			 unsigned int level)
+{
+	struct ring_buffer_event *event;
+	struct trace_power *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = power_trace;
+
+	if (!trace_power_enabled)
+		return;
+
+	memset(it, 0, sizeof(struct power_trace));
+	it->state = level;
+	it->type = type;
+	it->stamp = ktime_get();
+	preempt_disable();
+	it->end = it->stamp;
+	data = tr->data[smp_processor_id()];
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		goto out;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+	entry->ent.type = TRACE_POWER;
+	entry->state_data = *it;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+
+ out:
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(trace_power_mark);
diff --git a/scripts/trace/power.pl b/scripts/trace/power.pl
new file mode 100644
index 000000000000..4f729b3501e0
--- /dev/null
+++ b/scripts/trace/power.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/perl
+
+# Copyright 2008, Intel Corporation
+#
+# This file is part of the Linux kernel
+#
+# This program file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program in a file named COPYING; if not, write to the
+# Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301 USA
+#
+# Authors:
+# 	Arjan van de Ven <arjan@linux.intel.com>
+
+
+#
+# This script turns a cstate ftrace output into a SVG graphic that shows
+# historic C-state information
+#
+#
+# 	cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg
+#
+
+my @styles;
+my $base = 0;
+
+my @pstate_last;
+my @pstate_level;
+
+$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
+
+
+print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
+print "<svg width=\"10000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
+
+my $scale = 30000.0;
+while (<>) {
+	my $line = $_;
+	if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) {
+		if ($base == 0) {
+			$base = $1;
+		}
+		my $time = $1 - $base;
+		$time = $time * $scale;
+		my $C = $2;
+		my $cpu = $3;
+		my $y = 400 * $cpu;
+		my $duration = $4 * $scale;
+		my $msec = int($4 * 100000)/100.0;
+		my $height = $C * 20;
+		$style = $styles[$C];
+
+		$y = $y + 140 - $height;
+
+		$x2 = $time + 4;
+		$y2 = $y + 4;
+
+
+		print "<rect x=\"$time\" width=\"$duration\" y=\"$y\" height=\"$height\" style=\"$style\"/>\n";
+		print "<text transform=\"translate($x2,$y2) rotate(90)\">C$C $msec</text>\n";
+	}
+	if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) {
+		my $time = $1 - $base;
+		my $state = $2;
+		my $cpu = $3;
+
+		if (defined($pstate_last[$cpu])) {
+			my $from = $pstate_last[$cpu];
+			my $oldstate = $pstate_state[$cpu];
+			my $duration = ($time-$from) * $scale;
+
+			$from = $from * $scale;
+			my $to = $from + $duration;
+			my $height = 140 - ($oldstate * (140/8));
+
+			my $y = 400 * $cpu + 200 + $height;
+			my $y2 = $y+4;
+			my $style = $styles[8];
+
+			print "<rect x=\"$from\" y=\"$y\" width=\"$duration\" height=\"5\" style=\"$style\"/>\n";
+			print "<text transform=\"translate($from,$y2)\">P$oldstate (cpu $cpu)</text>\n";
+		};
+
+		$pstate_last[$cpu] = $time;
+		$pstate_state[$cpu] = $state;
+	}
+}
+
+
+print "</svg>\n";
-- 
cgit v1.2.3


From 5f3ea37c7716db4e894a480e0c18b24399595b6b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 30 Oct 2008 08:34:33 +0100
Subject: blktrace: port to tracepoints

This was a forward port of work done by Mathieu Desnoyers, I changed it to
encode the 'what' parameter on the tracepoint name, so that one can register
interest in specific events and not on classes of events to then check the
'what' parameter.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 block/Kconfig                |   1 +
 block/blk-core.c             |  33 ++---
 block/blktrace.c             | 332 ++++++++++++++++++++++++++++++++++++++++++-
 block/elevator.c             |   7 +-
 drivers/md/dm.c              |   6 +-
 fs/bio.c                     |   3 +-
 include/linux/blktrace_api.h | 172 +---------------------
 include/trace/block.h        |  60 ++++++++
 mm/bounce.c                  |   3 +-
 9 files changed, 418 insertions(+), 199 deletions(-)
 create mode 100644 include/trace/block.h

(limited to 'include')

diff --git a/block/Kconfig b/block/Kconfig
index 1ab7c15c8d7a..290b219fad9c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE
 	depends on SYSFS
 	select RELAY
 	select DEBUG_FS
+	select TRACEPOINTS
 	help
 	  Say Y here if you want to be able to trace the block layer actions
 	  on a given queue. Tracing allows you to see any traffic happening
diff --git a/block/blk-core.c b/block/blk-core.c
index 10e8a64a5a5b..04267d66a2b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,6 +28,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
+#include <trace/block.h>
 
 #include "blk.h"
 
@@ -205,7 +206,7 @@ void blk_plug_device(struct request_queue *q)
 
 	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+		trace_block_plug(q);
 	}
 }
 EXPORT_SYMBOL(blk_plug_device);
@@ -292,9 +293,7 @@ void blk_unplug_work(struct work_struct *work)
 	struct request_queue *q =
 		container_of(work, struct request_queue, unplug_work);
 
-	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
-				q->rq.count[READ] + q->rq.count[WRITE]);
-
+	trace_block_unplug_io(q);
 	q->unplug_fn(q);
 }
 
@@ -302,9 +301,7 @@ void blk_unplug_timeout(unsigned long data)
 {
 	struct request_queue *q = (struct request_queue *)data;
 
-	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
-				q->rq.count[READ] + q->rq.count[WRITE]);
-
+	trace_block_unplug_timer(q);
 	kblockd_schedule_work(q, &q->unplug_work);
 }
 
@@ -314,9 +311,7 @@ void blk_unplug(struct request_queue *q)
 	 * devices don't necessarily have an ->unplug_fn defined
 	 */
 	if (q->unplug_fn) {
-		blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
-					q->rq.count[READ] + q->rq.count[WRITE]);
-
+		trace_block_unplug_io(q);
 		q->unplug_fn(q);
 	}
 }
@@ -822,7 +817,7 @@ rq_starved:
 	if (ioc_batching(q, ioc))
 		ioc->nr_batch_requests--;
 
-	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
+	trace_block_getrq(q, bio, rw);
 out:
 	return rq;
 }
@@ -848,7 +843,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+		trace_block_sleeprq(q, bio, rw);
 
 		__generic_unplug_device(q);
 		spin_unlock_irq(q->queue_lock);
@@ -928,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
-	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+	trace_block_rq_requeue(q, rq);
 
 	if (blk_rq_tagged(rq))
 		blk_queue_end_tag(q, rq);
@@ -1167,7 +1162,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		if (!ll_back_merge_fn(q, req, bio))
 			break;
 
-		blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+		trace_block_bio_backmerge(q, bio);
 
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
@@ -1186,7 +1181,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		if (!ll_front_merge_fn(q, req, bio))
 			break;
 
-		blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+		trace_block_bio_frontmerge(q, bio);
 
 		bio->bi_next = req->bio;
 		req->bio = bio;
@@ -1269,7 +1264,7 @@ static inline void blk_partition_remap(struct bio *bio)
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
 
-		blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
+		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
 				    bdev->bd_dev, bio->bi_sector,
 				    bio->bi_sector - p->start_sect);
 	}
@@ -1441,10 +1436,10 @@ end_io:
 			goto end_io;
 
 		if (old_sector != -1)
-			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
+			trace_block_remap(q, bio, old_dev, bio->bi_sector,
 					    old_sector);
 
-		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+		trace_block_bio_queue(q, bio);
 
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
@@ -1656,7 +1651,7 @@ static int __end_that_request_first(struct request *req, int error,
 	int total_bytes, bio_nbytes, next_idx = 0;
 	struct bio *bio;
 
-	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+	trace_block_rq_complete(req->q, req);
 
 	/*
 	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
diff --git a/block/blktrace.c b/block/blktrace.c
index 85049a7e7a17..b0a2cae886db 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -23,10 +23,18 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
+#include <trace/block.h>
 #include <asm/uaccess.h>
 
 static unsigned int blktrace_seq __read_mostly = 1;
 
+/* Global reference count of probes */
+static DEFINE_MUTEX(blk_probe_mutex);
+static atomic_t blk_probes_ref = ATOMIC_INIT(0);
+
+static int blk_register_tracepoints(void);
+static void blk_unregister_tracepoints(void);
+
 /*
  * Send out a notify message.
  */
@@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
  * The worker for the various blk_add_trace*() types. Fills out a
  * blk_io_trace structure and places it in a per-cpu subbuffer.
  */
-void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
+static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 		     int rw, u32 what, int error, int pdu_len, void *pdu_data)
 {
 	struct task_struct *tsk = current;
@@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	local_irq_restore(flags);
 }
 
-EXPORT_SYMBOL_GPL(__blk_add_trace);
-
 static struct dentry *blk_tree_root;
 static DEFINE_MUTEX(blk_tree_mutex);
 static unsigned int root_users;
@@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
 	kfree(bt);
+	mutex_lock(&blk_probe_mutex);
+	if (atomic_dec_and_test(&blk_probes_ref))
+		blk_unregister_tracepoints();
+	mutex_unlock(&blk_probe_mutex);
 }
 
 int blk_trace_remove(struct request_queue *q)
@@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;
 
+	mutex_lock(&blk_probe_mutex);
+	if (atomic_add_return(1, &blk_probes_ref) == 1) {
+		ret = blk_register_tracepoints();
+		if (ret)
+			goto probe_err;
+	}
+	mutex_unlock(&blk_probe_mutex);
+
 	ret = -EBUSY;
 	old_bt = xchg(&q->blk_trace, bt);
 	if (old_bt) {
@@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	}
 
 	return 0;
+probe_err:
+	atomic_dec(&blk_probes_ref);
+	mutex_unlock(&blk_probe_mutex);
 err:
 	if (dir)
 		blk_remove_tree(dir);
@@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q)
 		blk_trace_remove(q);
 	}
 }
+
+/*
+ * blktrace probes
+ */
+
+/**
+ * blk_add_trace_rq - Add a trace for a request oriented action
+ * @q:		queue the io is for
+ * @rq:		the source request
+ * @what:	the action
+ *
+ * Description:
+ *     Records an action against a request. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+				    u32 what)
+{
+	struct blk_trace *bt = q->blk_trace;
+	int rw = rq->cmd_flags & 0x03;
+
+	if (likely(!bt))
+		return;
+
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
+	if (blk_pc_request(rq)) {
+		what |= BLK_TC_ACT(BLK_TC_PC);
+		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
+				sizeof(rq->cmd), rq->cmd);
+	} else  {
+		what |= BLK_TC_ACT(BLK_TC_FS);
+		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+				rw, what, rq->errors, 0, NULL);
+	}
+}
+
+static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
+{
+	blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+}
+
+static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
+{
+	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+}
+
+static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
+{
+	blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+}
+
+static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
+{
+	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+}
+
+static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
+{
+	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
+}
+
+/**
+ * blk_add_trace_bio - Add a trace for a bio oriented action
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @what:	the action
+ *
+ * Description:
+ *     Records an action against a bio. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+				     u32 what)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
+			!bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
+{
+	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
+}
+
+static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
+{
+	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
+}
+
+static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
+{
+	blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+}
+
+static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
+{
+	blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+}
+
+static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
+{
+	blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+}
+
+static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
+{
+	if (bio)
+		blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
+	else {
+		struct blk_trace *bt = q->blk_trace;
+
+		if (bt)
+			__blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
+	}
+}
+
+
+static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
+{
+	if (bio)
+		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
+	else {
+		struct blk_trace *bt = q->blk_trace;
+
+		if (bt)
+			__blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
+	}
+}
+
+static void blk_add_trace_plug(struct request_queue *q)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt)
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+}
+
+static void blk_add_trace_unplug_io(struct request_queue *q)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt) {
+		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
+				sizeof(rpdu), &rpdu);
+	}
+}
+
+static void blk_add_trace_unplug_timer(struct request_queue *q)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt) {
+		unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
+				sizeof(rpdu), &rpdu);
+	}
+}
+
+static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
+				unsigned int pdu)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (bt) {
+		__be64 rpdu = cpu_to_be64(pdu);
+
+		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+				BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
+				sizeof(rpdu), &rpdu);
+	}
+}
+
+/**
+ * blk_add_trace_remap - Add a trace for a remap operation
+ * @q:		queue the io is for
+ * @bio:	the source bio
+ * @dev:	target device
+ * @from:	source sector
+ * @to:		target sector
+ *
+ * Description:
+ *     Device mapper or raid target sometimes need to split a bio because
+ *     it spans a stripe (or similar). Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
+				       dev_t dev, sector_t from, sector_t to)
+{
+	struct blk_trace *bt = q->blk_trace;
+	struct blk_io_trace_remap r;
+
+	if (likely(!bt))
+		return;
+
+	r.device = cpu_to_be32(dev);
+	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
+	r.sector = cpu_to_be64(to);
+
+	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
+			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+}
+
+/**
+ * blk_add_driver_data - Add binary message with driver-specific data
+ * @q:		queue the io is for
+ * @rq:		io request
+ * @data:	driver-specific data
+ * @len:	length of driver-specific data
+ *
+ * Description:
+ *     Some drivers might want to write driver-specific data per request.
+ *
+ **/
+void blk_add_driver_data(struct request_queue *q,
+			 struct request *rq,
+			 void *data, size_t len)
+{
+	struct blk_trace *bt = q->blk_trace;
+
+	if (likely(!bt))
+		return;
+
+	if (blk_pc_request(rq))
+		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
+				rq->errors, len, data);
+	else
+		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+				0, BLK_TA_DRV_DATA, rq->errors, len, data);
+}
+EXPORT_SYMBOL_GPL(blk_add_driver_data);
+
+static int blk_register_tracepoints(void)
+{
+	int ret;
+
+	ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
+	WARN_ON(ret);
+	ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
+	WARN_ON(ret);
+	ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
+	WARN_ON(ret);
+	ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+	WARN_ON(ret);
+	ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
+	WARN_ON(ret);
+	ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+	WARN_ON(ret);
+	ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
+	WARN_ON(ret);
+	ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+	WARN_ON(ret);
+	ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+	WARN_ON(ret);
+	ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
+	WARN_ON(ret);
+	ret = register_trace_block_getrq(blk_add_trace_getrq);
+	WARN_ON(ret);
+	ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
+	WARN_ON(ret);
+	ret = register_trace_block_plug(blk_add_trace_plug);
+	WARN_ON(ret);
+	ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+	WARN_ON(ret);
+	ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
+	WARN_ON(ret);
+	ret = register_trace_block_split(blk_add_trace_split);
+	WARN_ON(ret);
+	ret = register_trace_block_remap(blk_add_trace_remap);
+	WARN_ON(ret);
+	return 0;
+}
+
+static void blk_unregister_tracepoints(void)
+{
+	unregister_trace_block_remap(blk_add_trace_remap);
+	unregister_trace_block_split(blk_add_trace_split);
+	unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
+	unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+	unregister_trace_block_plug(blk_add_trace_plug);
+	unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
+	unregister_trace_block_getrq(blk_add_trace_getrq);
+	unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
+	unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+	unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+	unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
+	unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+	unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
+	unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+	unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
+	unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
+	unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
+
+	tracepoint_synchronize_unregister();
+}
diff --git a/block/elevator.c b/block/elevator.c
index 9ac82dde99dd..530fcfe2ef07 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
@@ -586,7 +587,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 	unsigned ordseq;
 	int unplug_it = 1;
 
-	blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+	trace_block_rq_insert(q, rq);
 
 	rq->q = q;
 
@@ -772,7 +773,7 @@ struct request *elv_next_request(struct request_queue *q)
 			 * not be passed by new incoming requests
 			 */
 			rq->cmd_flags |= REQ_STARTED;
-			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+			trace_block_rq_issue(q, rq);
 		}
 
 		if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -921,7 +922,7 @@ void elv_abort_queue(struct request_queue *q)
 	while (!list_empty(&q->queue_head)) {
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
-		blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+		trace_block_rq_abort(q, rq);
 		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
 	}
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c99e4728ff41..d23fda178163 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -21,6 +21,7 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -504,8 +505,7 @@ static void dec_pending(struct dm_io *io, int error)
 		end_io_acct(io);
 
 		if (io->error != DM_ENDIO_REQUEUE) {
-			blk_add_trace_bio(io->md->queue, io->bio,
-					  BLK_TA_COMPLETE);
+			trace_block_bio_complete(io->md->queue, io->bio);
 
 			bio_endio(io->bio, io->error);
 		}
@@ -598,7 +598,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
 
-		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
+		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
 				    tio->io->bio->bi_bdev->bd_dev,
 				    clone->bi_sector, sector);
 
diff --git a/fs/bio.c b/fs/bio.c
index 77a55bcceedb..060859c69092 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,6 +26,7 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
 static struct kmem_cache *bio_slab __read_mostly;
@@ -1263,7 +1264,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 	if (!bp)
 		return bp;
 
-	blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
+	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
 				bi->bi_sector + first_sectors);
 
 	BUG_ON(bi->bi_vcnt != 1);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index bdf505d33e77..1dba3493d520 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -160,7 +160,6 @@ struct blk_trace {
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
 extern int do_blk_trace_setup(struct request_queue *q,
 	char *name, dev_t dev, struct blk_user_trace_setup *buts);
 extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
@@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 	} while (0)
 #define BLK_TN_MAX_MSG		128
 
-/**
- * blk_add_trace_rq - Add a trace for a request oriented action
- * @q:		queue the io is for
- * @rq:		the source request
- * @what:	the action
- *
- * Description:
- *     Records an action against a request. Will log the bio offset + size.
- *
- **/
-static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
-				    u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-	int rw = rq->cmd_flags & 0x03;
-
-	if (likely(!bt))
-		return;
-
-	if (blk_discard_rq(rq))
-		rw |= (1 << BIO_RW_DISCARD);
-
-	if (blk_pc_request(rq)) {
-		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
-	} else  {
-		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
-	}
-}
-
-/**
- * blk_add_trace_bio - Add a trace for a bio oriented action
- * @q:		queue the io is for
- * @bio:	the source bio
- * @what:	the action
- *
- * Description:
- *     Records an action against a bio. Will log the bio offset + size.
- *
- **/
-static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
-				     u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
-}
-
-/**
- * blk_add_trace_generic - Add a trace for a generic action
- * @q:		queue the io is for
- * @bio:	the source bio
- * @rw:		the data direction
- * @what:	the action
- *
- * Description:
- *     Records a simple trace
- *
- **/
-static inline void blk_add_trace_generic(struct request_queue *q,
-					 struct bio *bio, int rw, u32 what)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	if (bio)
-		blk_add_trace_bio(q, bio, what);
-	else
-		__blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
-}
-
-/**
- * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
- * @q:		queue the io is for
- * @what:	the action
- * @bio:	the source bio
- * @pdu:	the integer payload
- *
- * Description:
- *     Adds a trace with some integer payload. This might be an unplug
- *     option given as the action, with the depth at unplug time given
- *     as the payload
- *
- **/
-static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
-					 struct bio *bio, unsigned int pdu)
-{
-	struct blk_trace *bt = q->blk_trace;
-	__be64 rpdu = cpu_to_be64(pdu);
-
-	if (likely(!bt))
-		return;
-
-	if (bio)
-		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
-	else
-		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
-}
-
-/**
- * blk_add_trace_remap - Add a trace for a remap operation
- * @q:		queue the io is for
- * @bio:	the source bio
- * @dev:	target device
- * @from:	source sector
- * @to:		target sector
- *
- * Description:
- *     Device mapper or raid target sometimes need to split a bio because
- *     it spans a stripe (or similar). Add a trace for that action.
- *
- **/
-static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
-{
-	struct blk_trace *bt = q->blk_trace;
-	struct blk_io_trace_remap r;
-
-	if (likely(!bt))
-		return;
-
-	r.device = cpu_to_be32(dev);
-	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-	r.sector = cpu_to_be64(to);
-
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
-}
-
-/**
- * blk_add_driver_data - Add binary message with driver-specific data
- * @q:		queue the io is for
- * @rq:		io request
- * @data:	driver-specific data
- * @len:	length of driver-specific data
- *
- * Description:
- *     Some drivers might want to write driver-specific data per request.
- *
- **/
-static inline void blk_add_driver_data(struct request_queue *q,
-				       struct request *rq,
-				       void *data, size_t len)
-{
-	struct blk_trace *bt = q->blk_trace;
-
-	if (likely(!bt))
-		return;
-
-	if (blk_pc_request(rq))
-		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
-				rq->errors, len, data);
-	else
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
-				0, BLK_TA_DRV_DATA, rq->errors, len, data);
-}
-
+extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
+				void *data, size_t len);
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q);
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
 #define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
 #define blk_trace_shutdown(q)			do { } while (0)
-#define blk_add_trace_rq(q, rq, what)		do { } while (0)
-#define blk_add_trace_bio(q, rq, what)		do { } while (0)
-#define blk_add_trace_generic(q, rq, rw, what)	do { } while (0)
-#define blk_add_trace_pdu_int(q, what, bio, pdu)	do { } while (0)
-#define blk_add_trace_remap(q, bio, dev, f, t)	do {} while (0)
-#define blk_add_driver_data(q, rq, data, len)	do {} while (0)
 #define do_blk_trace_setup(q, name, dev, buts)	(-ENOTTY)
+#define blk_add_driver_data(q, rq, data, len)	do {} while (0)
 #define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
 #define blk_trace_startstop(q, start)		(-ENOTTY)
 #define blk_trace_remove(q)			(-ENOTTY)
diff --git a/include/trace/block.h b/include/trace/block.h
new file mode 100644
index 000000000000..3cc2675ebf01
--- /dev/null
+++ b/include/trace/block.h
@@ -0,0 +1,60 @@
+#ifndef _TRACE_BLOCK_H
+#define _TRACE_BLOCK_H
+
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(block_rq_abort,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_insert,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_issue,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_requeue,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_rq_complete,
+	TPPROTO(struct request_queue *q, struct request *rq),
+	TPARGS(q, rq));
+DEFINE_TRACE(block_bio_bounce,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_complete,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_backmerge,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_frontmerge,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_bio_queue,
+	TPPROTO(struct request_queue *q, struct bio *bio),
+	TPARGS(q, bio));
+DEFINE_TRACE(block_getrq,
+	TPPROTO(struct request_queue *q, struct bio *bio, int rw),
+	TPARGS(q, bio, rw));
+DEFINE_TRACE(block_sleeprq,
+	TPPROTO(struct request_queue *q, struct bio *bio, int rw),
+	TPARGS(q, bio, rw));
+DEFINE_TRACE(block_plug,
+	TPPROTO(struct request_queue *q),
+	TPARGS(q));
+DEFINE_TRACE(block_unplug_timer,
+	TPPROTO(struct request_queue *q),
+	TPARGS(q));
+DEFINE_TRACE(block_unplug_io,
+	TPPROTO(struct request_queue *q),
+	TPARGS(q));
+DEFINE_TRACE(block_split,
+	TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
+	TPARGS(q, bio, pdu));
+DEFINE_TRACE(block_remap,
+	TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+		sector_t from, sector_t to),
+	TPARGS(q, bio, dev, from, to));
+
+#endif
diff --git a/mm/bounce.c b/mm/bounce.c
index 06722c403058..bd1caaa582b8 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
 #include <linux/hash.h>
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
+#include <trace/block.h>
 #include <asm/tlbflush.h>
 
 #define POOL_SIZE	64
@@ -222,7 +223,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	if (!bio)
 		return;
 
-	blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+	trace_block_bio_bounce(q, *bio_orig);
 
 	/*
 	 * at least one page was bounced, fill in possible non-highmem
-- 
cgit v1.2.3


From 0bfc24559d7945506184d86739fe365a181f06b7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 26 Nov 2008 11:59:56 +0100
Subject: blktrace: port to tracepoints, update

Port to the new tracepoints API: split DEFINE_TRACE() and DECLARE_TRACE()
sites. Spread them out to the usage sites, as suggested by
Mathieu Desnoyers.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 block/blk-core.c      | 13 ++++++++
 block/elevator.c      |  5 +++
 drivers/md/dm.c       |  2 ++
 fs/bio.c              |  2 ++
 include/trace/block.h | 84 ++++++++++++++++++++++++++++++---------------------
 mm/bounce.c           |  2 ++
 6 files changed, 74 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 04267d66a2b9..0c06cf5aaaf8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -32,6 +32,19 @@
 
 #include "blk.h"
 
+DEFINE_TRACE(block_plug);
+DEFINE_TRACE(block_unplug_io);
+DEFINE_TRACE(block_unplug_timer);
+DEFINE_TRACE(block_getrq);
+DEFINE_TRACE(block_sleeprq);
+DEFINE_TRACE(block_rq_requeue);
+DEFINE_TRACE(block_bio_backmerge);
+DEFINE_TRACE(block_bio_frontmerge);
+DEFINE_TRACE(block_bio_queue);
+DEFINE_TRACE(block_rq_complete);
+DEFINE_TRACE(block_remap);	/* Also used in drivers/md/dm.c */
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+
 static int __make_request(struct request_queue *q, struct bio *bio);
 
 /*
diff --git a/block/elevator.c b/block/elevator.c
index 530fcfe2ef07..e5677fe4f412 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -42,6 +42,8 @@
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
+DEFINE_TRACE(block_rq_abort);
+
 /*
  * Merge hash stuff.
  */
@@ -53,6 +55,9 @@ static const int elv_hash_shift = 6;
 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
+DEFINE_TRACE(block_rq_insert);
+DEFINE_TRACE(block_rq_issue);
+
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d23fda178163..343094c3feeb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -52,6 +52,8 @@ struct dm_target_io {
 	union map_info info;
 };
 
+DEFINE_TRACE(block_bio_complete);
+
 union map_info *dm_get_mapinfo(struct bio *bio)
 {
 	if (bio && bio->bi_private)
diff --git a/fs/bio.c b/fs/bio.c
index 060859c69092..df99c882b807 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -29,6 +29,8 @@
 #include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
+DEFINE_TRACE(block_split);
+
 static struct kmem_cache *bio_slab __read_mostly;
 
 static mempool_t *bio_split_pool __read_mostly;
diff --git a/include/trace/block.h b/include/trace/block.h
index 3cc2675ebf01..25c6a1fd5b77 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -4,57 +4,73 @@
 #include <linux/blkdev.h>
 #include <linux/tracepoint.h>
 
-DEFINE_TRACE(block_rq_abort,
+DECLARE_TRACE(block_rq_abort,
 	TPPROTO(struct request_queue *q, struct request *rq),
-	TPARGS(q, rq));
-DEFINE_TRACE(block_rq_insert,
+		TPARGS(q, rq));
+
+DECLARE_TRACE(block_rq_insert,
 	TPPROTO(struct request_queue *q, struct request *rq),
-	TPARGS(q, rq));
-DEFINE_TRACE(block_rq_issue,
+		TPARGS(q, rq));
+
+DECLARE_TRACE(block_rq_issue,
 	TPPROTO(struct request_queue *q, struct request *rq),
-	TPARGS(q, rq));
-DEFINE_TRACE(block_rq_requeue,
+		TPARGS(q, rq));
+
+DECLARE_TRACE(block_rq_requeue,
 	TPPROTO(struct request_queue *q, struct request *rq),
-	TPARGS(q, rq));
-DEFINE_TRACE(block_rq_complete,
+		TPARGS(q, rq));
+
+DECLARE_TRACE(block_rq_complete,
 	TPPROTO(struct request_queue *q, struct request *rq),
-	TPARGS(q, rq));
-DEFINE_TRACE(block_bio_bounce,
+		TPARGS(q, rq));
+
+DECLARE_TRACE(block_bio_bounce,
 	TPPROTO(struct request_queue *q, struct bio *bio),
-	TPARGS(q, bio));
-DEFINE_TRACE(block_bio_complete,
+		TPARGS(q, bio));
+
+DECLARE_TRACE(block_bio_complete,
 	TPPROTO(struct request_queue *q, struct bio *bio),
-	TPARGS(q, bio));
-DEFINE_TRACE(block_bio_backmerge,
+		TPARGS(q, bio));
+
+DECLARE_TRACE(block_bio_backmerge,
 	TPPROTO(struct request_queue *q, struct bio *bio),
-	TPARGS(q, bio));
-DEFINE_TRACE(block_bio_frontmerge,
+		TPARGS(q, bio));
+
+DECLARE_TRACE(block_bio_frontmerge,
 	TPPROTO(struct request_queue *q, struct bio *bio),
-	TPARGS(q, bio));
-DEFINE_TRACE(block_bio_queue,
+		TPARGS(q, bio));
+
+DECLARE_TRACE(block_bio_queue,
 	TPPROTO(struct request_queue *q, struct bio *bio),
-	TPARGS(q, bio));
-DEFINE_TRACE(block_getrq,
+		TPARGS(q, bio));
+
+DECLARE_TRACE(block_getrq,
 	TPPROTO(struct request_queue *q, struct bio *bio, int rw),
-	TPARGS(q, bio, rw));
-DEFINE_TRACE(block_sleeprq,
+		TPARGS(q, bio, rw));
+
+DECLARE_TRACE(block_sleeprq,
 	TPPROTO(struct request_queue *q, struct bio *bio, int rw),
-	TPARGS(q, bio, rw));
-DEFINE_TRACE(block_plug,
+		TPARGS(q, bio, rw));
+
+DECLARE_TRACE(block_plug,
 	TPPROTO(struct request_queue *q),
-	TPARGS(q));
-DEFINE_TRACE(block_unplug_timer,
+		TPARGS(q));
+
+DECLARE_TRACE(block_unplug_timer,
 	TPPROTO(struct request_queue *q),
-	TPARGS(q));
-DEFINE_TRACE(block_unplug_io,
+		TPARGS(q));
+
+DECLARE_TRACE(block_unplug_io,
 	TPPROTO(struct request_queue *q),
-	TPARGS(q));
-DEFINE_TRACE(block_split,
+		TPARGS(q));
+
+DECLARE_TRACE(block_split,
 	TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
-	TPARGS(q, bio, pdu));
-DEFINE_TRACE(block_remap,
+		TPARGS(q, bio, pdu));
+
+DECLARE_TRACE(block_remap,
 	TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev,
 		sector_t from, sector_t to),
-	TPARGS(q, bio, dev, from, to));
+		TPARGS(q, bio, dev, from, to));
 
 #endif
diff --git a/mm/bounce.c b/mm/bounce.c
index bd1caaa582b8..bf0cf7c8387b 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -22,6 +22,8 @@
 
 static mempool_t *page_pool, *isa_page_pool;
 
+DEFINE_TRACE(block_bio_bounce);
+
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
-- 
cgit v1.2.3


From ce71e27c6fdc43c29f36d307b9100bde70c947fc Mon Sep 17 00:00:00 2001
From: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Date: Tue, 19 Aug 2008 20:43:25 +0300
Subject: SLUB: Replace __builtin_return_address(0) with _RET_IP_.

This patch replaces __builtin_return_address(0) with _RET_IP_, since a
previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and
they're widely available now. This makes for shorter and easier to read
code.

[penberg@cs.helsinki.fi: remove _RET_IP_ casts to void pointer]
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h |  8 ++++----
 mm/slab.c            |  8 ++++----
 mm/slub.c            | 48 ++++++++++++++++++++++++------------------------
 3 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 000da12b5cf0..c97ed28559ec 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -253,9 +253,9 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
  * request comes from.
  */
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
+extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 #define kmalloc_track_caller(size, flags) \
-	__kmalloc_track_caller(size, flags, __builtin_return_address(0))
+	__kmalloc_track_caller(size, flags, _RET_IP_)
 #else
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc(size, flags)
@@ -271,10 +271,10 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
  * allocation request comes from.
  */
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
+extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
-			__builtin_return_address(0))
+			_RET_IP_)
 #else
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node(size, flags, node)
diff --git a/mm/slab.c b/mm/slab.c
index 09187517f9dc..a14787799014 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3686,9 +3686,9 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 EXPORT_SYMBOL(__kmalloc_node);
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
-		int node, void *caller)
+		int node, unsigned long caller)
 {
-	return __do_kmalloc_node(size, flags, node, caller);
+	return __do_kmalloc_node(size, flags, node, (void *)caller);
 }
 EXPORT_SYMBOL(__kmalloc_node_track_caller);
 #else
@@ -3730,9 +3730,9 @@ void *__kmalloc(size_t size, gfp_t flags)
 }
 EXPORT_SYMBOL(__kmalloc);
 
-void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
 {
-	return __do_kmalloc(size, flags, caller);
+	return __do_kmalloc(size, flags, (void *)caller);
 }
 EXPORT_SYMBOL(__kmalloc_track_caller);
 
diff --git a/mm/slub.c b/mm/slub.c
index 7ec2888bffc1..68ab260583d0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -182,7 +182,7 @@ static LIST_HEAD(slab_caches);
  * Tracking user of a slab.
  */
 struct track {
-	void *addr;		/* Called from address */
+	unsigned long addr;	/* Called from address */
 	int cpu;		/* Was running on cpu */
 	int pid;		/* Pid context */
 	unsigned long when;	/* When did the operation occur */
@@ -371,7 +371,7 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 }
 
 static void set_track(struct kmem_cache *s, void *object,
-				enum track_item alloc, void *addr)
+			enum track_item alloc, unsigned long addr)
 {
 	struct track *p;
 
@@ -395,8 +395,8 @@ static void init_tracking(struct kmem_cache *s, void *object)
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
-	set_track(s, object, TRACK_FREE, NULL);
-	set_track(s, object, TRACK_ALLOC, NULL);
+	set_track(s, object, TRACK_FREE, 0UL);
+	set_track(s, object, TRACK_ALLOC, 0UL);
 }
 
 static void print_track(const char *s, struct track *t)
@@ -405,7 +405,7 @@ static void print_track(const char *s, struct track *t)
 		return;
 
 	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
-		s, t->addr, jiffies - t->when, t->cpu, t->pid);
+		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
 }
 
 static void print_tracking(struct kmem_cache *s, void *object)
@@ -870,7 +870,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
 }
 
 static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
-						void *object, void *addr)
+					void *object, unsigned long addr)
 {
 	if (!check_slab(s, page))
 		goto bad;
@@ -910,7 +910,7 @@ bad:
 }
 
 static int free_debug_processing(struct kmem_cache *s, struct page *page,
-						void *object, void *addr)
+					void *object, unsigned long addr)
 {
 	if (!check_slab(s, page))
 		goto fail;
@@ -1033,10 +1033,10 @@ static inline void setup_object_debug(struct kmem_cache *s,
 			struct page *page, void *object) {}
 
 static inline int alloc_debug_processing(struct kmem_cache *s,
-	struct page *page, void *object, void *addr) { return 0; }
+	struct page *page, void *object, unsigned long addr) { return 0; }
 
 static inline int free_debug_processing(struct kmem_cache *s,
-	struct page *page, void *object, void *addr) { return 0; }
+	struct page *page, void *object, unsigned long addr) { return 0; }
 
 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
 			{ return 1; }
@@ -1503,8 +1503,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
  * we need to allocate a new slab. This is the slowest path since it involves
  * a call to the page allocator and the setup of a new slab.
  */
-static void *__slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+			  unsigned long addr, struct kmem_cache_cpu *c)
 {
 	void **object;
 	struct page *new;
@@ -1588,7 +1588,7 @@ debug:
  * Otherwise we can simply pick the next object from the lockless free list.
  */
 static __always_inline void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr)
+		gfp_t gfpflags, int node, unsigned long addr)
 {
 	void **object;
 	struct kmem_cache_cpu *c;
@@ -1617,14 +1617,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+	return slab_alloc(s, gfpflags, -1, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+	return slab_alloc(s, gfpflags, node, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
@@ -1638,7 +1638,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-				void *x, void *addr, unsigned int offset)
+			void *x, unsigned long addr, unsigned int offset)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1708,7 +1708,7 @@ debug:
  * with all sorts of special processing.
  */
 static __always_inline void slab_free(struct kmem_cache *s,
-			struct page *page, void *x, void *addr)
+			struct page *page, void *x, unsigned long addr)
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
@@ -1735,7 +1735,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 
 	page = virt_to_head_page(x);
 
-	slab_free(s, page, x, __builtin_return_address(0));
+	slab_free(s, page, x, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -2663,7 +2663,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, -1, __builtin_return_address(0));
+	return slab_alloc(s, flags, -1, _RET_IP_);
 }
 EXPORT_SYMBOL(__kmalloc);
 
@@ -2691,7 +2691,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, node, __builtin_return_address(0));
+	return slab_alloc(s, flags, node, _RET_IP_);
 }
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
@@ -2748,7 +2748,7 @@ void kfree(const void *x)
 		put_page(page);
 		return;
 	}
-	slab_free(page->slab, page, object, __builtin_return_address(0));
+	slab_free(page->slab, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3204,7 +3204,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
 
 #endif
 
-void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 {
 	struct kmem_cache *s;
 
@@ -3220,7 +3220,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 }
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
-					int node, void *caller)
+					int node, unsigned long caller)
 {
 	struct kmem_cache *s;
 
@@ -3431,7 +3431,7 @@ static void resiliency_test(void) {};
 
 struct location {
 	unsigned long count;
-	void *addr;
+	unsigned long addr;
 	long long sum_time;
 	long min_time;
 	long max_time;
@@ -3479,7 +3479,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 {
 	long start, end, pos;
 	struct location *l;
-	void *caddr;
+	unsigned long caddr;
 	unsigned long age = jiffies - track->when;
 
 	start = -1;
-- 
cgit v1.2.3


From e2f367f269fe19375f10e63efe0f2a6d3ddef8e6 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Fri, 21 Nov 2008 19:01:30 +0200
Subject: nl80211: Report max TX power in NL80211_BAND_ATTR_FREQS

This is useful information to provide for userspace (e.g., hostapd needs
this to generate Country IE).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 4 ++++
 net/wireless/nl80211.c  | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 79827345351d..54d6ebe38e39 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -508,6 +508,7 @@ enum nl80211_band_attr {
  *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory
  *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm.
  */
 enum nl80211_frequency_attr {
 	__NL80211_FREQUENCY_ATTR_INVALID,
@@ -516,12 +517,15 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_PASSIVE_SCAN,
 	NL80211_FREQUENCY_ATTR_NO_IBSS,
 	NL80211_FREQUENCY_ATTR_RADAR,
+	NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
 	NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1
 };
 
+#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER
+
 /**
  * enum nl80211_bitrate_attr - bitrate attributes
  * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 00121ceddb14..2e8464eaaaa2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -198,6 +198,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 			if (chan->flags & IEEE80211_CHAN_RADAR)
 				NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR);
 
+			NLA_PUT_U8(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
+				   chan->max_power);
+
 			nla_nest_end(msg, nl_freq);
 		}
 
-- 
cgit v1.2.3


From f80b5e99c7dac5a9a0d72496cec5075a12cd1476 Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Fri, 21 Nov 2008 20:40:09 -0200
Subject: rfkill: preserve state across suspend

The rfkill class API requires that the driver connected to a class
call rfkill_force_state() on resume to update the real state of the
rfkill controller, OR that it provides a get_state() hook.

This means there is potentially a hidden call in the resume code flow
that changes rfkill->state (i.e. rfkill_force_state()), so the
previous state of the transmitter was being lost.

The simplest and most future-proof way to fix this is to explicitly
store the pre-sleep state on the rfkill structure, and restore from
that on resume.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 1 +
 net/rfkill/rfkill.c    | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 4cd64b0d9825..f376a93927f7 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -108,6 +108,7 @@ struct rfkill {
 
 	struct device dev;
 	struct list_head node;
+	enum rfkill_state state_for_resume;
 };
 #define to_rfkill(d)	container_of(d, struct rfkill, dev)
 
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index ec26eae8004d..5ad411d3e8f8 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -565,10 +565,15 @@ static void rfkill_release(struct device *dev)
 #ifdef CONFIG_PM
 static int rfkill_suspend(struct device *dev, pm_message_t state)
 {
+	struct rfkill *rfkill = to_rfkill(dev);
+
 	/* mark class device as suspended */
 	if (dev->power.power_state.event != state.event)
 		dev->power.power_state = state;
 
+	/* store state for the resume handler */
+	rfkill->state_for_resume = rfkill->state;
+
 	return 0;
 }
 
@@ -590,7 +595,7 @@ static int rfkill_resume(struct device *dev)
 		rfkill_toggle_radio(rfkill,
 				rfkill_epo_lock_active ?
 					RFKILL_STATE_SOFT_BLOCKED :
-					rfkill->state,
+					rfkill->state_for_resume,
 				1);
 
 		mutex_unlock(&rfkill->mutex);
-- 
cgit v1.2.3


From bf8c1ac6d81ba8c0e4dc2215f84f5e2a3c8227e8 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sat, 22 Nov 2008 22:00:31 +0200
Subject: nl80211: Change max TX power to be in mBm instead of dBm

In order to be consistent with NL80211_ATTR_POWER_RULE_MAX_EIRP,
change NL80211_FREQUENCY_ATTR_MAX_TX_POWER to use mBm and U32 instead
of dBm and U8. This is a userspace interface change, but the previous
version had not yet been pushed upstream and there are no userspace
programs using this yet, so there is justification to get this change in
as long as it goes in before the previous version gets out.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 3 ++-
 net/wireless/nl80211.c  | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 54d6ebe38e39..e08c8bcfb78d 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -508,7 +508,8 @@ enum nl80211_band_attr {
  *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory
  *	on this channel in current regulatory domain.
- * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm.
+ * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm
+ *	(100 * dBm).
  */
 enum nl80211_frequency_attr {
 	__NL80211_FREQUENCY_ATTR_INVALID,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2e8464eaaaa2..c9141e3df9ba 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -198,8 +198,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 			if (chan->flags & IEEE80211_CHAN_RADAR)
 				NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR);
 
-			NLA_PUT_U8(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
-				   chan->max_power);
+			NLA_PUT_U32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
+				    DBM_TO_MBM(chan->max_power));
 
 			nla_nest_end(msg, nl_freq);
 		}
-- 
cgit v1.2.3


From 244e6c2d0724bc4908a1995804704bdee3b31528 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 26 Nov 2008 15:24:32 -0800
Subject: pkt_sched: gen_estimator: Optimize gen_estimator_active()

Since all other gen_estimator functions use bstats and rate_est params
together, and searching for them is optimized now, let's use this also
in gen_estimator_active(). The return type of gen_estimator_active()
is changed to bool, and gen_find_node() parameters to const, btw.

In tcf_act_police_locate() a check for ACT_P_CREATED is added before
calling gen_estimator_active().

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h  |  4 ++--
 net/core/gen_estimator.c | 25 ++++++++-----------------
 net/sched/act_police.c   |  4 +++-
 3 files changed, 13 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index dcf5bfa7d4f1..d136b5240ef2 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -45,6 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats,
 extern int gen_replace_estimator(struct gnet_stats_basic *bstats,
 				 struct gnet_stats_rate_est *rate_est,
 				 spinlock_t *stats_lock, struct nlattr *opt);
-extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est);
-
+extern bool gen_estimator_active(const struct gnet_stats_basic *bstats,
+				 const struct gnet_stats_rate_est *rate_est);
 #endif
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 3885550f0187..9cc9f95b109e 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -163,8 +163,9 @@ static void gen_add_node(struct gen_estimator *est)
 	rb_insert_color(&est->node, &est_root);
 }
 
-static struct gen_estimator *gen_find_node(struct gnet_stats_basic *bstats,
-					   struct gnet_stats_rate_est *rate_est)
+static
+struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
+				    const struct gnet_stats_rate_est *rate_est)
 {
 	struct rb_node *p = est_root.rb_node;
 
@@ -301,26 +302,16 @@ EXPORT_SYMBOL(gen_replace_estimator);
 
 /**
  * gen_estimator_active - test if estimator is currently in use
+ * @bstats: basic statistics
  * @rate_est: rate estimator statistics
  *
- * Returns 1 if estimator is active, and 0 if not.
+ * Returns true if estimator is active, and false if not.
  */
-int gen_estimator_active(const struct gnet_stats_rate_est *rate_est)
+bool gen_estimator_active(const struct gnet_stats_basic *bstats,
+			  const struct gnet_stats_rate_est *rate_est)
 {
-	int idx;
-	struct gen_estimator *e;
-
 	ASSERT_RTNL();
 
-	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
-		if (!elist[idx].timer.function)
-			continue;
-
-		list_for_each_entry(e, &elist[idx].list, list) {
-			if (e->rate_est == rate_est)
-				return 1;
-		}
-	}
-	return 0;
+	return gen_find_node(bstats, rate_est) != NULL;
 }
 EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c39f60cea6ee..5c72a116b1a4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -183,7 +183,9 @@ override:
 		if (R_tab == NULL)
 			goto failure;
 
-		if (!est && !gen_estimator_active(&police->tcf_rate_est)) {
+		if (!est && (ret == ACT_P_CREATED ||
+			     !gen_estimator_active(&police->tcf_bstats,
+						   &police->tcf_rate_est))) {
 			err = -EINVAL;
 			goto failure;
 		}
-- 
cgit v1.2.3


From c4106aa88a440430d387e022f2ad6dc1e0d52e98 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 27 Nov 2008 00:12:47 -0800
Subject: decnet: remove private wrappers of endian helpers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Reviewed-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn.h               |  8 ++----
 include/net/dn_fib.h           |  6 ++--
 net/decnet/af_decnet.c         | 62 +++++++++++++++++++++---------------------
 net/decnet/dn_dev.c            | 20 +++++++-------
 net/decnet/dn_neigh.c          | 18 ++++++------
 net/decnet/dn_nsp_in.c         | 28 ++++++++++---------
 net/decnet/dn_nsp_out.c        | 22 +++++++--------
 net/decnet/dn_route.c          | 18 ++++++------
 net/decnet/dn_table.c          |  2 +-
 net/decnet/sysctl_net_decnet.c |  4 +--
 10 files changed, 94 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/include/net/dn.h b/include/net/dn.h
index 627778384c84..7cd6bb83f7ab 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -4,9 +4,7 @@
 #include <linux/dn.h>
 #include <net/sock.h>
 #include <asm/byteorder.h>
-
-#define dn_ntohs(x) le16_to_cpu(x)
-#define dn_htons(x) cpu_to_le16(x)
+#include <asm/unalignedh>
 
 struct dn_scp                                   /* Session Control Port */
 {
@@ -175,7 +173,7 @@ struct dn_skb_cb {
 
 static inline __le16 dn_eth2dn(unsigned char *ethaddr)
 {
-	return dn_htons(ethaddr[4] | (ethaddr[5] << 8));
+	return get_unaligned((__le16 *)(ethaddr + 4));
 }
 
 static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr)
@@ -185,7 +183,7 @@ static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr)
 
 static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr)
 {
-	__u16 a = dn_ntohs(addr);
+	__u16 a = le16_to_cpu(addr);
 	ethaddr[0] = 0xAA;
 	ethaddr[1] = 0x00;
 	ethaddr[2] = 0x04;
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index 30125119c950..c378be7bf960 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -181,9 +181,9 @@ static inline void dn_fib_res_put(struct dn_fib_res *res)
 
 static inline __le16 dnet_make_mask(int n)
 {
-        if (n)
-                return dn_htons(~((1<<(16-n))-1));
-        return 0;
+	if (n)
+		return cpu_to_le16(~((1 << (16 - n)) - 1));
+	return cpu_to_le16(0);
 }
 
 #endif /* _NET_DN_FIB_H */
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3c23ab33dbc0..cf0e18499297 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -167,7 +167,7 @@ static struct hlist_head *dn_find_list(struct sock *sk)
 	if (scp->addr.sdn_flags & SDF_WILD)
 		return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
 
-	return &dn_sk_hash[dn_ntohs(scp->addrloc) & DN_SK_HASH_MASK];
+	return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK];
 }
 
 /*
@@ -181,7 +181,7 @@ static int check_port(__le16 port)
 	if (port == 0)
 		return -1;
 
-	sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(port) & DN_SK_HASH_MASK]) {
+	sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
 		struct dn_scp *scp = DN_SK(sk);
 		if (scp->addrloc == port)
 			return -1;
@@ -195,12 +195,12 @@ static unsigned short port_alloc(struct sock *sk)
 static unsigned short port = 0x2000;
 	unsigned short i_port = port;
 
-	while(check_port(dn_htons(++port)) != 0) {
+	while(check_port(cpu_to_le16(++port)) != 0) {
 		if (port == i_port)
 			return 0;
 	}
 
-	scp->addrloc = dn_htons(port);
+	scp->addrloc = cpu_to_le16(port);
 
 	return 1;
 }
@@ -255,7 +255,7 @@ static struct hlist_head *listen_hash(struct sockaddr_dn *addr)
 
 	if (hash == 0) {
 		hash = addr->sdn_objnamel;
-		for(i = 0; i < dn_ntohs(addr->sdn_objnamel); i++) {
+		for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) {
 			hash ^= addr->sdn_objname[i];
 			hash ^= (hash << 3);
 		}
@@ -297,16 +297,16 @@ int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned c
 			break;
 		case 1:
 			*buf++ = 0;
-			*buf++ = dn_ntohs(sdn->sdn_objnamel);
-			memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel));
-			len = 3 + dn_ntohs(sdn->sdn_objnamel);
+			*buf++ = le16_to_cpu(sdn->sdn_objnamel);
+			memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
+			len = 3 + le16_to_cpu(sdn->sdn_objnamel);
 			break;
 		case 2:
 			memset(buf, 0, 5);
 			buf += 5;
-			*buf++ = dn_ntohs(sdn->sdn_objnamel);
-			memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel));
-			len = 7 + dn_ntohs(sdn->sdn_objnamel);
+			*buf++ = le16_to_cpu(sdn->sdn_objnamel);
+			memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
+			len = 7 + le16_to_cpu(sdn->sdn_objnamel);
 			break;
 	}
 
@@ -327,7 +327,7 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn,
 	int namel = 12;
 
 	sdn->sdn_objnum = 0;
-	sdn->sdn_objnamel = dn_htons(0);
+	sdn->sdn_objnamel = cpu_to_le16(0);
 	memset(sdn->sdn_objname, 0, DN_MAXOBJL);
 
 	if (len < 2)
@@ -361,13 +361,13 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn,
 	if (len < 0)
 		return -1;
 
-	sdn->sdn_objnamel = dn_htons(*data++);
-	len -= dn_ntohs(sdn->sdn_objnamel);
+	sdn->sdn_objnamel = cpu_to_le16(*data++);
+	len -= le16_to_cpu(sdn->sdn_objnamel);
 
-	if ((len < 0) || (dn_ntohs(sdn->sdn_objnamel) > namel))
+	if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel))
 		return -1;
 
-	memcpy(sdn->sdn_objname, data, dn_ntohs(sdn->sdn_objnamel));
+	memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel));
 
 	return size - len;
 }
@@ -391,7 +391,7 @@ struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
 				continue;
 			if (scp->addr.sdn_objnamel != addr->sdn_objnamel)
 				continue;
-			if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, dn_ntohs(addr->sdn_objnamel)) != 0)
+			if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0)
 				continue;
 		}
 		sock_hold(sk);
@@ -419,7 +419,7 @@ struct sock *dn_find_by_skb(struct sk_buff *skb)
 	struct dn_scp *scp;
 
 	read_lock(&dn_hash_lock);
-	sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(cb->dst_port) & DN_SK_HASH_MASK]) {
+	sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
 		scp = DN_SK(sk);
 		if (cb->src != dn_saddr2dn(&scp->peer))
 			continue;
@@ -734,10 +734,10 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (saddr->sdn_family != AF_DECnet)
 		return -EINVAL;
 
-	if (dn_ntohs(saddr->sdn_nodeaddrl) && (dn_ntohs(saddr->sdn_nodeaddrl) != 2))
+	if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2))
 		return -EINVAL;
 
-	if (dn_ntohs(saddr->sdn_objnamel) > DN_MAXOBJL)
+	if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL)
 		return -EINVAL;
 
 	if (saddr->sdn_flags & ~SDF_WILD)
@@ -748,7 +748,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		return -EACCES;
 
 	if (!(saddr->sdn_flags & SDF_WILD)) {
-		if (dn_ntohs(saddr->sdn_nodeaddrl)) {
+		if (le16_to_cpu(saddr->sdn_nodeaddrl)) {
 			read_lock(&dev_base_lock);
 			ldev = NULL;
 			for_each_netdev(&init_net, dev) {
@@ -799,15 +799,15 @@ static int dn_auto_bind(struct socket *sock)
 	if ((scp->accessdata.acc_accl != 0) &&
 		(scp->accessdata.acc_accl <= 12)) {
 
-		scp->addr.sdn_objnamel = dn_htons(scp->accessdata.acc_accl);
-		memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, dn_ntohs(scp->addr.sdn_objnamel));
+		scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl);
+		memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel));
 
 		scp->accessdata.acc_accl = 0;
 		memset(scp->accessdata.acc_acc, 0, 40);
 	}
 	/* End of compatibility stuff */
 
-	scp->addr.sdn_add.a_len = dn_htons(2);
+	scp->addr.sdn_add.a_len = cpu_to_le16(2);
 	rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr);
 	if (rv == 0) {
 		rv = dn_hash_sock(sk);
@@ -1027,7 +1027,7 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
 	u16 len = *ptr++; /* yes, it's 8bit on the wire */
 
 	BUG_ON(len > 16); /* we've checked the contents earlier */
-	opt->opt_optl   = dn_htons(len);
+	opt->opt_optl   = cpu_to_le16(len);
 	opt->opt_status = 0;
 	memcpy(opt->opt_data, ptr, len);
 	skb_pull(skb, len + 1);
@@ -1375,7 +1375,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
 			if (optlen != sizeof(struct optdata_dn))
 				return -EINVAL;
 
-			if (dn_ntohs(u.opt.opt_optl) > 16)
+			if (le16_to_cpu(u.opt.opt_optl) > 16)
 				return -EINVAL;
 
 			memcpy(&scp->conndata_out, &u.opt, optlen);
@@ -1388,7 +1388,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us
 			if (optlen != sizeof(struct optdata_dn))
 				return -EINVAL;
 
-			if (dn_ntohs(u.opt.opt_optl) > 16)
+			if (le16_to_cpu(u.opt.opt_optl) > 16)
 				return -EINVAL;
 
 			memcpy(&scp->discdata_out, &u.opt, optlen);
@@ -2213,12 +2213,12 @@ static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf)
 {
 	int i;
 
-	switch (dn_ntohs(dn->sdn_objnamel)) {
+	switch (le16_to_cpu(dn->sdn_objnamel)) {
 		case 0:
 			sprintf(buf, "%d", dn->sdn_objnum);
 			break;
 		default:
-			for (i = 0; i < dn_ntohs(dn->sdn_objnamel); i++) {
+			for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) {
 				buf[i] = dn->sdn_objname[i];
 				if (IS_NOT_PRINTABLE(buf[i]))
 					buf[i] = '.';
@@ -2281,7 +2281,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
 	seq_printf(seq,
 		   "%6s/%04X %04d:%04d %04d:%04d %01d %-16s "
 		   "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n",
-		   dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->addr)), buf1),
+		   dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1),
 		   scp->addrloc,
 		   scp->numdat,
 		   scp->numoth,
@@ -2289,7 +2289,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
 		   scp->ackxmt_oth,
 		   scp->flowloc_sw,
 		   local_object,
-		   dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->peer)), buf2),
+		   dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2),
 		   scp->addrrem,
 		   scp->numdat_rcv,
 		   scp->numoth_rcv,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 28e26bd08e24..424d86ac12fe 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -885,7 +885,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	memcpy(msg->tiver, dn_eco_version, 3);
 	dn_dn2eth(msg->id, ifa->ifa_local);
 	msg->iinfo   = DN_RT_INFO_ENDN;
-	msg->blksize = dn_htons(mtu2blksize(dev));
+	msg->blksize = cpu_to_le16(mtu2blksize(dev));
 	msg->area    = 0x00;
 	memset(msg->seed, 0, 8);
 	memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
@@ -895,13 +895,13 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 		dn_dn2eth(msg->neighbor, dn->addr);
 	}
 
-	msg->timer   = dn_htons((unsigned short)dn_db->parms.t3);
+	msg->timer   = cpu_to_le16((unsigned short)dn_db->parms.t3);
 	msg->mpd     = 0x00;
 	msg->datalen = 0x02;
 	memset(msg->data, 0xAA, 2);
 
 	pktlen = (__le16 *)skb_push(skb,2);
-	*pktlen = dn_htons(skb->len - 2);
+	*pktlen = cpu_to_le16(skb->len - 2);
 
 	skb_reset_network_header(skb);
 
@@ -929,7 +929,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn
 	if (dn->priority != dn_db->parms.priority)
 		return 0;
 
-	if (dn_ntohs(dn->addr) < dn_ntohs(ifa->ifa_local))
+	if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local))
 		return 1;
 
 	return 0;
@@ -973,11 +973,11 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	ptr += ETH_ALEN;
 	*ptr++ = dn_db->parms.forwarding == 1 ?
 			DN_RT_INFO_L1RT : DN_RT_INFO_L2RT;
-	*((__le16 *)ptr) = dn_htons(mtu2blksize(dev));
+	*((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev));
 	ptr += 2;
 	*ptr++ = dn_db->parms.priority; /* Priority */
 	*ptr++ = 0; /* Area: Reserved */
-	*((__le16 *)ptr) = dn_htons((unsigned short)dn_db->parms.t3);
+	*((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3);
 	ptr += 2;
 	*ptr++ = 0; /* MPD: Reserved */
 	i1 = ptr++;
@@ -993,7 +993,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	skb_trim(skb, (27 + *i2));
 
 	pktlen = (__le16 *)skb_push(skb, 2);
-	*pktlen = dn_htons(skb->len - 2);
+	*pktlen = cpu_to_le16(skb->len - 2);
 
 	skb_reset_network_header(skb);
 
@@ -1401,8 +1401,8 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
 				mtu2blksize(dev),
 				dn_db->parms.priority,
 				dn_db->parms.state, dn_db->parms.name,
-				dn_db->router ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->router->primary_key), router_buf) : "",
-				dn_db->peer ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->peer->primary_key), peer_buf) : "");
+				dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "",
+				dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : "");
 	}
 	return 0;
 }
@@ -1445,7 +1445,7 @@ void __init dn_dev_init(void)
 		return;
 	}
 
-	decnet_address = dn_htons((addr[0] << 10) | addr[1]);
+	decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]);
 
 	dn_dev_devices_on();
 
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 1ca13b17974d..05b5aa05e50e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -250,7 +250,7 @@ static int dn_long_output(struct sk_buff *skb)
 	data = skb_push(skb, sizeof(struct dn_long_packet) + 3);
 	lp = (struct dn_long_packet *)(data+3);
 
-	*((__le16 *)data) = dn_htons(skb->len - 2);
+	*((__le16 *)data) = cpu_to_le16(skb->len - 2);
 	*(data + 2) = 1 | DN_RT_F_PF; /* Padding */
 
 	lp->msgflg   = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS));
@@ -294,7 +294,7 @@ static int dn_short_output(struct sk_buff *skb)
 	}
 
 	data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
-	*((__le16 *)data) = dn_htons(skb->len - 2);
+	*((__le16 *)data) = cpu_to_le16(skb->len - 2);
 	sp = (struct dn_short_packet *)(data+2);
 
 	sp->msgflg     = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
@@ -336,12 +336,12 @@ static int dn_phase3_output(struct sk_buff *skb)
 	}
 
 	data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
-	*((__le16 *)data) = dn_htons(skb->len - 2);
+	*((__le16 *)data) = cpu_to_le16(skb->len - 2);
 	sp = (struct dn_short_packet *)(data + 2);
 
 	sp->msgflg   = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
-	sp->dstnode  = cb->dst & dn_htons(0x03ff);
-	sp->srcnode  = cb->src & dn_htons(0x03ff);
+	sp->dstnode  = cb->dst & cpu_to_le16(0x03ff);
+	sp->srcnode  = cb->src & cpu_to_le16(0x03ff);
 	sp->forward  = cb->hops & 0x3f;
 
 	skb_reset_network_header(skb);
@@ -394,7 +394,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
 			if (neigh->dev->type == ARPHRD_ETHER)
 				memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
 
-			dn->blksize  = dn_ntohs(msg->blksize);
+			dn->blksize  = le16_to_cpu(msg->blksize);
 			dn->priority = msg->priority;
 
 			dn->flags &= ~DN_NDFLAG_P3;
@@ -410,7 +410,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
 		}
 
 		/* Only use routers in our area */
-		if ((dn_ntohs(src)>>10) == (dn_ntohs((decnet_address))>>10)) {
+		if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) {
 			if (!dn_db->router) {
 				dn_db->router = neigh_clone(neigh);
 			} else {
@@ -453,7 +453,7 @@ int dn_neigh_endnode_hello(struct sk_buff *skb)
 			if (neigh->dev->type == ARPHRD_ETHER)
 				memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
 			dn->flags   &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2);
-			dn->blksize  = dn_ntohs(msg->blksize);
+			dn->blksize  = le16_to_cpu(msg->blksize);
 			dn->priority = 0;
 		}
 
@@ -543,7 +543,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq,
 
 	read_lock(&n->lock);
 	seq_printf(seq, "%-7s %s%s%s   %02x    %02d  %07ld %-8s\n",
-		   dn_addr2asc(dn_ntohs(dn->addr), buf),
+		   dn_addr2asc(le16_to_cpu(dn->addr), buf),
 		   (dn->flags&DN_NDFLAG_R1) ? "1" : "-",
 		   (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
 		   (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 4074a6e5d0de..5d8a2a56fd39 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -83,7 +83,9 @@ static void dn_log_martian(struct sk_buff *skb, const char *msg)
 	if (decnet_log_martians && net_ratelimit()) {
 		char *devname = skb->dev ? skb->dev->name : "???";
 		struct dn_skb_cb *cb = DN_SKB_CB(skb);
-		printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, dn_ntohs(cb->src), dn_ntohs(cb->dst), dn_ntohs(cb->src_port), dn_ntohs(cb->dst_port));
+		printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n",
+		       msg, devname, le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
+		       le16_to_cpu(cb->src_port), le16_to_cpu(cb->dst_port));
 	}
 }
 
@@ -133,7 +135,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
 	if (skb->len < 2)
 		return len;
 
-	if ((ack = dn_ntohs(*ptr)) & 0x8000) {
+	if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
 		skb_pull(skb, 2);
 		ptr++;
 		len += 2;
@@ -147,7 +149,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
 	if (skb->len < 2)
 		return len;
 
-	if ((ack = dn_ntohs(*ptr)) & 0x8000) {
+	if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
 		skb_pull(skb, 2);
 		len += 2;
 		if ((ack & 0x4000) == 0) {
@@ -237,7 +239,7 @@ static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason
 	cb->dst_port = msg->dstaddr;
 	cb->services = msg->services;
 	cb->info     = msg->info;
-	cb->segsize  = dn_ntohs(msg->segsize);
+	cb->segsize  = le16_to_cpu(msg->segsize);
 
 	if (!pskb_may_pull(skb, sizeof(*msg)))
 		goto err_out;
@@ -344,7 +346,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
 	ptr = skb->data;
 	cb->services = *ptr++;
 	cb->info = *ptr++;
-	cb->segsize = dn_ntohs(*(__le16 *)ptr);
+	cb->segsize = le16_to_cpu(*(__le16 *)ptr);
 
 	if ((scp->state == DN_CI) || (scp->state == DN_CD)) {
 		scp->persist = 0;
@@ -361,7 +363,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
 		if (skb->len > 0) {
 			u16 dlen = *skb->data;
 			if ((dlen <= 16) && (dlen <= skb->len)) {
-				scp->conndata_in.opt_optl = dn_htons(dlen);
+				scp->conndata_in.opt_optl = cpu_to_le16(dlen);
 				skb_copy_from_linear_data_offset(skb, 1,
 					      scp->conndata_in.opt_data, dlen);
 			}
@@ -396,17 +398,17 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
 	if (skb->len < 2)
 		goto out;
 
-	reason = dn_ntohs(*(__le16 *)skb->data);
+	reason = le16_to_cpu(*(__le16 *)skb->data);
 	skb_pull(skb, 2);
 
-	scp->discdata_in.opt_status = dn_htons(reason);
+	scp->discdata_in.opt_status = cpu_to_le16(reason);
 	scp->discdata_in.opt_optl   = 0;
 	memset(scp->discdata_in.opt_data, 0, 16);
 
 	if (skb->len > 0) {
 		u16 dlen = *skb->data;
 		if ((dlen <= 16) && (dlen <= skb->len)) {
-			scp->discdata_in.opt_optl = dn_htons(dlen);
+			scp->discdata_in.opt_optl = cpu_to_le16(dlen);
 			skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
 		}
 	}
@@ -463,7 +465,7 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
 	if (skb->len != 2)
 		goto out;
 
-	reason = dn_ntohs(*(__le16 *)skb->data);
+	reason = le16_to_cpu(*(__le16 *)skb->data);
 
 	sk->sk_state = TCP_CLOSE;
 
@@ -512,7 +514,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
 	if (skb->len != 4)
 		goto out;
 
-	segnum = dn_ntohs(*(__le16 *)ptr);
+	segnum = le16_to_cpu(*(__le16 *)ptr);
 	ptr += 2;
 	lsflags = *(unsigned char *)ptr++;
 	fcval = *ptr;
@@ -620,7 +622,7 @@ static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb)
 	if (skb->len < 2)
 		goto out;
 
-	cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data);
+	cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
 	skb_pull(skb, 2);
 
 	if (seq_next(scp->numoth_rcv, segnum)) {
@@ -648,7 +650,7 @@ static void dn_nsp_data(struct sock *sk, struct sk_buff *skb)
 	if (skb->len < 2)
 		goto out;
 
-	cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data);
+	cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
 	skb_pull(skb, 2);
 
 	if (seq_next(scp->numdat_rcv, segnum)) {
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 81a40ff10088..2013c25b7f5a 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -325,8 +325,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c
 
 	ptr = (__le16 *)dn_mk_common_header(scp, skb, msgflag, hlen);
 
-	*ptr++ = dn_htons(acknum);
-	*ptr++ = dn_htons(ackcrs);
+	*ptr++ = cpu_to_le16(acknum);
+	*ptr++ = cpu_to_le16(ackcrs);
 
 	return ptr;
 }
@@ -344,7 +344,7 @@ static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int o
 		cb->segnum = scp->numdat;
 		seq_add(&scp->numdat, 1);
 	}
-	*(ptr++) = dn_htons(cb->segnum);
+	*(ptr++) = cpu_to_le16(cb->segnum);
 
 	return ptr;
 }
@@ -522,7 +522,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
 	struct dn_scp *scp = DN_SK(sk);
 	struct sk_buff *skb = NULL;
 	struct nsp_conn_init_msg *msg;
-	__u8 len = (__u8)dn_ntohs(scp->conndata_out.opt_optl);
+	__u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
 
 	if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
 		return;
@@ -533,7 +533,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
 	msg->srcaddr = scp->addrloc;
 	msg->services = scp->services_loc;
 	msg->info = scp->info_loc;
-	msg->segsize = dn_htons(scp->segsize_loc);
+	msg->segsize = cpu_to_le16(scp->segsize_loc);
 
 	*skb_put(skb,1) = len;
 
@@ -559,7 +559,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
 
 	if ((dst == NULL) || (rem == 0)) {
 		if (net_ratelimit())
-			printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", dn_ntohs(rem), dst);
+			printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", le16_to_cpu(rem), dst);
 		return;
 	}
 
@@ -572,7 +572,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
 	msg += 2;
 	*(__le16 *)msg = loc;
 	msg += 2;
-	*(__le16 *)msg = dn_htons(reason);
+	*(__le16 *)msg = cpu_to_le16(reason);
 	msg += 2;
 	if (msgflg == NSP_DISCINIT)
 		*msg++ = ddl;
@@ -598,10 +598,10 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg,
 	int ddl = 0;
 
 	if (msgflg == NSP_DISCINIT)
-		ddl = dn_ntohs(scp->discdata_out.opt_optl);
+		ddl = le16_to_cpu(scp->discdata_out.opt_optl);
 
 	if (reason == 0)
-		reason = dn_ntohs(scp->discdata_out.opt_status);
+		reason = le16_to_cpu(scp->discdata_out.opt_status);
 
 	dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl,
 		scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
@@ -675,7 +675,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	msg->srcaddr	= scp->addrloc;
 	msg->services	= scp->services_loc;	/* Requested flow control    */
 	msg->info	= scp->info_loc;	/* Version Number            */
-	msg->segsize	= dn_htons(scp->segsize_loc);	/* Max segment size  */
+	msg->segsize	= cpu_to_le16(scp->segsize_loc);	/* Max segment size  */
 
 	if (scp->peer.sdn_objnum)
 		type = 0;
@@ -708,7 +708,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	if (aux > 0)
 	memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
 
-	aux = (__u8)dn_ntohs(scp->conndata_out.opt_optl);
+	aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
 	*skb_put(skb, 1) = aux;
 	if (aux > 0)
 	memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index eeaa3d819f9c..b33b254c52cb 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -475,7 +475,7 @@ static int dn_route_rx_packet(struct sk_buff *skb)
 		printk(KERN_DEBUG
 			"DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
 			(int)cb->rt_flags, devname, skb->len,
-			dn_ntohs(cb->src), dn_ntohs(cb->dst),
+			le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
 			err, skb->pkt_type);
 	}
 
@@ -575,7 +575,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 {
 	struct dn_skb_cb *cb;
 	unsigned char flags = 0;
-	__u16 len = dn_ntohs(*(__le16 *)skb->data);
+	__u16 len = le16_to_cpu(*(__le16 *)skb->data);
 	struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
 	unsigned char padlen = 0;
 
@@ -773,7 +773,7 @@ static int dn_rt_bug(struct sk_buff *skb)
 		struct dn_skb_cb *cb = DN_SKB_CB(skb);
 
 		printk(KERN_DEBUG "dn_rt_bug: skb from:%04x to:%04x\n",
-				dn_ntohs(cb->src), dn_ntohs(cb->dst));
+				le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
 	}
 
 	kfree_skb(skb);
@@ -816,7 +816,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 
 static inline int dn_match_addr(__le16 addr1, __le16 addr2)
 {
-	__u16 tmp = dn_ntohs(addr1) ^ dn_ntohs(addr2);
+	__u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
 	int match = 16;
 	while(tmp) {
 		tmp >>= 1;
@@ -886,8 +886,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old
 	if (decnet_debug_level & 16)
 		printk(KERN_DEBUG
 		       "dn_route_output_slow: dst=%04x src=%04x mark=%d"
-		       " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst),
-		       dn_ntohs(oldflp->fld_src),
+		       " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst),
+		       le16_to_cpu(oldflp->fld_src),
 		       oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif);
 
 	/* If we have an output interface, verify its a DECnet device */
@@ -959,7 +959,7 @@ source_ok:
 		printk(KERN_DEBUG
 		       "dn_route_output_slow: initial checks complete."
 		       " dst=%o4x src=%04x oif=%d try_hard=%d\n",
-		       dn_ntohs(fl.fld_dst), dn_ntohs(fl.fld_src),
+		       le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src),
 		       fl.oif, try_hard);
 
 	/*
@@ -1711,8 +1711,8 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 
 	seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
 			rt->u.dst.dev ? rt->u.dst.dev->name : "*",
-			dn_addr2asc(dn_ntohs(rt->rt_daddr), buf1),
-			dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2),
+			dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
+			dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
 			atomic_read(&rt->u.dst.__refcnt),
 			rt->u.dst.__use,
 			(int) dst_metric(&rt->u.dst, RTAX_RTT));
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 3a2830ac89c2..69ad9280c693 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -85,7 +85,7 @@ static int dn_fib_hash_zombies;
 
 static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
 {
-	u16 h = dn_ntohs(key.datum)>>(16 - dz->dz_order);
+	u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order);
 	h ^= (h >> 10);
 	h ^= (h >> 6);
 	h &= DZ_HASHMASK(dz);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 2f360a1e5e49..965397af9a80 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -126,7 +126,7 @@ static int parse_addr(__le16 *addr, char *str)
 	if (INVALID_END_CHAR(*str))
 		return -1;
 
-	*addr = dn_htons((area << 10) | node);
+	*addr = cpu_to_le16((area << 10) | node);
 
 	return 0;
 }
@@ -201,7 +201,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
 		return 0;
 	}
 
-	dn_addr2asc(dn_ntohs(decnet_address), addr);
+	dn_addr2asc(le16_to_cpu(decnet_address), addr);
 	len = strlen(addr);
 	addr[len++] = '\n';
 
-- 
cgit v1.2.3


From d211af055d0c12dc3416c2886e6fbdc6eb74a381 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Mon, 24 Nov 2008 15:38:45 +0100
Subject: i386: get rid of the use of KPROBE_ENTRY / KPROBE_END

entry_32.S is now the only user of KPROBE_ENTRY / KPROBE_END,
treewide. This patch reorders entry_64.S and explicitly generates
a separate section for functions that need the protection. The
generated code before and after the patch is equal.

The KPROBE_ENTRY and KPROBE_END macro's are removed too.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S | 438 +++++++++++++++++++++++----------------------
 include/linux/linkage.h    |   8 -
 2 files changed, 224 insertions(+), 222 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index bd02ec77edc4..6e96028d1a9c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -688,65 +688,6 @@ ENDPROC(name)
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
-KPROBE_ENTRY(page_fault)
-	RING0_EC_FRAME
-	pushl $do_page_fault
-	CFI_ADJUST_CFA_OFFSET 4
-	ALIGN
-error_code:
-	/* the function address is in %fs's slot on the stack */
-	pushl %es
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET es, 0*/
-	pushl %ds
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET ds, 0*/
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET eax, 0
-	pushl %ebp
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ebp, 0
-	pushl %edi
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edi, 0
-	pushl %esi
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET esi, 0
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edx, 0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ecx, 0
-	pushl %ebx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ebx, 0
-	cld
-	pushl %fs
-	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET fs, 0*/
-	movl $(__KERNEL_PERCPU), %ecx
-	movl %ecx, %fs
-	UNWIND_ESPFIX_STACK
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	/*CFI_REGISTER es, ecx*/
-	movl PT_FS(%esp), %edi		# get the function address
-	movl PT_ORIG_EAX(%esp), %edx	# get the error code
-	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
-	mov  %ecx, PT_FS(%esp)
-	/*CFI_REL_OFFSET fs, ES*/
-	movl $(__USER_DS), %ecx
-	movl %ecx, %ds
-	movl %ecx, %es
-	TRACE_IRQS_OFF
-	movl %esp,%eax			# pt_regs pointer
-	call *%edi
-	jmp ret_from_exception
-	CFI_ENDPROC
-KPROBE_END(page_fault)
-
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
 	pushl $0
@@ -777,140 +718,6 @@ ENTRY(device_not_available)
 	CFI_ENDPROC
 END(device_not_available)
 
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-#define FIX_STACK(offset, ok, label)		\
-	cmpw $__KERNEL_CS,4(%esp);		\
-	jne ok;					\
-label:						\
-	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
-	CFI_DEF_CFA esp, 0;			\
-	CFI_UNDEFINED eip;			\
-	pushfl;					\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	pushl $__KERNEL_CS;			\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	pushl $sysenter_past_esp;		\
-	CFI_ADJUST_CFA_OFFSET 4;		\
-	CFI_REL_OFFSET eip, 0
-
-KPROBE_ENTRY(debug)
-	RING0_INT_FRAME
-	cmpl $ia32_sysenter_target,(%esp)
-	jne debug_stack_correct
-	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
-debug_stack_correct:
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx			# error code 0
-	movl %esp,%eax			# pt_regs pointer
-	call do_debug
-	jmp ret_from_exception
-	CFI_ENDPROC
-KPROBE_END(debug)
-
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got  an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-KPROBE_ENTRY(nmi)
-	RING0_INT_FRAME
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	movl %ss, %eax
-	cmpw $__ESPFIX_SS, %ax
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
-	je nmi_espfix_stack
-	cmpl $ia32_sysenter_target,(%esp)
-	je nmi_stack_fixup
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	movl %esp,%eax
-	/* Do not access memory above the end of our stack page,
-	 * it might not exist.
-	 */
-	andl $(THREAD_SIZE-1),%eax
-	cmpl $(THREAD_SIZE-20),%eax
-	popl %eax
-	CFI_ADJUST_CFA_OFFSET -4
-	jae nmi_stack_correct
-	cmpl $ia32_sysenter_target,12(%esp)
-	je nmi_debug_stack_check
-nmi_stack_correct:
-	/* We have a RING0_INT_FRAME here */
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx		# zero error code
-	movl %esp,%eax		# pt_regs pointer
-	call do_nmi
-	jmp restore_nocheck_notrace
-	CFI_ENDPROC
-
-nmi_stack_fixup:
-	RING0_INT_FRAME
-	FIX_STACK(12,nmi_stack_correct, 1)
-	jmp nmi_stack_correct
-
-nmi_debug_stack_check:
-	/* We have a RING0_INT_FRAME here */
-	cmpw $__KERNEL_CS,16(%esp)
-	jne nmi_stack_correct
-	cmpl $debug,(%esp)
-	jb nmi_stack_correct
-	cmpl $debug_esp_fix_insn,(%esp)
-	ja nmi_stack_correct
-	FIX_STACK(24,nmi_stack_correct, 1)
-	jmp nmi_stack_correct
-
-nmi_espfix_stack:
-	/* We have a RING0_INT_FRAME here.
-	 *
-	 * create the pointer to lss back
-	 */
-	pushl %ss
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl %esp
-	CFI_ADJUST_CFA_OFFSET 4
-	addw $4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl 16(%esp)
-	CFI_ADJUST_CFA_OFFSET 4
-	.endr
-	pushl %eax
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	FIXUP_ESPFIX_STACK		# %eax == %esp
-	xorl %edx,%edx			# zero error code
-	call do_nmi
-	RESTORE_REGS
-	lss 12+4(%esp), %esp		# back to espfix stack
-	CFI_ADJUST_CFA_OFFSET -24
-	jmp irq_return
-	CFI_ENDPROC
-KPROBE_END(nmi)
-
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
 	iret
@@ -926,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
 END(native_irq_enable_sysexit)
 #endif
 
-KPROBE_ENTRY(int3)
-	RING0_INT_FRAME
-	pushl $-1			# mark this as an int
-	CFI_ADJUST_CFA_OFFSET 4
-	SAVE_ALL
-	TRACE_IRQS_OFF
-	xorl %edx,%edx		# zero error code
-	movl %esp,%eax		# pt_regs pointer
-	call do_int3
-	jmp ret_from_exception
-	CFI_ENDPROC
-KPROBE_END(int3)
-
 ENTRY(overflow)
 	RING0_INT_FRAME
 	pushl $0
@@ -1003,14 +797,6 @@ ENTRY(stack_segment)
 	CFI_ENDPROC
 END(stack_segment)
 
-KPROBE_ENTRY(general_protection)
-	RING0_EC_FRAME
-	pushl $do_general_protection
-	CFI_ADJUST_CFA_OFFSET 4
-	jmp error_code
-	CFI_ENDPROC
-KPROBE_END(general_protection)
-
 ENTRY(alignment_check)
 	RING0_EC_FRAME
 	pushl $do_alignment_check
@@ -1220,3 +1006,227 @@ END(mcount)
 #include "syscall_table_32.S"
 
 syscall_table_size=(.-sys_call_table)
+
+/*
+ * Some functions should be protected against kprobes
+ */
+	.pushsection .kprobes.text, "ax"
+
+ENTRY(page_fault)
+	RING0_EC_FRAME
+	pushl $do_page_fault
+	CFI_ADJUST_CFA_OFFSET 4
+	ALIGN
+error_code:
+	/* the function address is in %fs's slot on the stack */
+	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0*/
+	pushl %ds
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET ds, 0*/
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET eax, 0
+	pushl %ebp
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebp, 0
+	pushl %edi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edi, 0
+	pushl %esi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET esi, 0
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx, 0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx, 0
+	pushl %ebx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebx, 0
+	cld
+	pushl %fs
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET fs, 0*/
+	movl $(__KERNEL_PERCPU), %ecx
+	movl %ecx, %fs
+	UNWIND_ESPFIX_STACK
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_REGISTER es, ecx*/
+	movl PT_FS(%esp), %edi		# get the function address
+	movl PT_ORIG_EAX(%esp), %edx	# get the error code
+	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+	mov  %ecx, PT_FS(%esp)
+	/*CFI_REL_OFFSET fs, ES*/
+	movl $(__USER_DS), %ecx
+	movl %ecx, %ds
+	movl %ecx, %es
+	TRACE_IRQS_OFF
+	movl %esp,%eax			# pt_regs pointer
+	call *%edi
+	jmp ret_from_exception
+	CFI_ENDPROC
+END(page_fault)
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label)		\
+	cmpw $__KERNEL_CS,4(%esp);		\
+	jne ok;					\
+label:						\
+	movl TSS_sysenter_sp0+offset(%esp),%esp;	\
+	CFI_DEF_CFA esp, 0;			\
+	CFI_UNDEFINED eip;			\
+	pushfl;					\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	pushl $__KERNEL_CS;			\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	pushl $sysenter_past_esp;		\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	CFI_REL_OFFSET eip, 0
+
+ENTRY(debug)
+	RING0_INT_FRAME
+	cmpl $ia32_sysenter_target,(%esp)
+	jne debug_stack_correct
+	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	xorl %edx,%edx			# error code 0
+	movl %esp,%eax			# pt_regs pointer
+	call do_debug
+	jmp ret_from_exception
+	CFI_ENDPROC
+END(debug)
+
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got  an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+	RING0_INT_FRAME
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	movl %ss, %eax
+	cmpw $__ESPFIX_SS, %ax
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	je nmi_espfix_stack
+	cmpl $ia32_sysenter_target,(%esp)
+	je nmi_stack_fixup
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	movl %esp,%eax
+	/* Do not access memory above the end of our stack page,
+	 * it might not exist.
+	 */
+	andl $(THREAD_SIZE-1),%eax
+	cmpl $(THREAD_SIZE-20),%eax
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	jae nmi_stack_correct
+	cmpl $ia32_sysenter_target,12(%esp)
+	je nmi_debug_stack_check
+nmi_stack_correct:
+	/* We have a RING0_INT_FRAME here */
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_nmi
+	jmp restore_nocheck_notrace
+	CFI_ENDPROC
+
+nmi_stack_fixup:
+	RING0_INT_FRAME
+	FIX_STACK(12,nmi_stack_correct, 1)
+	jmp nmi_stack_correct
+
+nmi_debug_stack_check:
+	/* We have a RING0_INT_FRAME here */
+	cmpw $__KERNEL_CS,16(%esp)
+	jne nmi_stack_correct
+	cmpl $debug,(%esp)
+	jb nmi_stack_correct
+	cmpl $debug_esp_fix_insn,(%esp)
+	ja nmi_stack_correct
+	FIX_STACK(24,nmi_stack_correct, 1)
+	jmp nmi_stack_correct
+
+nmi_espfix_stack:
+	/* We have a RING0_INT_FRAME here.
+	 *
+	 * create the pointer to lss back
+	 */
+	pushl %ss
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl %esp
+	CFI_ADJUST_CFA_OFFSET 4
+	addw $4, (%esp)
+	/* copy the iret frame of 12 bytes */
+	.rept 3
+	pushl 16(%esp)
+	CFI_ADJUST_CFA_OFFSET 4
+	.endr
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	FIXUP_ESPFIX_STACK		# %eax == %esp
+	xorl %edx,%edx			# zero error code
+	call do_nmi
+	RESTORE_REGS
+	lss 12+4(%esp), %esp		# back to espfix stack
+	CFI_ADJUST_CFA_OFFSET -24
+	jmp irq_return
+	CFI_ENDPROC
+END(nmi)
+
+ENTRY(int3)
+	RING0_INT_FRAME
+	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+	xorl %edx,%edx		# zero error code
+	movl %esp,%eax		# pt_regs pointer
+	call do_int3
+	jmp ret_from_exception
+	CFI_ENDPROC
+END(int3)
+
+ENTRY(general_protection)
+	RING0_EC_FRAME
+	pushl $do_general_protection
+	CFI_ADJUST_CFA_OFFSET 4
+	jmp error_code
+	CFI_ENDPROC
+END(general_protection)
+
+/*
+ * End of kprobes section
+ */
+	.popsection
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 9fd1f859021b..fee9e59649c1 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -64,14 +64,6 @@
 	name:
 #endif
 
-#define KPROBE_ENTRY(name) \
-  .pushsection .kprobes.text, "ax"; \
-  ENTRY(name)
-
-#define KPROBE_END(name) \
-  END(name);		 \
-  .popsection
-
 #ifndef END
 #define END(name) \
   .size name, .-name
-- 
cgit v1.2.3


From 475ad8e2172d7f8b73af5532a8dad265b51339c2 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 27 Nov 2008 23:04:13 -0800
Subject: decnet: compile fix for removal of byteorder wrapper

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dn.h b/include/net/dn.h
index 7cd6bb83f7ab..e5469f7b67a3 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -4,7 +4,7 @@
 #include <linux/dn.h>
 #include <net/sock.h>
 #include <asm/byteorder.h>
-#include <asm/unalignedh>
+#include <asm/unaligned.h>
 
 struct dn_scp                                   /* Session Control Port */
 {
-- 
cgit v1.2.3


From 8b752e3ef6e3f5cde87afc649dd51d92b1e549c1 Mon Sep 17 00:00:00 2001
From: Liming Wang <liming.wang@windriver.com>
Date: Fri, 28 Nov 2008 09:52:40 +0800
Subject: softirq: remove useless function __local_bh_enable

Impact: remove unused code

__local_bh_enable has been replaced with _local_bh_enable.
As comments says "it always nests inside local_bh_enable() sections"
has not been valid now. Also there is no reason to use __local_bh_enable
anywhere, so we can remove this useless function.

Signed-off-by: Liming Wang <liming.wang@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bottom_half.h |  1 -
 kernel/softirq.c            | 14 --------------
 2 files changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index 777dbf695d44..27b1bcffe408 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -2,7 +2,6 @@
 #define _LINUX_BH_H
 
 extern void local_bh_disable(void);
-extern void __local_bh_enable(void);
 extern void _local_bh_enable(void);
 extern void local_bh_enable(void);
 extern void local_bh_enable_ip(unsigned long ip);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..8d9934b4162a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -102,20 +102,6 @@ void local_bh_disable(void)
 
 EXPORT_SYMBOL(local_bh_disable);
 
-void __local_bh_enable(void)
-{
-	WARN_ON_ONCE(in_irq());
-
-	/*
-	 * softirqs should never be enabled by __local_bh_enable(),
-	 * it always nests inside local_bh_enable() sections:
-	 */
-	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
-
-	sub_preempt_count(SOFTIRQ_OFFSET);
-}
-EXPORT_SYMBOL_GPL(__local_bh_enable);
-
 /*
  * Special-case - softirqs can safely be enabled in
  * cond_resched_softirq(), or by __do_softirq(),
-- 
cgit v1.2.3


From a838c2ec6ea1f18431da74dfe4978c57355b95f3 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Thu, 27 Nov 2008 16:14:44 +0800
Subject: markers: comment marker_synchronize_unregister() on data dependency

Add document and comments on marker_synchronize_unregister(): it
should be called before freeing resources that the probes depend on.

Based on comments from Lai Jiangshan and Mathieu Desnoyers.

Signed-off-by: Wu Fengguang <wfg@linux.intel.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/markers.txt | 15 ++++++++++-----
 include/linux/marker.h    |  6 ++++--
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 6d275e4ef385..d2b3d0e91b26 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -51,11 +51,16 @@ to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
 is done through marker_probe_unregister(); it will disarm the probe.
-marker_synchronize_unregister() must be called before the end of the module exit
-function to make sure there is no caller left using the probe. This, and the
-fact that preemption is disabled around the probe call, make sure that probe
-removal and module unload are safe. See the "Probe example" section below for a
-sample probe module.
+
+marker_synchronize_unregister() must be called between probe unregistration and
+the first occurrence of
+- the end of module exit function,
+  to make sure there is no caller left using the probe;
+- the free of any resource used by the probes,
+  to make sure the probes wont be accessing invalid data.
+This, and the fact that preemption is disabled around the probe call, make sure
+that probe removal and module unload are safe. See the "Probe example" section
+below for a sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 34c14bc957f5..b85e74ca782f 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -211,8 +211,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
 
 /*
  * marker_synchronize_unregister must be called between the last marker probe
- * unregistration and the end of module exit to make sure there is no caller
- * executing a probe when it is freed.
+ * unregistration and the first one of
+ * - the end of module exit function
+ * - the free of any resource used by the probes
+ * to ensure the code and data are valid for any possibly running probes.
  */
 #define marker_synchronize_unregister() synchronize_sched()
 
-- 
cgit v1.2.3


From ec5679e513305f1411753e5f5489935bd638af23 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Nov 2008 17:56:14 +0100
Subject: debug warnings: eliminate warn_on_slowpath()

Impact: cleanup, eliminate code

now that warn_on_slowpath() uses warn_slowpath(...,NULL), we can
eliminate warn_on_slowpath() altogether and use warn_slowpath().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/bug.h | 7 +++----
 kernel/panic.c            | 6 ------
 2 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12c07c1866b2..b8ba6941f587 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -33,15 +33,14 @@ struct bug_entry {
 
 #ifndef __WARN
 #ifndef __ASSEMBLY__
-extern void warn_on_slowpath(const char *file, const int line);
 extern void warn_slowpath(const char *file, const int line,
 		const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 #define WANT_WARN_ON_SLOWPATH
 #endif
-#define __WARN() warn_on_slowpath(__FILE__, __LINE__)
-#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+#define __WARN()		warn_slowpath(__FILE__, __LINE__, NULL)
+#define __WARN_printf(arg...)	warn_slowpath(__FILE__, __LINE__, arg)
 #else
-#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
+#define __WARN_printf(arg...)	do { printk(arg); __WARN(); } while (0)
 #endif
 
 #ifndef WARN_ON
diff --git a/kernel/panic.c b/kernel/panic.c
index 73d365199c3f..50349a41fba7 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -349,12 +349,6 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...)
 	add_taint(TAINT_WARN);
 }
 EXPORT_SYMBOL(warn_slowpath);
-
-void warn_on_slowpath(const char *file, int line)
-{
-	warn_slowpath(file, line, NULL);
-}
-EXPORT_SYMBOL(warn_on_slowpath);
 #endif
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-- 
cgit v1.2.3


From 0f0ca340e57bd7446855fefd07a64249acf81223 Mon Sep 17 00:00:00 2001
From: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Date: Fri, 28 Nov 2008 16:24:56 -0800
Subject: phy: power management support

This patch adds the power management support into the physical
abstraction layer.

Suspend and resume functions respectively turns on/off the bit 11
into the PHY Basic mode control register.
Generic PHY device starts supporting PM.

In order to support the wake-on LAN and avoid to put in power down
the PHY device, the MDIO is aware of what the Ethernet device wants to do.

Voluntary, no CONFIG_PM defines were added into the sources.
Also generic suspend/resume functions are exported to allow
other drivers use them (such as genphy_config_aneg etc.).

Within the phy_driver_register function, we need to remove the
memset. It overrides the device driver owner and it is not good.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c   | 14 ++++++++++----
 drivers/net/phy/phy_device.c | 31 ++++++++++++++++++++++++++++++-
 include/linux/phy.h          |  2 ++
 3 files changed, 42 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 868812ff6de8..8755d8cd4166 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -284,9 +284,12 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state)
 {
 	int ret = 0;
 	struct device_driver *drv = dev->driver;
+	struct phy_driver *phydrv = to_phy_driver(drv);
+	struct phy_device *phydev = to_phy_device(dev);
 
-	if (drv && drv->suspend)
-		ret = drv->suspend(dev, state);
+	if ((!device_may_wakeup(phydev->dev.parent)) &&
+		(phydrv && phydrv->suspend))
+			ret = phydrv->suspend(phydev);
 
 	return ret;
 }
@@ -295,9 +298,12 @@ static int mdio_bus_resume(struct device * dev)
 {
 	int ret = 0;
 	struct device_driver *drv = dev->driver;
+	struct phy_driver *phydrv = to_phy_driver(drv);
+	struct phy_device *phydev = to_phy_device(dev);
 
-	if (drv && drv->resume)
-		ret = drv->resume(dev);
+	if ((!device_may_wakeup(phydev->dev.parent)) &&
+		(phydrv && phydrv->resume))
+		ret = phydrv->resume(phydev);
 
 	return ret;
 }
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 29546a206045..4cc75a290c06 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -779,7 +779,35 @@ static int genphy_config_init(struct phy_device *phydev)
 
 	return 0;
 }
+int genphy_suspend(struct phy_device *phydev)
+{
+	int value;
+
+	mutex_lock(&phydev->lock);
+
+	value = phy_read(phydev, MII_BMCR);
+	phy_write(phydev, MII_BMCR, (value | BMCR_PDOWN));
+
+	mutex_unlock(&phydev->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(genphy_suspend);
 
+int genphy_resume(struct phy_device *phydev)
+{
+	int value;
+
+	mutex_lock(&phydev->lock);
+
+	value = phy_read(phydev, MII_BMCR);
+	phy_write(phydev, MII_BMCR, (value & ~BMCR_PDOWN));
+
+	mutex_unlock(&phydev->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(genphy_resume);
 
 /**
  * phy_probe - probe and init a PHY device
@@ -855,7 +883,6 @@ int phy_driver_register(struct phy_driver *new_driver)
 {
 	int retval;
 
-	memset(&new_driver->driver, 0, sizeof(new_driver->driver));
 	new_driver->driver.name = new_driver->name;
 	new_driver->driver.bus = &mdio_bus_type;
 	new_driver->driver.probe = phy_probe;
@@ -890,6 +917,8 @@ static struct phy_driver genphy_driver = {
 	.features	= 0,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
 	.driver		= {.owner= THIS_MODULE, },
 };
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 77c4ed60b982..d7e54d98869f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -467,6 +467,8 @@ int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
 int genphy_read_status(struct phy_device *phydev);
+int genphy_suspend(struct phy_device *phydev);
+int genphy_resume(struct phy_device *phydev);
 void phy_driver_unregister(struct phy_driver *drv);
 int phy_driver_register(struct phy_driver *new_driver);
 void phy_prepare_link(struct phy_device *phydev,
-- 
cgit v1.2.3


From 7a9d4020533b5c0c615b6de3be154c9ff30b8cc9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 30 Nov 2008 12:17:26 +0100
Subject: Bluetooth: Send HCI Reset command by default on device initialization

The Bluetooth subsystem was not using the HCI Reset command when doing
device initialization. The Bluetooth 1.0b specification was ambiguous
on how the device firmware was suppose to handle it. Almost every device
was triggering a transport reset at the same time. In case of USB this
ended up in disconnects from the bus.

All modern Bluetooth dongles handle this perfectly fine and a lot of
them actually require that HCI Reset is sent. If not then they are
either stuck in their HID Proxy mode or their internal structures for
inquiry and paging are not correctly setup.

To handle old and new devices smoothly the Bluetooth subsystem contains
a quirk to force the HCI Reset on initialization. However maintaining
such a quirk becomes more and more complicated. This patch turns the
logic around and lets the old devices disable the HCI Reset command.

The only device where the HCI_QUIRK_NO_RESET is still needed are the
original Digianswer devices and dongles with an early CSR firmware.

CSR reported that they fixed this for version 12 firmware. The last
official release of version 11 firmware is build ID 115. The first
version 12 candidate was build ID 117.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/bpa10x.c    |  2 +
 drivers/bluetooth/btusb.c     | 99 ++++++++++++++++++++-----------------------
 drivers/bluetooth/hci_ldisc.c |  4 +-
 include/net/bluetooth/hci.h   |  2 +-
 net/bluetooth/af_bluetooth.c  |  2 +-
 net/bluetooth/hci_core.c      |  2 +-
 6 files changed, 54 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index b936d8ce2728..4f0a57236d23 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -489,6 +489,8 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *
 
 	hdev->owner = THIS_MODULE;
 
+	set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks);
+
 	err = hci_register_dev(hdev);
 	if (err < 0) {
 		hci_free_dev(hdev);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 0cd4a55dd5c4..5d97bedb83e0 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -41,25 +41,25 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "0.3"
+#define VERSION "0.4"
 
 static int ignore_dga;
 static int ignore_csr;
 static int ignore_sniffer;
 static int disable_scofix;
 static int force_scofix;
-static int reset;
+
+static int reset = 1;
 
 static struct usb_driver btusb_driver;
 
 #define BTUSB_IGNORE		0x01
-#define BTUSB_RESET		0x02
-#define BTUSB_DIGIANSWER	0x04
-#define BTUSB_CSR		0x08
-#define BTUSB_SNIFFER		0x10
-#define BTUSB_BCM92035		0x20
-#define BTUSB_BROKEN_ISOC	0x40
-#define BTUSB_WRONG_SCO_MTU	0x80
+#define BTUSB_DIGIANSWER	0x02
+#define BTUSB_CSR		0x04
+#define BTUSB_SNIFFER		0x08
+#define BTUSB_BCM92035		0x10
+#define BTUSB_BROKEN_ISOC	0x20
+#define BTUSB_WRONG_SCO_MTU	0x40
 
 static struct usb_device_id btusb_table[] = {
 	/* Generic Bluetooth USB device */
@@ -79,7 +79,7 @@ static struct usb_device_id btusb_table[] = {
 	{ USB_DEVICE(0x0bdb, 0x1002) },
 
 	/* Canyon CN-BTU1 with HID interfaces */
-	{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_RESET },
+	{ USB_DEVICE(0x0c10, 0x0000) },
 
 	{ }	/* Terminating entry */
 };
@@ -94,52 +94,36 @@ static struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0a5c, 0x2033), .driver_info = BTUSB_IGNORE },
 
 	/* Broadcom BCM2035 */
-	{ USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-	{ USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
 
 	/* Broadcom BCM2045 */
-	{ USB_DEVICE(0x0a5c, 0x2039), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-	{ USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-
-	/* Broadcom BCM2046 */
-	{ USB_DEVICE(0x0a5c, 0x2146), .driver_info = BTUSB_RESET },
-	{ USB_DEVICE(0x0a5c, 0x2151), .driver_info = BTUSB_RESET },
-
-	/* Apple MacBook Pro with Broadcom chip */
-	{ USB_DEVICE(0x05ac, 0x820f), .driver_info = BTUSB_RESET },
+	{ USB_DEVICE(0x0a5c, 0x2039), .driver_info = BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* IBM/Lenovo ThinkPad with Broadcom chip */
-	{ USB_DEVICE(0x0a5c, 0x201e), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-	{ USB_DEVICE(0x0a5c, 0x2110), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-
-	/* Targus ACB10US */
-	{ USB_DEVICE(0x0a5c, 0x2100), .driver_info = BTUSB_RESET },
-	{ USB_DEVICE(0x0a5c, 0x2154), .driver_info = BTUSB_RESET },
-
-	/* ANYCOM Bluetooth USB-200 and USB-250 */
-	{ USB_DEVICE(0x0a5c, 0x2111), .driver_info = BTUSB_RESET },
+	{ USB_DEVICE(0x0a5c, 0x201e), .driver_info = BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x0a5c, 0x2110), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* HP laptop with Broadcom chip */
-	{ USB_DEVICE(0x03f0, 0x171d), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x03f0, 0x171d), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* Dell laptop with Broadcom chip */
-	{ USB_DEVICE(0x413c, 0x8126), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x413c, 0x8126), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* Dell Wireless 370 */
-	{ USB_DEVICE(0x413c, 0x8156), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x413c, 0x8156), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* Dell Wireless 410 */
-	{ USB_DEVICE(0x413c, 0x8152), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-
-	/* Microsoft Wireless Transceiver for Bluetooth 2.0 */
-	{ USB_DEVICE(0x045e, 0x009c), .driver_info = BTUSB_RESET },
+	{ USB_DEVICE(0x413c, 0x8152), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* Kensington Bluetooth USB adapter */
-	{ USB_DEVICE(0x047d, 0x105d), .driver_info = BTUSB_RESET },
-	{ USB_DEVICE(0x047d, 0x105e), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x047d, 0x105e), .driver_info = BTUSB_WRONG_SCO_MTU },
 
-	/* ISSC Bluetooth Adapter v3.1 */
-	{ USB_DEVICE(0x1131, 0x1001), .driver_info = BTUSB_RESET },
+	/* Belkin F8T012 and F8T013 devices */
+	{ USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_WRONG_SCO_MTU },
+	{ USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_WRONG_SCO_MTU },
 
 	/* RTX Telecom based adapters with buggy SCO support */
 	{ USB_DEVICE(0x0400, 0x0807), .driver_info = BTUSB_BROKEN_ISOC },
@@ -148,13 +132,6 @@ static struct usb_device_id blacklist_table[] = {
 	/* CONWISE Technology based adapters with buggy SCO support */
 	{ USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC },
 
-	/* Belkin F8T012 and F8T013 devices */
-	{ USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-	{ USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
-
-	/* Belkin F8T016 device */
-	{ USB_DEVICE(0x050d, 0x016a), .driver_info = BTUSB_RESET },
-
 	/* Digianswer devices */
 	{ USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER },
 	{ USB_DEVICE(0x08fd, 0x0002), .driver_info = BTUSB_IGNORE },
@@ -197,6 +174,8 @@ struct btusb_data {
 	struct usb_endpoint_descriptor *isoc_tx_ep;
 	struct usb_endpoint_descriptor *isoc_rx_ep;
 
+	__u8 cmdreq_type;
+
 	int isoc_altsetting;
 	int suspend_count;
 };
@@ -590,7 +569,7 @@ static int btusb_send_frame(struct sk_buff *skb)
 			return -ENOMEM;
 		}
 
-		dr->bRequestType = USB_TYPE_CLASS;
+		dr->bRequestType = data->cmdreq_type;
 		dr->bRequest     = 0;
 		dr->wIndex       = 0;
 		dr->wValue       = 0;
@@ -828,6 +807,8 @@ static int btusb_probe(struct usb_interface *intf,
 		return -ENODEV;
 	}
 
+	data->cmdreq_type = USB_TYPE_CLASS;
+
 	data->udev = interface_to_usbdev(intf);
 	data->intf = intf;
 
@@ -862,11 +843,11 @@ static int btusb_probe(struct usb_interface *intf,
 
 	hdev->owner = THIS_MODULE;
 
-	/* interface numbers are hardcoded in the spec */
+	/* Interface numbers are hardcoded in the specification */
 	data->isoc = usb_ifnum_to_if(data->udev, 1);
 
-	if (reset || id->driver_info & BTUSB_RESET)
-		set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks);
+	if (!reset)
+		set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks);
 
 	if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) {
 		if (!disable_scofix)
@@ -876,9 +857,23 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_BROKEN_ISOC)
 		data->isoc = NULL;
 
+	if (id->driver_info & BTUSB_DIGIANSWER) {
+		data->cmdreq_type = USB_TYPE_VENDOR;
+		set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks);
+	}
+
+	if (id->driver_info & BTUSB_CSR) {
+		struct usb_device *udev = data->udev;
+
+		/* Old firmware would otherwise execute USB reset */
+		if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x117)
+			set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks);
+	}
+
 	if (id->driver_info & BTUSB_SNIFFER) {
 		struct usb_device *udev = data->udev;
 
+		/* New sniffer firmware has crippled HCI interface */
 		if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997)
 			set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
 
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 4426bb552bd9..cb46bf52a19a 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -399,8 +399,8 @@ static int hci_uart_register_dev(struct hci_uart *hu)
 
 	hdev->owner = THIS_MODULE;
 
-	if (reset)
-		set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks);
+	if (!reset)
+		set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks);
 
 	if (hci_register_dev(hdev) < 0) {
 		BT_ERR("Can't register HCI device");
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3cc294919312..3645139e68c7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -54,7 +54,7 @@
 
 /* HCI device quirks */
 enum {
-	HCI_QUIRK_RESET_ON_INIT,
+	HCI_QUIRK_NO_RESET,
 	HCI_QUIRK_RAW_DEVICE,
 	HCI_QUIRK_FIXUP_BUFFER_SIZE
 };
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 41f07f549ba7..f6f216e57aa3 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -46,7 +46,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.13"
+#define VERSION "2.14"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 7bb0f1cb7f26..fa7c5370b556 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -205,7 +205,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	/* Mandatory initialization */
 
 	/* Reset */
-	if (test_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks))
+	if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks))
 			hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
 
 	/* Read Local Supported Features */
-- 
cgit v1.2.3


From a418b893a6af11ae73c762ed5b76c1bad6dc19d8 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 30 Nov 2008 12:17:28 +0100
Subject: Bluetooth: Enable per-module dynamic debug messages

With the introduction of CONFIG_DYNAMIC_PRINTK_DEBUG it is possible to
allow debugging without having to recompile the kernel. This patch turns
all BT_DBG() calls into pr_debug() to support dynamic debug messages.

As a side effect all CONFIG_BT_*_DEBUG statements are now removed and
some broken debug entries have been fixed.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/bcm203x.c       |  9 ++-------
 drivers/bluetooth/bfusb.c         | 11 +++--------
 drivers/bluetooth/bpa10x.c        |  5 -----
 drivers/bluetooth/btsdio.c        |  5 -----
 drivers/bluetooth/btusb.c         |  6 ------
 drivers/bluetooth/hci_bcsp.c      |  5 -----
 drivers/bluetooth/hci_h4.c        |  5 -----
 drivers/bluetooth/hci_ldisc.c     |  5 -----
 drivers/bluetooth/hci_ll.c        |  5 -----
 drivers/bluetooth/hci_vhci.c      |  5 -----
 include/net/bluetooth/bluetooth.h |  4 ++--
 net/bluetooth/af_bluetooth.c      |  7 +------
 net/bluetooth/bnep/bnep.h         |  2 +-
 net/bluetooth/bnep/core.c         |  5 -----
 net/bluetooth/bnep/netdev.c       |  5 -----
 net/bluetooth/bnep/sock.c         |  5 -----
 net/bluetooth/cmtp/capi.c         |  5 -----
 net/bluetooth/cmtp/core.c         |  5 -----
 net/bluetooth/cmtp/sock.c         |  5 -----
 net/bluetooth/hci_conn.c          |  5 -----
 net/bluetooth/hci_core.c          |  7 +------
 net/bluetooth/hci_event.c         |  5 -----
 net/bluetooth/hci_sock.c          |  5 -----
 net/bluetooth/hci_sysfs.c         |  5 -----
 net/bluetooth/hidp/core.c         |  5 -----
 net/bluetooth/hidp/sock.c         |  5 -----
 net/bluetooth/l2cap.c             |  5 -----
 net/bluetooth/rfcomm/core.c       |  5 -----
 net/bluetooth/rfcomm/sock.c       |  7 +------
 net/bluetooth/rfcomm/tty.c        |  5 -----
 net/bluetooth/sco.c               |  5 -----
 31 files changed, 11 insertions(+), 157 deletions(-)

(limited to 'include')

diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c
index ee40201c7278..eafd4af0746e 100644
--- a/drivers/bluetooth/bcm203x.c
+++ b/drivers/bluetooth/bcm203x.c
@@ -37,11 +37,6 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#ifndef CONFIG_BT_HCIBCM203X_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.2"
 
 static struct usb_device_id bcm203x_table[] = {
@@ -199,7 +194,7 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id
 		return -EIO;
 	}
 
-	BT_DBG("minidrv data %p size %d", firmware->data, firmware->size);
+	BT_DBG("minidrv data %p size %zu", firmware->data, firmware->size);
 
 	size = max_t(uint, firmware->size, 4096);
 
@@ -227,7 +222,7 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id
 		return -EIO;
 	}
 
-	BT_DBG("firmware data %p size %d", firmware->data, firmware->size);
+	BT_DBG("firmware data %p size %zu", firmware->data, firmware->size);
 
 	data->fw_data = kmalloc(firmware->size, GFP_KERNEL);
 	if (!data->fw_data) {
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 90a094634630..d3f14bee0f19 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -38,11 +38,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCIBFUSB_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.2"
 
 static struct usb_driver bfusb_driver;
@@ -221,7 +216,7 @@ static int bfusb_rx_submit(struct bfusb_data *data, struct urb *urb)
 	struct sk_buff *skb;
 	int err, pipe, size = HCI_MAX_FRAME_SIZE + 32;
 
-	BT_DBG("bfusb %p urb %p", bfusb, urb);
+	BT_DBG("bfusb %p urb %p", data, urb);
 
 	if (!urb && !(urb = usb_alloc_urb(0, GFP_ATOMIC)))
 		return -ENOMEM;
@@ -354,7 +349,7 @@ static void bfusb_rx_complete(struct urb *urb)
 	int count = urb->actual_length;
 	int err, hdr, len;
 
-	BT_DBG("bfusb %p urb %p skb %p len %d", bfusb, urb, skb, skb->len);
+	BT_DBG("bfusb %p urb %p skb %p len %d", data, urb, skb, skb->len);
 
 	read_lock(&data->lock);
 
@@ -691,7 +686,7 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i
 		goto error;
 	}
 
-	BT_DBG("firmware data %p size %d", firmware->data, firmware->size);
+	BT_DBG("firmware data %p size %zu", firmware->data, firmware->size);
 
 	if (bfusb_load_firmware(data, firmware->data, firmware->size) < 0) {
 		BT_ERR("Firmware loading failed");
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 4f0a57236d23..c115285867c3 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -35,11 +35,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCIBPA10X_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "0.10"
 
 static struct usb_device_id bpa10x_table[] = {
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index f2ada0c65486..7e298275c8f6 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -37,11 +37,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCIBTSDIO_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "0.1"
 
 static const struct sdio_device_id btsdio_table[] = {
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 7f7526c186d0..b5fbda6d490a 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -35,12 +35,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-//#define CONFIG_BT_HCIBTUSB_DEBUG
-#ifndef CONFIG_BT_HCIBTUSB_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "0.4"
 
 static int ignore_dga;
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index 7938062c1cc7..894b2cb11ea6 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -47,11 +47,6 @@
 
 #include "hci_uart.h"
 
-#ifndef CONFIG_BT_HCIUART_DEBUG
-#undef  BT_DBG
-#define BT_DBG( A... )
-#endif
-
 #define VERSION "0.3"
 
 static int txcrc = 1;
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index bfbae14cf93d..b0fafb055996 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -46,11 +46,6 @@
 
 #include "hci_uart.h"
 
-#ifndef CONFIG_BT_HCIUART_DEBUG
-#undef  BT_DBG
-#define BT_DBG( A... )
-#endif
-
 #define VERSION "1.2"
 
 struct h4_struct {
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index cb46bf52a19a..af761dc434f6 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -46,11 +46,6 @@
 
 #include "hci_uart.h"
 
-#ifndef CONFIG_BT_HCIUART_DEBUG
-#undef  BT_DBG
-#define BT_DBG( A... )
-#endif
-
 #define VERSION "2.2"
 
 static int reset = 0;
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 2d2f66e17fea..b91d45a41b2f 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -51,11 +51,6 @@
 
 #include "hci_uart.h"
 
-#ifndef CONFIG_BT_HCIUART_DEBUG
-#undef  BT_DBG
-#define BT_DBG( A... )
-#endif
-
 /* HCILL commands */
 #define HCILL_GO_TO_SLEEP_IND	0x30
 #define HCILL_GO_TO_SLEEP_ACK	0x31
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 7320a71b6368..0bbefba6469c 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -40,11 +40,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCIVHCI_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.2"
 
 static int minor = MISC_DYNAMIC_MINOR;
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 996d12df7594..a04f8463ac7e 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -54,8 +54,8 @@
 #define SOL_RFCOMM	18
 
 #define BT_INFO(fmt, arg...) printk(KERN_INFO "Bluetooth: " fmt "\n" , ## arg)
-#define BT_DBG(fmt, arg...)  printk(KERN_INFO "%s: " fmt "\n" , __func__ , ## arg)
-#define BT_ERR(fmt, arg...)  printk(KERN_ERR  "%s: " fmt "\n" , __func__ , ## arg)
+#define BT_ERR(fmt, arg...)  printk(KERN_ERR "%s: " fmt "\n" , __func__ , ## arg)
+#define BT_DBG(fmt, arg...)  pr_debug("%s: " fmt "\n" , __func__ , ## arg)
 
 /* Connection and socket states */
 enum {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f6f216e57aa3..744ed3f07ef3 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -41,11 +41,6 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#ifndef CONFIG_BT_SOCK_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "2.14"
 
 /* Bluetooth sockets */
@@ -245,7 +240,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	size_t copied;
 	int err;
 
-	BT_DBG("sock %p sk %p len %d", sock, sk, len);
+	BT_DBG("sock %p sk %p len %zu", sock, sk, len);
 
 	if (flags & (MSG_OOB))
 		return -EOPNOTSUPP;
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index b69bf4e7c48b..d20f8a40f36e 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -161,7 +161,7 @@ struct bnep_session {
 	struct msghdr msg;
 
 	struct bnep_proto_filter proto_filter[BNEP_MAX_PROTO_FILTERS];
-	u64    mc_filter;
+	unsigned long long mc_filter;
 
 	struct socket    *sock;
 	struct net_device *dev;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index f8efaf35293c..70fea8bdb4e5 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -52,11 +52,6 @@
 
 #include "bnep.h"
 
-#ifndef CONFIG_BT_BNEP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.3"
 
 static int compress_src = 1;
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 47e179f62e82..f897da6e0444 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -41,11 +41,6 @@
 
 #include "bnep.h"
 
-#ifndef CONFIG_BT_BNEP_DEBUG
-#undef  BT_DBG
-#define BT_DBG( A... )
-#endif
-
 #define BNEP_TX_QUEUE_LEN 20
 
 static int bnep_net_open(struct net_device *dev)
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 8ffb57f2303a..e857628b0b27 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -46,11 +46,6 @@
 
 #include "bnep.h"
 
-#ifndef CONFIG_BT_BNEP_DEBUG
-#undef  BT_DBG
-#define BT_DBG( A... )
-#endif
-
 static int bnep_sock_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 3e9d5bb3fefb..78958c0f9a40 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -42,11 +42,6 @@
 
 #include "cmtp.h"
 
-#ifndef CONFIG_BT_CMTP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define CAPI_INTEROPERABILITY		0x20
 
 #define CAPI_INTEROPERABILITY_REQ	CAPICMD(CAPI_INTEROPERABILITY, CAPI_REQ)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index ca60a4517fd3..c9cac7719efe 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -44,11 +44,6 @@
 
 #include "cmtp.h"
 
-#ifndef CONFIG_BT_CMTP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.0"
 
 static DECLARE_RWSEM(cmtp_session_sem);
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 8c7f7bc4e0ba..16b0fad74f6e 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -43,11 +43,6 @@
 
 #include "cmtp.h"
 
-#ifndef CONFIG_BT_CMTP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 static int cmtp_sock_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b7002429f152..a4a789f24c8d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -45,11 +45,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCI_CORE_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 void hci_acl_connect(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fa7c5370b556..ba78cc1eb8d9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -48,11 +48,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCI_CORE_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 static void hci_cmd_task(unsigned long arg);
 static void hci_rx_task(unsigned long arg);
 static void hci_tx_task(unsigned long arg);
@@ -290,7 +285,7 @@ static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt)
 {
 	__le16 policy = cpu_to_le16(opt);
 
-	BT_DBG("%s %x", hdev->name, opt);
+	BT_DBG("%s %x", hdev->name, policy);
 
 	/* Default link policy */
 	hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ad7a553d7713..f91ba690f5d2 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -45,11 +45,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCI_CORE_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 /* Handle HCI Event packets */
 
 static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index d62579b67959..4f9621f759a0 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -49,11 +49,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCI_SOCK_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 /* ----- HCI socket interface ----- */
 
 static inline int hci_test_bit(int nr, void *addr)
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index f2bbb2f65434..6490bf8402f3 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -6,11 +6,6 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#ifndef CONFIG_BT_HCI_CORE_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 struct class *bt_class = NULL;
 EXPORT_SYMBOL_GPL(bt_class);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index acdeab3d9807..b18676870d55 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -47,11 +47,6 @@
 
 #include "hidp.h"
 
-#ifndef CONFIG_BT_HIDP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.2"
 
 static DECLARE_RWSEM(hidp_session_sem);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index f4dd02ca9a96..37c9d7d2e688 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -39,11 +39,6 @@
 
 #include "hidp.h"
 
-#ifndef CONFIG_BT_HIDP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 static int hidp_sock_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 9610a9c85b98..b93748e224ff 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -50,11 +50,6 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/l2cap.h>
 
-#ifndef CONFIG_BT_L2CAP_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "2.11"
 
 static u32 l2cap_feat_mask = 0x0000;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index ba537fae0a4c..37c640d1c3fd 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -46,11 +46,6 @@
 #include <net/bluetooth/l2cap.h>
 #include <net/bluetooth/rfcomm.h>
 
-#ifndef CONFIG_BT_RFCOMM_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "1.10"
 
 static int disable_cfc = 0;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index bc0d4a7ce6ae..ad00cbf449cb 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -50,11 +50,6 @@
 #include <net/bluetooth/l2cap.h>
 #include <net/bluetooth/rfcomm.h>
 
-#ifndef CONFIG_BT_RFCOMM_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 static const struct proto_ops rfcomm_sock_ops;
 
 static struct bt_sock_list rfcomm_sk_list = {
@@ -644,7 +639,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	msg->msg_namelen = 0;
 
-	BT_DBG("sk %p size %d", sk, size);
+	BT_DBG("sk %p size %zu", sk, size);
 
 	lock_sock(sk);
 
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index d3340dd52bcf..1e4100bb0b65 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -39,11 +39,6 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/rfcomm.h>
 
-#ifndef CONFIG_BT_RFCOMM_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define RFCOMM_TTY_MAGIC 0x6d02		/* magic number for rfcomm struct */
 #define RFCOMM_TTY_PORTS RFCOMM_MAX_DEV	/* whole lotta rfcomm devices */
 #define RFCOMM_TTY_MAJOR 216		/* device node major id of the usb/bluetooth.c driver */
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 0cc91e6da76d..46fd8bf9a690 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -48,11 +48,6 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/sco.h>
 
-#ifndef CONFIG_BT_SCO_DEBUG
-#undef  BT_DBG
-#define BT_DBG(D...)
-#endif
-
 #define VERSION "0.6"
 
 static int disable_esco = 0;
-- 
cgit v1.2.3


From 6c415b9234a8c71f290e5d4fddc467f103f32719 Mon Sep 17 00:00:00 2001
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Date: Mon, 1 Dec 2008 20:49:05 +0530
Subject: sched: add uid information to sched_debug for CONFIG_USER_SCHED

Impact: extend information in /proc/sched_debug

This patch adds uid information in sched_debug for CONFIG_USER_SCHED

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 +
 kernel/sched.c        | 10 ++++++++++
 kernel/sched_debug.c  |  6 +++++-
 kernel/user.c         |  2 ++
 4 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7a69c4d224ee..d8733f07d80b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2218,6 +2218,7 @@ extern void normalize_rt_tasks(void);
 extern struct task_group init_task_group;
 #ifdef CONFIG_USER_SCHED
 extern struct task_group root_task_group;
+extern void set_tg_uid(struct user_struct *user);
 #endif
 
 extern struct task_group *sched_create_group(struct task_group *parent);
diff --git a/kernel/sched.c b/kernel/sched.c
index 6a99703e0eb0..4c7388ef5be7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -261,6 +261,10 @@ struct task_group {
 	struct cgroup_subsys_state css;
 #endif
 
+#ifdef CONFIG_USER_SCHED
+	uid_t uid;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
@@ -286,6 +290,12 @@ struct task_group {
 
 #ifdef CONFIG_USER_SCHED
 
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+	user->tg->uid = user->uid;
+}
+
 /*
  * Root task group.
  * 	Every UID task group (including init_task_group aka UID-0) will
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index baf2f17af462..4293cfa9681d 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -160,10 +160,14 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	cgroup_path(tg->css.cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
+#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
+	{
+		uid_t uid = cfs_rq->tg->uid;
+		SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
+	}
 #else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
 #endif
-
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
diff --git a/kernel/user.c b/kernel/user.c
index 39d6159fae43..cec2224bc9f5 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,6 +101,8 @@ static int sched_create_user(struct user_struct *up)
 	if (IS_ERR(up->tg))
 		rc = -ENOMEM;
 
+	set_tg_uid(up);
+
 	return rc;
 }
 
-- 
cgit v1.2.3


From 968a6025aa9f909d487988efb542217a126023a0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 28 Nov 2008 11:49:07 +0000
Subject: ASoC: Rename snd_soc_register_card() to snd_soc_init_card()

Currently ASoC card initialisation is completed by a function called
snd_soc_register_card().  As part of the work to allow independant
registration of cards, codecs and machines in ASoC v2 a new function of
the same name has been added so rename the existing function to
facilitate the merge of v2.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h            | 2 +-
 sound/soc/codecs/ac97.c        | 2 +-
 sound/soc/codecs/ad1980.c      | 2 +-
 sound/soc/codecs/ad73311.c     | 2 +-
 sound/soc/codecs/ak4535.c      | 2 +-
 sound/soc/codecs/cs4270.c      | 2 +-
 sound/soc/codecs/pcm3008.c     | 2 +-
 sound/soc/codecs/ssm2602.c     | 2 +-
 sound/soc/codecs/tlv320aic23.c | 2 +-
 sound/soc/codecs/tlv320aic26.c | 2 +-
 sound/soc/codecs/tlv320aic3x.c | 2 +-
 sound/soc/codecs/twl4030.c     | 2 +-
 sound/soc/codecs/uda134x.c     | 2 +-
 sound/soc/codecs/uda1380.c     | 2 +-
 sound/soc/codecs/wm8510.c      | 2 +-
 sound/soc/codecs/wm8580.c      | 2 +-
 sound/soc/codecs/wm8728.c      | 2 +-
 sound/soc/codecs/wm8731.c      | 2 +-
 sound/soc/codecs/wm8750.c      | 2 +-
 sound/soc/codecs/wm8753.c      | 2 +-
 sound/soc/codecs/wm8900.c      | 2 +-
 sound/soc/codecs/wm8903.c      | 2 +-
 sound/soc/codecs/wm8971.c      | 2 +-
 sound/soc/codecs/wm8990.c      | 2 +-
 sound/soc/codecs/wm9712.c      | 2 +-
 sound/soc/codecs/wm9713.c      | 2 +-
 sound/soc/soc-core.c           | 6 +++---
 27 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 444f9c211379..9356c1ce98c1 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -162,7 +162,7 @@ extern struct snd_ac97_bus_ops soc_ac97_ops;
 /* pcm <-> DAI connect */
 void snd_soc_free_pcms(struct snd_soc_device *socdev);
 int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid);
-int snd_soc_register_card(struct snd_soc_device *socdev);
+int snd_soc_init_card(struct snd_soc_device *socdev);
 
 /* set runtime hw params */
 int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream,
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index c4208c8210c8..fb53e6511af2 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -114,7 +114,7 @@ static int ac97_soc_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto bus_err;
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0)
 		goto bus_err;
 	return 0;
diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c
index a9a268112d3f..73fdbb4d4a3d 100644
--- a/sound/soc/codecs/ad1980.c
+++ b/sound/soc/codecs/ad1980.c
@@ -270,7 +270,7 @@ static int ad1980_soc_probe(struct platform_device *pdev)
 	ac97_write(codec, AC97_EXTENDED_STATUS, ext_status&~0x3800);
 
 	ad1980_add_controls(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "ad1980: failed to register card\n");
 		goto reset_err;
diff --git a/sound/soc/codecs/ad73311.c b/sound/soc/codecs/ad73311.c
index 59c4c8f18cbb..0f4110f4bceb 100644
--- a/sound/soc/codecs/ad73311.c
+++ b/sound/soc/codecs/ad73311.c
@@ -67,7 +67,7 @@ static int ad73311_soc_probe(struct platform_device *pdev)
 		goto pcm_err;
 	}
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "ad73311: failed to register card\n");
 		goto register_err;
diff --git a/sound/soc/codecs/ak4535.c b/sound/soc/codecs/ak4535.c
index c742290e5533..23062c952e85 100644
--- a/sound/soc/codecs/ak4535.c
+++ b/sound/soc/codecs/ak4535.c
@@ -512,7 +512,7 @@ static int ak4535_init(struct snd_soc_device *socdev)
 
 	ak4535_add_controls(codec);
 	ak4535_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "ak4535: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c
index 7507d468b200..4667a07b566c 100644
--- a/sound/soc/codecs/cs4270.c
+++ b/sound/soc/codecs/cs4270.c
@@ -723,7 +723,7 @@ static int cs4270_probe(struct platform_device *pdev)
 	printk(KERN_INFO "cs4270: I2C disabled, using stand-alone mode\n");
 #endif
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "cs4270: failed to register card\n");
 		goto error_del_driver;
diff --git a/sound/soc/codecs/pcm3008.c b/sound/soc/codecs/pcm3008.c
index 651a15eb8c2c..a5862555b444 100644
--- a/sound/soc/codecs/pcm3008.c
+++ b/sound/soc/codecs/pcm3008.c
@@ -91,7 +91,7 @@ static int pcm3008_soc_probe(struct platform_device *pdev)
 	}
 
 	/* Register Card. */
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "pcm3008: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index 0c5884ea1b00..973844973fe1 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -624,7 +624,7 @@ static int ssm2602_init(struct snd_soc_device *socdev)
 
 	ssm2602_add_controls(codec);
 	ssm2602_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		pr_err("ssm2602: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c
index a4e13d0688c9..d209bec02a69 100644
--- a/sound/soc/codecs/tlv320aic23.c
+++ b/sound/soc/codecs/tlv320aic23.c
@@ -720,7 +720,7 @@ static int tlv320aic23_init(struct snd_soc_device *socdev)
 
 	tlv320aic23_add_controls(codec);
 	tlv320aic23_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "tlv320aic23: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/tlv320aic26.c b/sound/soc/codecs/tlv320aic26.c
index 6b7ddfc92573..e33fb7e00d1e 100644
--- a/sound/soc/codecs/tlv320aic26.c
+++ b/sound/soc/codecs/tlv320aic26.c
@@ -359,7 +359,7 @@ static int aic26_probe(struct platform_device *pdev)
 
 	/* CODEC is setup, we can register the card now */
 	dev_dbg(&pdev->dev, "Registering card\n");
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "aic26: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index 255e784c805b..0f4067bdd4a3 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -1187,7 +1187,7 @@ static int aic3x_init(struct snd_soc_device *socdev)
 
 	aic3x_add_controls(codec);
 	aic3x_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "aic3x: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index 413623147891..f3e9e591b52f 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -764,7 +764,7 @@ static int twl4030_init(struct snd_soc_device *socdev)
 	twl4030_add_controls(codec);
 	twl4030_add_widgets(codec);
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "twl4030: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c
index 91f333cdc7cf..58de749185e6 100644
--- a/sound/soc/codecs/uda134x.c
+++ b/sound/soc/codecs/uda134x.c
@@ -578,7 +578,7 @@ static int uda134x_soc_probe(struct platform_device *pdev)
 		goto pcm_err;
 	}
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "UDA134X: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c
index 330877c70699..42491650593f 100644
--- a/sound/soc/codecs/uda1380.c
+++ b/sound/soc/codecs/uda1380.c
@@ -677,7 +677,7 @@ static int uda1380_init(struct snd_soc_device *socdev, int dac_clk)
 	/* uda1380 init */
 	uda1380_add_controls(codec);
 	uda1380_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		pr_err("uda1380: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c
index 173b66c0c766..126c70f749d1 100644
--- a/sound/soc/codecs/wm8510.c
+++ b/sound/soc/codecs/wm8510.c
@@ -658,7 +658,7 @@ static int wm8510_init(struct snd_soc_device *socdev)
 	wm8510_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 	wm8510_add_controls(codec);
 	wm8510_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8510: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c
index 220d4b68904a..572a31de3219 100644
--- a/sound/soc/codecs/wm8580.c
+++ b/sound/soc/codecs/wm8580.c
@@ -869,7 +869,7 @@ static int wm8580_init(struct snd_soc_device *socdev)
 	wm8580_add_controls(codec);
 	wm8580_add_widgets(codec);
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8580: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8728.c b/sound/soc/codecs/wm8728.c
index 71949bd320d3..28f12c6a6ac8 100644
--- a/sound/soc/codecs/wm8728.c
+++ b/sound/soc/codecs/wm8728.c
@@ -332,7 +332,7 @@ static int wm8728_init(struct snd_soc_device *socdev)
 
 	wm8728_add_controls(codec);
 	wm8728_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8728: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c
index c0f277053bb2..403dea13b5d9 100644
--- a/sound/soc/codecs/wm8731.c
+++ b/sound/soc/codecs/wm8731.c
@@ -545,7 +545,7 @@ static int wm8731_init(struct snd_soc_device *socdev)
 
 	wm8731_add_controls(codec);
 	wm8731_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8731: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c
index 860a1d56830a..979446f5c983 100644
--- a/sound/soc/codecs/wm8750.c
+++ b/sound/soc/codecs/wm8750.c
@@ -818,7 +818,7 @@ static int wm8750_init(struct snd_soc_device *socdev)
 
 	wm8750_add_controls(codec);
 	wm8750_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8750: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index 5e4cd3bb824a..96c0453fffb3 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -1605,7 +1605,7 @@ static int wm8753_init(struct snd_soc_device *socdev)
 
 	wm8753_add_controls(codec);
 	wm8753_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8753: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c
index d1326be91c8b..29cd83991c5b 100644
--- a/sound/soc/codecs/wm8900.c
+++ b/sound/soc/codecs/wm8900.c
@@ -1365,7 +1365,7 @@ static int wm8900_init(struct snd_soc_device *socdev)
 	wm8900_add_controls(codec);
 	wm8900_add_widgets(codec);
 
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		dev_err(&i2c_client->dev, "Failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index efbe8927b7d2..393a4c198823 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1648,7 +1648,7 @@ static int wm8903_init(struct snd_soc_device *socdev)
 
 	wm8903_add_controls(codec);
 	wm8903_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		dev_err(&i2c->dev, "wm8903: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8971.c b/sound/soc/codecs/wm8971.c
index 26edcc9d6e87..53e6937e9ba1 100644
--- a/sound/soc/codecs/wm8971.c
+++ b/sound/soc/codecs/wm8971.c
@@ -747,7 +747,7 @@ static int wm8971_init(struct snd_soc_device *socdev)
 
 	wm8971_add_controls(codec);
 	wm8971_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8971: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c
index 13926516d16e..5c5128b6b453 100644
--- a/sound/soc/codecs/wm8990.c
+++ b/sound/soc/codecs/wm8990.c
@@ -1462,7 +1462,7 @@ static int wm8990_init(struct snd_soc_device *socdev)
 
 	wm8990_add_controls(codec);
 	wm8990_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm8990: failed to register card\n");
 		goto card_err;
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 40f14061fb72..af83d629078a 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -700,7 +700,7 @@ static int wm9712_soc_probe(struct platform_device *pdev)
 	wm9712_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 	wm9712_add_controls(codec);
 	wm9712_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0) {
 		printk(KERN_ERR "wm9712: failed to register card\n");
 		goto reset_err;
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index 9dad0bffcb05..49962a88770d 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -1247,7 +1247,7 @@ static int wm9713_soc_probe(struct platform_device *pdev)
 
 	wm9713_add_controls(codec);
 	wm9713_add_widgets(codec);
-	ret = snd_soc_register_card(socdev);
+	ret = snd_soc_init_card(socdev);
 	if (ret < 0)
 		goto reset_err;
 	return 0;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 13b4aaff0e9c..0448708245e7 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1233,7 +1233,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid)
 EXPORT_SYMBOL_GPL(snd_soc_new_pcms);
 
 /**
- * snd_soc_register_card - register sound card
+ * snd_soc_init_card - register sound card
  * @socdev: the SoC audio device
  *
  * Register a SoC sound card. Also registers an AC97 device if the
@@ -1241,7 +1241,7 @@ EXPORT_SYMBOL_GPL(snd_soc_new_pcms);
  *
  * Returns 0 for success, else error.
  */
-int snd_soc_register_card(struct snd_soc_device *socdev)
+int snd_soc_init_card(struct snd_soc_device *socdev)
 {
 	struct snd_soc_codec *codec = socdev->codec;
 	struct snd_soc_card *card = socdev->card;
@@ -1298,7 +1298,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev)
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(snd_soc_register_card);
+EXPORT_SYMBOL_GPL(snd_soc_init_card);
 
 /**
  * snd_soc_free_pcms - free sound card and pcms
-- 
cgit v1.2.3


From 6308419a199eed66086cd756ab8dc81b88d54a6b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 2 Dec 2008 15:08:03 +0000
Subject: ASoC: Push workqueue data into snd_soc_card

ASoC v2 does not use the struct snd_soc_device at runtime, using struct
snd_soc_card as the root of the card.  Begin removing data from
snd_soc_device by pushing the workqueue data into snd_soc_card, using a
backpointer to the snd_soc_device to keep things going for the time
being.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  |  7 +++++--
 sound/soc/soc-core.c | 33 ++++++++++++++++++++-------------
 2 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 9356c1ce98c1..359ec49f8d0d 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -349,6 +349,11 @@ struct snd_soc_card {
 	/* CPU <--> Codec DAI links  */
 	struct snd_soc_dai_link *dai_link;
 	int num_links;
+
+	struct snd_soc_device *socdev;
+
+	struct delayed_work delayed_work;
+	struct work_struct deferred_resume_work;
 };
 
 /* SoC Device - the audio subsystem */
@@ -358,8 +363,6 @@ struct snd_soc_device {
 	struct snd_soc_platform *platform;
 	struct snd_soc_codec *codec;
 	struct snd_soc_codec_device *codec_dev;
-	struct delayed_work delayed_work;
-	struct work_struct deferred_resume_work;
 	void *codec_data;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry	*debugfs_root;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 7eb2ea1fd42e..c4b22e6984e6 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -247,8 +247,9 @@ out:
  */
 static void close_delayed_work(struct work_struct *work)
 {
-	struct snd_soc_device *socdev =
-		container_of(work, struct snd_soc_device, delayed_work.work);
+	struct snd_soc_card *card = container_of(work, struct snd_soc_card,
+						 delayed_work.work);
+	struct snd_soc_device *socdev = card->socdev;
 	struct snd_soc_codec *codec = socdev->codec;
 	struct snd_soc_dai *codec_dai;
 	int i;
@@ -299,6 +300,7 @@ static int soc_codec_close(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_dai_link *machine = rtd->dai;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
@@ -340,7 +342,7 @@ static int soc_codec_close(struct snd_pcm_substream *substream)
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		/* start delayed pop wq here for playback streams */
 		codec_dai->pop_wait = 1;
-		schedule_delayed_work(&socdev->delayed_work,
+		schedule_delayed_work(&card->delayed_work,
 			msecs_to_jiffies(pmdown_time));
 	} else {
 		/* capture streams can be powered down now */
@@ -366,6 +368,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_dai_link *machine = rtd->dai;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
@@ -411,7 +414,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    codec_dai->pop_wait) {
 		codec_dai->pop_wait = 0;
-		cancel_delayed_work(&socdev->delayed_work);
+		cancel_delayed_work(&card->delayed_work);
 	}
 
 	/* do we need to power up codec */
@@ -645,7 +648,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	}
 
 	/* close any waiting streams and save state */
-	run_delayed_work(&socdev->delayed_work);
+	run_delayed_work(&card->delayed_work);
 	codec->suspend_bias_level = codec->bias_level;
 
 	for (i = 0; i < codec->num_dai; i++) {
@@ -679,10 +682,10 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
  */
 static void soc_resume_deferred(struct work_struct *work)
 {
-	struct snd_soc_device *socdev = container_of(work,
-						     struct snd_soc_device,
-						     deferred_resume_work);
-	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_card *card = container_of(work,
+						 struct snd_soc_card,
+						 deferred_resume_work);
+	struct snd_soc_device *socdev = card->socdev;
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 	struct snd_soc_codec *codec = socdev->codec;
@@ -746,10 +749,11 @@ static void soc_resume_deferred(struct work_struct *work)
 static int soc_resume(struct platform_device *pdev)
 {
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_card *card = socdev->card;
 
 	dev_dbg(socdev->dev, "scheduling resume work\n");
 
-	if (!schedule_work(&socdev->deferred_resume_work))
+	if (!schedule_work(&card->deferred_resume_work))
 		dev_err(socdev->dev, "resume work item may be lost\n");
 
 	return 0;
@@ -769,6 +773,9 @@ static int soc_probe(struct platform_device *pdev)
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 
+	/* Bodge while we push things out of socdev */
+	card->socdev = socdev;
+
 	if (card->probe) {
 		ret = card->probe(pdev);
 		if (ret < 0)
@@ -797,10 +804,10 @@ static int soc_probe(struct platform_device *pdev)
 	}
 
 	/* DAPM stream work */
-	INIT_DELAYED_WORK(&socdev->delayed_work, close_delayed_work);
+	INIT_DELAYED_WORK(&card->delayed_work, close_delayed_work);
 #ifdef CONFIG_PM
 	/* deferred resume work */
-	INIT_WORK(&socdev->deferred_resume_work, soc_resume_deferred);
+	INIT_WORK(&card->deferred_resume_work, soc_resume_deferred);
 #endif
 
 	return 0;
@@ -831,7 +838,7 @@ static int soc_remove(struct platform_device *pdev)
 	struct snd_soc_platform *platform = socdev->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 
-	run_delayed_work(&socdev->delayed_work);
+	run_delayed_work(&card->delayed_work);
 
 	if (platform->remove)
 		platform->remove(pdev);
-- 
cgit v1.2.3


From 87689d567a45f80416feea0a2aa6d3a2a6b8963a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 2 Dec 2008 16:01:14 +0000
Subject: ASoC: Push platform registration down into the card

As part of the deprecation of snd_soc_device push the registration of
the platform down into the card structure.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h                  |  2 +-
 sound/soc/atmel/playpaq_wm8510.c     |  2 +-
 sound/soc/atmel/sam9g20_wm8731.c     |  2 +-
 sound/soc/blackfin/bf5xx-ad1980.c    |  2 +-
 sound/soc/blackfin/bf5xx-ad73311.c   |  2 +-
 sound/soc/blackfin/bf5xx-ssm2602.c   |  2 +-
 sound/soc/davinci/davinci-evm.c      |  2 +-
 sound/soc/davinci/davinci-i2s.c      |  2 +-
 sound/soc/davinci/davinci-sffsdr.c   |  2 +-
 sound/soc/fsl/mpc8610_hpcd.c         |  2 +-
 sound/soc/fsl/soc-of-simple.c        |  2 +-
 sound/soc/omap/n810.c                |  2 +-
 sound/soc/omap/omap2evm.c            |  2 +-
 sound/soc/omap/omap3beagle.c         |  2 +-
 sound/soc/omap/osk5912.c             |  2 +-
 sound/soc/omap/overo.c               |  2 +-
 sound/soc/omap/sdp3430.c             |  2 +-
 sound/soc/pxa/corgi.c                |  2 +-
 sound/soc/pxa/e800_wm9712.c          |  2 +-
 sound/soc/pxa/em-x270.c              |  2 +-
 sound/soc/pxa/palm27x.c              |  2 +-
 sound/soc/pxa/poodle.c               |  2 +-
 sound/soc/pxa/spitz.c                |  2 +-
 sound/soc/pxa/tosa.c                 |  2 +-
 sound/soc/pxa/zylonite.c             |  2 +-
 sound/soc/s3c24xx/ln2440sbc_alc650.c |  2 +-
 sound/soc/s3c24xx/neo1973_wm8753.c   |  2 +-
 sound/soc/s3c24xx/s3c24xx_uda134x.c  |  2 +-
 sound/soc/s3c24xx/smdk2443_wm9710.c  |  2 +-
 sound/soc/sh/sh7760-ac97.c           |  2 +-
 sound/soc/soc-core.c                 | 44 ++++++++++++++++++++----------------
 31 files changed, 55 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 359ec49f8d0d..ad8141acd6b0 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -352,6 +352,7 @@ struct snd_soc_card {
 
 	struct snd_soc_device *socdev;
 
+	struct snd_soc_platform *platform;
 	struct delayed_work delayed_work;
 	struct work_struct deferred_resume_work;
 };
@@ -360,7 +361,6 @@ struct snd_soc_card {
 struct snd_soc_device {
 	struct device *dev;
 	struct snd_soc_card *card;
-	struct snd_soc_platform *platform;
 	struct snd_soc_codec *codec;
 	struct snd_soc_codec_device *codec_dev;
 	void *codec_data;
diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c
index d40b5a52a8d2..43dd8cee83c6 100644
--- a/sound/soc/atmel/playpaq_wm8510.c
+++ b/sound/soc/atmel/playpaq_wm8510.c
@@ -363,6 +363,7 @@ static struct snd_soc_dai_link playpaq_wm8510_dai = {
 
 static struct snd_soc_card snd_soc_playpaq = {
 	.name = "LRS_PlayPaq_WM8510",
+	.platform = &at32_soc_platform,
 	.dai_link = &playpaq_wm8510_dai,
 	.num_links = 1,
 };
@@ -378,7 +379,6 @@ static struct wm8510_setup_data playpaq_wm8510_setup = {
 
 static struct snd_soc_device playpaq_wm8510_snd_devdata = {
 	.card = &snd_soc_playpaq,
-	.platform = &at32_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8510,
 	.codec_data = &playpaq_wm8510_setup,
 };
diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c
index fdc1d0206e0b..1fb59a9d3719 100644
--- a/sound/soc/atmel/sam9g20_wm8731.c
+++ b/sound/soc/atmel/sam9g20_wm8731.c
@@ -244,6 +244,7 @@ static struct snd_soc_dai_link at91sam9g20ek_dai = {
 
 static struct snd_soc_card snd_soc_at91sam9g20ek = {
 	.name = "WM8731",
+	.platform = &atmel_soc_platform,
 	.dai_link = &at91sam9g20ek_dai,
 	.num_links = 1,
 };
@@ -255,7 +256,6 @@ static struct wm8731_setup_data at91sam9g20ek_wm8731_setup = {
 
 static struct snd_soc_device at91sam9g20ek_snd_devdata = {
 	.card = &snd_soc_at91sam9g20ek,
-	.platform = &atmel_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &at91sam9g20ek_wm8731_setup,
 };
diff --git a/sound/soc/blackfin/bf5xx-ad1980.c b/sound/soc/blackfin/bf5xx-ad1980.c
index 36c569a43ce1..d8f591273778 100644
--- a/sound/soc/blackfin/bf5xx-ad1980.c
+++ b/sound/soc/blackfin/bf5xx-ad1980.c
@@ -69,13 +69,13 @@ static struct snd_soc_dai_link bf5xx_board_dai = {
 
 static struct snd_soc_card bf5xx_board = {
 	.name = "bf5xx-board",
+	.platform = &bf5xx_ac97_soc_platform,
 	.dai_link = &bf5xx_board_dai,
 	.num_links = 1,
 };
 
 static struct snd_soc_device bf5xx_board_snd_devdata = {
 	.card = &bf5xx_board,
-	.platform = &bf5xx_ac97_soc_platform,
 	.codec_dev = &soc_codec_dev_ad1980,
 };
 
diff --git a/sound/soc/blackfin/bf5xx-ad73311.c b/sound/soc/blackfin/bf5xx-ad73311.c
index 57da14799375..7f2a5e199075 100644
--- a/sound/soc/blackfin/bf5xx-ad73311.c
+++ b/sound/soc/blackfin/bf5xx-ad73311.c
@@ -192,6 +192,7 @@ static struct snd_soc_dai_link bf5xx_ad73311_dai = {
 
 static struct snd_soc_card bf5xx_ad73311 = {
 	.name = "bf5xx_ad73311",
+	.platform = &bf5xx_i2s_soc_platform,
 	.probe = bf5xx_probe,
 	.dai_link = &bf5xx_ad73311_dai,
 	.num_links = 1,
@@ -199,7 +200,6 @@ static struct snd_soc_card bf5xx_ad73311 = {
 
 static struct snd_soc_device bf5xx_ad73311_snd_devdata = {
 	.card = &bf5xx_ad73311,
-	.platform = &bf5xx_i2s_soc_platform,
 	.codec_dev = &soc_codec_dev_ad73311,
 };
 
diff --git a/sound/soc/blackfin/bf5xx-ssm2602.c b/sound/soc/blackfin/bf5xx-ssm2602.c
index 0078dfcd95b9..bc0cdded7116 100644
--- a/sound/soc/blackfin/bf5xx-ssm2602.c
+++ b/sound/soc/blackfin/bf5xx-ssm2602.c
@@ -137,13 +137,13 @@ static struct ssm2602_setup_data bf5xx_ssm2602_setup = {
 
 static struct snd_soc_card bf5xx_ssm2602 = {
 	.name = "bf5xx_ssm2602",
+	.platform = &bf5xx_i2s_soc_platform,
 	.dai_link = &bf5xx_ssm2602_dai,
 	.num_links = 1,
 };
 
 static struct snd_soc_device bf5xx_ssm2602_snd_devdata = {
 	.card = &bf5xx_ssm2602,
-	.platform = &bf5xx_i2s_soc_platform,
 	.codec_dev = &soc_codec_dev_ssm2602,
 	.codec_data = &bf5xx_ssm2602_setup,
 };
diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c
index 2ce34d44b15c..d87b91179cc8 100644
--- a/sound/soc/davinci/davinci-evm.c
+++ b/sound/soc/davinci/davinci-evm.c
@@ -130,6 +130,7 @@ static struct snd_soc_dai_link evm_dai = {
 /* davinci-evm audio machine driver */
 static struct snd_soc_card snd_soc_card_evm = {
 	.name = "DaVinci EVM",
+	.platform = &davinci_soc_platform,
 	.dai_link = &evm_dai,
 	.num_links = 1,
 };
@@ -143,7 +144,6 @@ static struct aic3x_setup_data evm_aic3x_setup = {
 /* evm audio subsystem */
 static struct snd_soc_device evm_snd_devdata = {
 	.card = &snd_soc_card_evm,
-	.platform = &davinci_soc_platform,
 	.codec_dev = &soc_codec_dev_aic3x,
 	.codec_data = &evm_aic3x_setup,
 };
diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c
index cf31b3bb96cf..8b99efbc64c6 100644
--- a/sound/soc/davinci/davinci-i2s.c
+++ b/sound/soc/davinci/davinci-i2s.c
@@ -112,7 +112,7 @@ static void davinci_mcbsp_start(struct snd_pcm_substream *substream)
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct davinci_mcbsp_dev *dev = rtd->dai->cpu_dai->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = socdev->card->platform;
 	u32 w;
 	int ret;
 
diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c
index e95fde1766b5..f67579d52765 100644
--- a/sound/soc/davinci/davinci-sffsdr.c
+++ b/sound/soc/davinci/davinci-sffsdr.c
@@ -76,6 +76,7 @@ static struct snd_soc_dai_link sffsdr_dai = {
 /* davinci-sffsdr audio machine driver */
 static struct snd_soc_card snd_soc_sffsdr = {
 	.name = "DaVinci SFFSDR",
+	.platform = &davinci_soc_platform,
 	.dai_link = &sffsdr_dai,
 	.num_links = 1,
 };
@@ -91,7 +92,6 @@ static struct pcm3008_setup_data sffsdr_pcm3008_setup = {
 /* sffsdr audio subsystem */
 static struct snd_soc_device sffsdr_snd_devdata = {
 	.card = &snd_soc_sffsdr,
-	.platform = &davinci_soc_platform,
 	.codec_dev = &soc_codec_dev_pcm3008,
 	.codec_data = &sffsdr_pcm3008_setup,
 };
diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c
index 1cf4d6eeb538..bcec3f60bad9 100644
--- a/sound/soc/fsl/mpc8610_hpcd.c
+++ b/sound/soc/fsl/mpc8610_hpcd.c
@@ -467,7 +467,7 @@ static int mpc8610_hpcd_probe(struct of_device *ofdev,
 
 	machine_data->sound_devdata.card = &mpc8610_hpcd_machine;
 	machine_data->sound_devdata.codec_dev = &soc_codec_device_cs4270;
-	machine_data->sound_devdata.platform = &fsl_soc_platform;
+	machine_data->machine.platform = &fsl_soc_platform;
 
 	sound_device->dev.platform_data = machine_data;
 
diff --git a/sound/soc/fsl/soc-of-simple.c b/sound/soc/fsl/soc-of-simple.c
index 53be6491320a..8bc5cd9e972f 100644
--- a/sound/soc/fsl/soc-of-simple.c
+++ b/sound/soc/fsl/soc-of-simple.c
@@ -158,7 +158,7 @@ int of_snd_soc_register_platform(struct snd_soc_platform *platform,
 
 	of_soc->platform_node = node;
 	of_soc->dai_link.cpu_dai = cpu_dai;
-	of_soc->device.platform = platform;
+	of_soc->card.platform = platform;
 	of_soc->card.name = of_soc->dai_link.cpu_dai->name;
 
 	/* Now try to register the SoC device */
diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index 18e2062e3a11..25593fee9121 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -288,6 +288,7 @@ static struct snd_soc_dai_link n810_dai = {
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_n810 = {
 	.name = "N810",
+	.platform = &omap_soc_platform,
 	.dai_link = &n810_dai,
 	.num_links = 1,
 };
@@ -303,7 +304,6 @@ static struct aic3x_setup_data n810_aic33_setup = {
 /* Audio subsystem */
 static struct snd_soc_device n810_snd_devdata = {
 	.card = &snd_soc_n810,
-	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_aic3x,
 	.codec_data = &n810_aic33_setup,
 };
diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c
index 7b160f9d83f9..0c2322dcf02a 100644
--- a/sound/soc/omap/omap2evm.c
+++ b/sound/soc/omap/omap2evm.c
@@ -93,6 +93,7 @@ static struct snd_soc_dai_link omap2evm_dai = {
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_omap2evm = {
 	.name = "omap2evm",
+	.platform = &omap_soc_platform,
 	.dai_link = &omap2evm_dai,
 	.num_links = 1,
 };
@@ -100,7 +101,6 @@ static struct snd_soc_card snd_soc_omap2evm = {
 /* Audio subsystem */
 static struct snd_soc_device omap2evm_snd_devdata = {
 	.card = &snd_soc_omap2evm,
-	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
 
diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c
index 3ed25464627f..fd24a4acd2f5 100644
--- a/sound/soc/omap/omap3beagle.c
+++ b/sound/soc/omap/omap3beagle.c
@@ -90,6 +90,7 @@ static struct snd_soc_dai_link omap3beagle_dai = {
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_omap3beagle = {
 	.name = "omap3beagle",
+	.platform = &omap_soc_platform,
 	.dai_link = &omap3beagle_dai,
 	.num_links = 1,
 };
@@ -97,7 +98,6 @@ static struct snd_soc_card snd_soc_omap3beagle = {
 /* Audio subsystem */
 static struct snd_soc_device omap3beagle_snd_devdata = {
 	.card = &snd_soc_omap3beagle,
-	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
 
diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c
index 7a8f14d0c772..845bf41335b9 100644
--- a/sound/soc/omap/osk5912.c
+++ b/sound/soc/omap/osk5912.c
@@ -145,6 +145,7 @@ static struct snd_soc_dai_link osk_dai = {
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_card_osk = {
 	.name = "OSK5912",
+	.platform = &omap_soc_platform,
 	.dai_link = &osk_dai,
 	.num_links = 1,
 };
@@ -152,7 +153,6 @@ static struct snd_soc_card snd_soc_card_osk = {
 /* Audio subsystem */
 static struct snd_soc_device osk_snd_devdata = {
 	.card = &snd_soc_card_osk,
-	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_tlv320aic23,
 };
 
diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c
index eea0c372bb3f..a72dc4e159e5 100644
--- a/sound/soc/omap/overo.c
+++ b/sound/soc/omap/overo.c
@@ -90,6 +90,7 @@ static struct snd_soc_dai_link overo_dai = {
 /* Audio machine driver */
 static struct snd_soc_card snd_soc_card_overo = {
 	.name = "overo",
+	.platform = &omap_soc_platform,
 	.dai_link = &overo_dai,
 	.num_links = 1,
 };
@@ -97,7 +98,6 @@ static struct snd_soc_card snd_soc_card_overo = {
 /* Audio subsystem */
 static struct snd_soc_device overo_snd_devdata = {
 	.card = &snd_soc_card_overo,
-	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
 
diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c
index 85fd160bca17..ad97836818b1 100644
--- a/sound/soc/omap/sdp3430.c
+++ b/sound/soc/omap/sdp3430.c
@@ -93,6 +93,7 @@ static struct snd_soc_dai_link sdp3430_dai = {
 /* Audio machine driver */
 static struct snd_soc_machine snd_soc_machine_sdp3430 = {
 	.name = "SDP3430",
+	.platform = &omap_soc_platform,
 	.dai_link = &sdp3430_dai,
 	.num_links = 1,
 };
@@ -100,7 +101,6 @@ static struct snd_soc_machine snd_soc_machine_sdp3430 = {
 /* Audio subsystem */
 static struct snd_soc_device sdp3430_snd_devdata = {
 	.machine = &snd_soc_machine_sdp3430,
-	.platform = &omap_soc_platform,
 	.codec_dev = &soc_codec_dev_twl4030,
 };
 
diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c
index e56bf4b6c2af..1ba25a559524 100644
--- a/sound/soc/pxa/corgi.c
+++ b/sound/soc/pxa/corgi.c
@@ -312,6 +312,7 @@ static struct snd_soc_dai_link corgi_dai = {
 /* corgi audio machine driver */
 static struct snd_soc_card snd_soc_corgi = {
 	.name = "Corgi",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = &corgi_dai,
 	.num_links = 1,
 };
@@ -325,7 +326,6 @@ static struct wm8731_setup_data corgi_wm8731_setup = {
 /* corgi audio subsystem */
 static struct snd_soc_device corgi_snd_devdata = {
 	.card = &snd_soc_corgi,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &corgi_wm8731_setup,
 };
diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c
index 60c64861512a..2e3386dfa0f0 100644
--- a/sound/soc/pxa/e800_wm9712.c
+++ b/sound/soc/pxa/e800_wm9712.c
@@ -42,13 +42,13 @@ static struct snd_soc_dai_link e800_dai[] = {
 
 static struct snd_soc_card e800 = {
 	.name = "Toshiba e800",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = e800_dai,
 	.num_links = ARRAY_SIZE(e800_dai),
 };
 
 static struct snd_soc_device e800_snd_devdata = {
 	.card = &e800,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
 
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index d6884b755d55..fe4a729ea648 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -54,13 +54,13 @@ static struct snd_soc_dai_link em_x270_dai[] = {
 
 static struct snd_soc_card em_x270 = {
 	.name = "EM-X270",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = em_x270_dai,
 	.num_links = ARRAY_SIZE(em_x270_dai),
 };
 
 static struct snd_soc_device em_x270_snd_devdata = {
 	.card = &em_x270,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
 
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index 3bb8879ac8a2..4a9cf3083af0 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -191,13 +191,13 @@ static struct snd_soc_dai_link palm27x_dai[] = {
 
 static struct snd_soc_card palm27x_asoc = {
 	.name = "Palm/PXA27x",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = palm27x_dai,
 	.num_links = ARRAY_SIZE(palm27x_dai),
 };
 
 static struct snd_soc_device palm27x_snd_devdata = {
 	.card = &palm27x_asoc,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
 
diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c
index 03b510ab2824..6e9827189fff 100644
--- a/sound/soc/pxa/poodle.c
+++ b/sound/soc/pxa/poodle.c
@@ -278,6 +278,7 @@ static struct snd_soc_dai_link poodle_dai = {
 /* poodle audio machine driver */
 static struct snd_soc_card snd_soc_poodle = {
 	.name = "Poodle",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = &poodle_dai,
 	.num_links = 1,
 };
@@ -291,7 +292,6 @@ static struct wm8731_setup_data poodle_wm8731_setup = {
 /* poodle audio subsystem */
 static struct snd_soc_device poodle_snd_devdata = {
 	.card = &snd_soc_poodle,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8731,
 	.codec_data = &poodle_wm8731_setup,
 };
diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c
index 579d93368f14..a3b9e6bdf979 100644
--- a/sound/soc/pxa/spitz.c
+++ b/sound/soc/pxa/spitz.c
@@ -321,6 +321,7 @@ static struct snd_soc_dai_link spitz_dai = {
 /* spitz audio machine driver */
 static struct snd_soc_card snd_soc_spitz = {
 	.name = "Spitz",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = &spitz_dai,
 	.num_links = 1,
 };
@@ -334,7 +335,6 @@ static struct wm8750_setup_data spitz_wm8750_setup = {
 /* spitz audio subsystem */
 static struct snd_soc_device spitz_snd_devdata = {
 	.card = &snd_soc_spitz,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8750,
 	.codec_data = &spitz_wm8750_setup,
 };
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index 48242b32a28b..c77194f74c9b 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -252,6 +252,7 @@ static int tosa_remove(struct platform_device *dev)
 
 static struct snd_soc_card tosa = {
 	.name = "Tosa",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = tosa_dai,
 	.num_links = ARRAY_SIZE(tosa_dai),
 	.probe = tosa_probe,
@@ -260,7 +261,6 @@ static struct snd_soc_card tosa = {
 
 static struct snd_soc_device tosa_snd_devdata = {
 	.card = &tosa,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9712,
 };
 
diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c
index 842d6500d61f..f8e9ecd589d3 100644
--- a/sound/soc/pxa/zylonite.c
+++ b/sound/soc/pxa/zylonite.c
@@ -175,13 +175,13 @@ static struct snd_soc_dai_link zylonite_dai[] = {
 
 static struct snd_soc_card zylonite = {
 	.name = "Zylonite",
+	.platform = &pxa2xx_soc_platform,
 	.dai_link = zylonite_dai,
 	.num_links = ARRAY_SIZE(zylonite_dai),
 };
 
 static struct snd_soc_device zylonite_snd_ac97_devdata = {
 	.card = &zylonite,
-	.platform = &pxa2xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm9713,
 };
 
diff --git a/sound/soc/s3c24xx/ln2440sbc_alc650.c b/sound/soc/s3c24xx/ln2440sbc_alc650.c
index a70cbc0fa070..12c71482d258 100644
--- a/sound/soc/s3c24xx/ln2440sbc_alc650.c
+++ b/sound/soc/s3c24xx/ln2440sbc_alc650.c
@@ -40,13 +40,13 @@ static struct snd_soc_dai_link ln2440sbc_dai[] = {
 
 static struct snd_soc_card ln2440sbc = {
 	.name = "LN2440SBC",
+	.platform = &s3c24xx_soc_platform,
 	.dai_link = ln2440sbc_dai,
 	.num_links = ARRAY_SIZE(ln2440sbc_dai),
 };
 
 static struct snd_soc_device ln2440sbc_snd_ac97_devdata = {
 	.card = &ln2440sbc,
-	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_ac97,
 };
 
diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c
index 3df2224a6723..45bb12e8ea44 100644
--- a/sound/soc/s3c24xx/neo1973_wm8753.c
+++ b/sound/soc/s3c24xx/neo1973_wm8753.c
@@ -580,6 +580,7 @@ static struct snd_soc_dai_link neo1973_dai[] = {
 
 static struct snd_soc_card neo1973 = {
 	.name = "neo1973",
+	.platform = &s3c24xx_soc_platform,
 	.dai_link = neo1973_dai,
 	.num_links = ARRAY_SIZE(neo1973_dai),
 };
@@ -591,7 +592,6 @@ static struct wm8753_setup_data neo1973_wm8753_setup = {
 
 static struct snd_soc_device neo1973_snd_devdata = {
 	.card = &neo1973,
-	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_wm8753,
 	.codec_data = &neo1973_wm8753_setup,
 };
diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c
index 23325fca1f64..a0a4d1832a14 100644
--- a/sound/soc/s3c24xx/s3c24xx_uda134x.c
+++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c
@@ -234,6 +234,7 @@ static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = {
 
 static struct snd_soc_card snd_soc_s3c24xx_uda134x = {
 	.name = "S3C24XX_UDA134X",
+	.platform = &s3c24xx_soc_platform,
 	.dai_link = &s3c24xx_uda134x_dai_link,
 	.num_links = 1,
 };
@@ -271,7 +272,6 @@ static struct uda134x_platform_data s3c24xx_uda134x = {
 
 static struct snd_soc_device s3c24xx_uda134x_snd_devdata = {
 	.card = &snd_soc_s3c24xx_uda134x,
-	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_uda134x,
 	.codec_data = &s3c24xx_uda134x,
 };
diff --git a/sound/soc/s3c24xx/smdk2443_wm9710.c b/sound/soc/s3c24xx/smdk2443_wm9710.c
index 3d2e6a0417ec..a2a4f5323c17 100644
--- a/sound/soc/s3c24xx/smdk2443_wm9710.c
+++ b/sound/soc/s3c24xx/smdk2443_wm9710.c
@@ -36,13 +36,13 @@ static struct snd_soc_dai_link smdk2443_dai[] = {
 
 static struct snd_soc_card smdk2443 = {
 	.name = "SMDK2443",
+	.platform = &s3c24xx_soc_platform,
 	.dai_link = smdk2443_dai,
 	.num_links = ARRAY_SIZE(smdk2443_dai),
 };
 
 static struct snd_soc_device smdk2443_snd_ac97_devdata = {
 	.card = &smdk2443,
-	.platform = &s3c24xx_soc_platform,
 	.codec_dev = &soc_codec_dev_ac97,
 };
 
diff --git a/sound/soc/sh/sh7760-ac97.c b/sound/soc/sh/sh7760-ac97.c
index 8b44f9c8a9ff..ce7f95b59de3 100644
--- a/sound/soc/sh/sh7760-ac97.c
+++ b/sound/soc/sh/sh7760-ac97.c
@@ -40,13 +40,13 @@ static struct snd_soc_dai_link sh7760_ac97_dai = {
 
 static struct snd_soc_card sh7760_ac97_soc_machine  = {
 	.name = "SH7760 AC97",
+	.platform = &sh7760_soc_platform,
 	.dai_link = &sh7760_ac97_dai,
 	.num_links = 1,
 };
 
 static struct snd_soc_device sh7760_ac97_snd_devdata = {
 	.card = &sh7760_ac97_soc_machine,
-	.platform = &sh7760_soc_platform,
 	.codec_dev = &soc_codec_dev_ac97,
 };
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index c4b22e6984e6..fe89260c9028 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -109,9 +109,10 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card = socdev->card;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_dai_link *machine = rtd->dai;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
 	struct snd_soc_dai *codec_dai = machine->codec_dai;
 	int ret = 0;
@@ -302,7 +303,7 @@ static int soc_codec_close(struct snd_pcm_substream *substream)
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_dai_link *machine = rtd->dai;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
 	struct snd_soc_dai *codec_dai = machine->codec_dai;
 	struct snd_soc_codec *codec = socdev->codec;
@@ -370,7 +371,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_card *card = socdev->card;
 	struct snd_soc_dai_link *machine = rtd->dai;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
 	struct snd_soc_dai *codec_dai = machine->codec_dai;
 	struct snd_soc_codec *codec = socdev->codec;
@@ -464,7 +465,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_dai_link *machine = rtd->dai;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
 	struct snd_soc_dai *codec_dai = machine->codec_dai;
 	int ret = 0;
@@ -534,7 +536,8 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
 	struct snd_soc_dai_link *machine = rtd->dai;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
 	struct snd_soc_dai *codec_dai = machine->codec_dai;
 	struct snd_soc_codec *codec = socdev->codec;
@@ -568,8 +571,9 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card= socdev->card;
 	struct snd_soc_dai_link *machine = rtd->dai;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
 	struct snd_soc_dai *codec_dai = machine->codec_dai;
 	int ret;
@@ -610,7 +614,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
 	struct snd_soc_card *card = socdev->card;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 	struct snd_soc_codec *codec = socdev->codec;
 	int i;
@@ -686,7 +690,7 @@ static void soc_resume_deferred(struct work_struct *work)
 						 struct snd_soc_card,
 						 deferred_resume_work);
 	struct snd_soc_device *socdev = card->socdev;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 	struct snd_soc_codec *codec = socdev->codec;
 	struct platform_device *pdev = to_platform_device(socdev->dev);
@@ -770,7 +774,7 @@ static int soc_probe(struct platform_device *pdev)
 	int ret = 0, i;
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
 	struct snd_soc_card *card = socdev->card;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 
 	/* Bodge while we push things out of socdev */
@@ -835,7 +839,7 @@ static int soc_remove(struct platform_device *pdev)
 	int i;
 	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
 	struct snd_soc_card *card = socdev->card;
-	struct snd_soc_platform *platform = socdev->platform;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_codec_device *codec_dev = socdev->codec_dev;
 
 	run_delayed_work(&card->delayed_work);
@@ -875,6 +879,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev,
 	struct snd_soc_dai_link *dai_link, int num)
 {
 	struct snd_soc_codec *codec = socdev->codec;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_platform *platform = card->platform;
 	struct snd_soc_dai *codec_dai = dai_link->codec_dai;
 	struct snd_soc_dai *cpu_dai = dai_link->cpu_dai;
 	struct snd_soc_pcm_runtime *rtd;
@@ -910,13 +916,13 @@ static int soc_new_pcm(struct snd_soc_device *socdev,
 
 	dai_link->pcm = pcm;
 	pcm->private_data = rtd;
-	soc_pcm_ops.mmap = socdev->platform->pcm_ops->mmap;
-	soc_pcm_ops.pointer = socdev->platform->pcm_ops->pointer;
-	soc_pcm_ops.ioctl = socdev->platform->pcm_ops->ioctl;
-	soc_pcm_ops.copy = socdev->platform->pcm_ops->copy;
-	soc_pcm_ops.silence = socdev->platform->pcm_ops->silence;
-	soc_pcm_ops.ack = socdev->platform->pcm_ops->ack;
-	soc_pcm_ops.page = socdev->platform->pcm_ops->page;
+	soc_pcm_ops.mmap = platform->pcm_ops->mmap;
+	soc_pcm_ops.pointer = platform->pcm_ops->pointer;
+	soc_pcm_ops.ioctl = platform->pcm_ops->ioctl;
+	soc_pcm_ops.copy = platform->pcm_ops->copy;
+	soc_pcm_ops.silence = platform->pcm_ops->silence;
+	soc_pcm_ops.ack = platform->pcm_ops->ack;
+	soc_pcm_ops.page = platform->pcm_ops->page;
 
 	if (playback)
 		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &soc_pcm_ops);
@@ -924,14 +930,14 @@ static int soc_new_pcm(struct snd_soc_device *socdev,
 	if (capture)
 		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &soc_pcm_ops);
 
-	ret = socdev->platform->pcm_new(codec->card, codec_dai, pcm);
+	ret = platform->pcm_new(codec->card, codec_dai, pcm);
 	if (ret < 0) {
 		printk(KERN_ERR "asoc: platform pcm constructor failed\n");
 		kfree(rtd);
 		return ret;
 	}
 
-	pcm->private_free = socdev->platform->pcm_free;
+	pcm->private_free = platform->pcm_free;
 	printk(KERN_INFO "asoc: %s <-> %s mapping ok\n", codec_dai->name,
 		cpu_dai->name);
 	return ret;
-- 
cgit v1.2.3


From 8789a9e7df6bf9b93739c4c7d4e380725bc9e936 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Dec 2008 15:34:07 -0500
Subject: ring-buffer: read page interface

Impact: new API to ring buffer

This patch adds a new interface into the ring buffer that allows a
page to be read from the ring buffer on a given CPU. For every page
read, one must also be given to allow for a "swap" of the pages.

 rpage = ring_buffer_alloc_read_page(buffer);
 if (!rpage)
	goto err;
 ret = ring_buffer_read_page(buffer, &rpage, cpu, full);
 if (!ret)
	goto empty;
 process_page(rpage);
 ring_buffer_free_read_page(rpage);

The caller of these functions must handle any waits that are
needed to wait for new data. The ring_buffer_read_page will simply
return 0 if there is no data, or if "full" is set and the writer
is still on the current page.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |   5 ++
 kernel/trace/ring_buffer.c  | 166 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 3bb87a753fa3..1a350a847edd 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -124,6 +124,11 @@ void tracing_on(void);
 void tracing_off(void);
 void tracing_off_permanent(void);
 
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
+int ring_buffer_read_page(struct ring_buffer *buffer,
+			  void **data_page, int cpu, int full);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8619c5345889..50b74d3a5c32 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -687,6 +687,12 @@ static inline int rb_null_event(struct ring_buffer_event *event)
 	return event->type == RINGBUF_TYPE_PADDING;
 }
 
+static inline void *
+__rb_data_page_index(struct buffer_data_page *page, unsigned index)
+{
+	return page->data + index;
+}
+
 static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
 {
 	return page->page->data + index;
@@ -2232,6 +2238,166 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
 	return 0;
 }
 
+static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
+			      struct buffer_data_page *page)
+{
+	struct ring_buffer_event *event;
+	unsigned long head;
+
+	__raw_spin_lock(&cpu_buffer->lock);
+	for (head = 0; head < local_read(&page->commit);
+	     head += rb_event_length(event)) {
+
+		event = __rb_data_page_index(page, head);
+		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
+			return;
+		/* Only count data entries */
+		if (event->type != RINGBUF_TYPE_DATA)
+			continue;
+		cpu_buffer->entries--;
+	}
+	__raw_spin_unlock(&cpu_buffer->lock);
+}
+
+/**
+ * ring_buffer_alloc_read_page - allocate a page to read from buffer
+ * @buffer: the buffer to allocate for.
+ *
+ * This function is used in conjunction with ring_buffer_read_page.
+ * When reading a full page from the ring buffer, these functions
+ * can be used to speed up the process. The calling function should
+ * allocate a few pages first with this function. Then when it
+ * needs to get pages from the ring buffer, it passes the result
+ * of this function into ring_buffer_read_page, which will swap
+ * the page that was allocated, with the read page of the buffer.
+ *
+ * Returns:
+ *  The page allocated, or NULL on error.
+ */
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+{
+	unsigned long addr;
+	struct buffer_data_page *page;
+
+	addr = __get_free_page(GFP_KERNEL);
+	if (!addr)
+		return NULL;
+
+	page = (void *)addr;
+
+	return page;
+}
+
+/**
+ * ring_buffer_free_read_page - free an allocated read page
+ * @buffer: the buffer the page was allocate for
+ * @data: the page to free
+ *
+ * Free a page allocated from ring_buffer_alloc_read_page.
+ */
+void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+{
+	free_page((unsigned long)data);
+}
+
+/**
+ * ring_buffer_read_page - extract a page from the ring buffer
+ * @buffer: buffer to extract from
+ * @data_page: the page to use allocated from ring_buffer_alloc_read_page
+ * @cpu: the cpu of the buffer to extract
+ * @full: should the extraction only happen when the page is full.
+ *
+ * This function will pull out a page from the ring buffer and consume it.
+ * @data_page must be the address of the variable that was returned
+ * from ring_buffer_alloc_read_page. This is because the page might be used
+ * to swap with a page in the ring buffer.
+ *
+ * for example:
+ *	rpage = ring_buffer_alloc_page(buffer);
+ *	if (!rpage)
+ *		return error;
+ *	ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
+ *	if (ret)
+ *		process_page(rpage);
+ *
+ * When @full is set, the function will not return true unless
+ * the writer is off the reader page.
+ *
+ * Note: it is up to the calling functions to handle sleeps and wakeups.
+ *  The ring buffer can be used anywhere in the kernel and can not
+ *  blindly call wake_up. The layer that uses the ring buffer must be
+ *  responsible for that.
+ *
+ * Returns:
+ *  1 if data has been transferred
+ *  0 if no data has been transferred.
+ */
+int ring_buffer_read_page(struct ring_buffer *buffer,
+			    void **data_page, int cpu, int full)
+{
+	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+	struct ring_buffer_event *event;
+	struct buffer_data_page *page;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!data_page)
+		return 0;
+
+	page = *data_page;
+	if (!page)
+		return 0;
+
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+	/*
+	 * rb_buffer_peek will get the next ring buffer if
+	 * the current reader page is empty.
+	 */
+	event = rb_buffer_peek(buffer, cpu, NULL);
+	if (!event)
+		goto out;
+
+	/* check for data */
+	if (!local_read(&cpu_buffer->reader_page->page->commit))
+		goto out;
+	/*
+	 * If the writer is already off of the read page, then simply
+	 * switch the read page with the given page. Otherwise
+	 * we need to copy the data from the reader to the writer.
+	 */
+	if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
+		unsigned int read = cpu_buffer->reader_page->read;
+
+		if (full)
+			goto out;
+		/* The writer is still on the reader page, we must copy */
+		page = cpu_buffer->reader_page->page;
+		memcpy(page->data,
+		       cpu_buffer->reader_page->page->data + read,
+		       local_read(&page->commit) - read);
+
+		/* consume what was read */
+		cpu_buffer->reader_page += read;
+
+	} else {
+		/* swap the pages */
+		rb_init_page(page);
+		page = cpu_buffer->reader_page->page;
+		cpu_buffer->reader_page->page = *data_page;
+		cpu_buffer->reader_page->read = 0;
+		*data_page = page;
+	}
+	ret = 1;
+
+	/* update the entry counter */
+	rb_remove_entries(cpu_buffer, page);
+ out:
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+	return ret;
+}
+
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
 	       size_t cnt, loff_t *ppos)
-- 
cgit v1.2.3


From 14a866c567e040ccf6240d68b083dd1dbbde63e6 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Dec 2008 23:50:02 -0500
Subject: ftrace: add ftrace_graph_stop()

Impact: new ftrace_graph_stop function

While developing more features of function graph, I hit a bug that
caused the WARN_ON to trigger in the prepare_ftrace_return function.
Well, it was hard for me to find out that was happening because the
bug would not print, it would just cause a hard lockup or reboot.
The reason is that it is not safe to call printk from this function.

Looking further, I also found that it calls unregister_ftrace_graph,
which grabs a mutex and calls kstop machine. This would definitely
lock the box up if it were to trigger.

This patch adds a fast and safe ftrace_graph_stop() which will
stop the function tracer. Then it is safe to call the WARN ON.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 10 ++++++----
 include/linux/ftrace.h   |  2 ++
 kernel/trace/ftrace.c    |  5 +++++
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1a5b8f8cb3cc..adba8e9a427c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -484,14 +484,16 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		: "memory"
 	);
 
-	if (WARN_ON(faulted)) {
-		unregister_ftrace_graph();
+	if (unlikely(faulted)) {
+		ftrace_graph_stop();
+		WARN_ON(1);
 		return;
 	}
 
-	if (WARN_ON(!__kernel_text_address(old))) {
-		unregister_ftrace_graph();
+	if (unlikely(!__kernel_text_address(old))) {
+		ftrace_graph_stop();
 		*parent = old;
+		WARN_ON(1);
 		return;
 	}
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index afba918c623c..58ca1c3a3f4d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -376,6 +376,8 @@ typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
 extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 				trace_func_graph_ent_t entryfunc);
 
+extern void ftrace_graph_stop(void);
+
 /* The current handlers in use */
 extern trace_func_graph_ret_t ftrace_graph_return;
 extern trace_func_graph_ent_t ftrace_graph_entry;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2e78628443e8..a44af05ae2d0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1769,5 +1769,10 @@ void ftrace_graph_exit_task(struct task_struct *t)
 
 	kfree(ret_stack);
 }
+
+void ftrace_graph_stop(void)
+{
+	ftrace_stop();
+}
 #endif
 
-- 
cgit v1.2.3


From e49dc19c6a19ea112fcb94b7c62ec62cdd5c08aa Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 2 Dec 2008 23:50:05 -0500
Subject: ftrace: function graph return for function entry

Impact: feature, let entry function decide to trace or not

This patch lets the graph tracer entry function decide if the tracing
should be done at the end as well. This requires all function graph
entry functions return 1 if it should trace, or 0 if the return should
not be traced.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/entry_32.S |  3 +++
 arch/x86/kernel/entry_64.S |  3 +++
 arch/x86/kernel/ftrace.c   |  7 ++++++-
 include/linux/ftrace.h     |  2 +-
 kernel/trace/ftrace.c      | 10 +++++++---
 kernel/trace/trace.c       |  4 +++-
 kernel/trace/trace.h       |  2 +-
 7 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 826682abed1d..43ceb3f454bf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1196,6 +1196,9 @@ ENTRY(mcount)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	cmpl $ftrace_stub, ftrace_graph_return
 	jnz ftrace_graph_caller
+
+	cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
+	jnz ftrace_graph_caller
 #endif
 .globl ftrace_stub
 ftrace_stub:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9060ba6497e2..54e0bbdccb99 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -120,6 +120,9 @@ ENTRY(mcount)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	cmpq $ftrace_stub, ftrace_graph_return
 	jnz ftrace_graph_caller
+
+	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
+	jnz ftrace_graph_caller
 #endif
 
 .globl ftrace_stub
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index adba8e9a427c..d278ad2ebda2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -425,6 +425,7 @@ static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
 	trace->depth = index;
+	barrier();
 	current->curr_ret_stack--;
 }
 
@@ -506,7 +507,11 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 
 	trace.func = self_addr;
-	ftrace_graph_entry(&trace);
 
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		current->curr_ret_stack--;
+		*parent = old;
+	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 58ca1c3a3f4d..469ceb3e85ba 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -371,7 +371,7 @@ struct ftrace_graph_ret {
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
-typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
+typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
 
 extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 				trace_func_graph_ent_t entryfunc);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a44af05ae2d0..65b9e863056b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1636,11 +1636,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 static atomic_t ftrace_graph_active;
 
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+{
+	return 0;
+}
+
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return =
 			(trace_func_graph_ret_t)ftrace_stub;
-trace_func_graph_ent_t ftrace_graph_entry =
-			(trace_func_graph_ent_t)ftrace_stub;
+trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
 
 /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
 static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
@@ -1738,7 +1742,7 @@ void unregister_ftrace_graph(void)
 
 	atomic_dec(&ftrace_graph_active);
 	ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
-	ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub;
+	ftrace_graph_entry = ftrace_graph_entry_stub;
 	ftrace_shutdown(FTRACE_STOP_FUNC_RET);
 
 	mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 380de630ebce..8b6409a62b54 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1200,7 +1200,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-void trace_graph_entry(struct ftrace_graph_ent *trace)
+int trace_graph_entry(struct ftrace_graph_ent *trace)
 {
 	struct trace_array *tr = &global_trace;
 	struct trace_array_cpu *data;
@@ -1219,6 +1219,8 @@ void trace_graph_entry(struct ftrace_graph_ent *trace)
 	}
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
+
+	return 1;
 }
 
 void trace_graph_return(struct ftrace_graph_ret *trace)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f96f4e787ff3..0565ae9a2210 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -412,7 +412,7 @@ void trace_function(struct trace_array *tr,
 		    unsigned long flags, int pc);
 
 void trace_graph_return(struct ftrace_graph_ret *trace);
-void trace_graph_entry(struct ftrace_graph_ent *trace);
+int trace_graph_entry(struct ftrace_graph_ent *trace);
 void trace_bts(struct trace_array *tr,
 	       unsigned long from,
 	       unsigned long to);
-- 
cgit v1.2.3


From b908b53d580c3e9aba81ebe3339c5b7b4fa8031d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Mon, 1 Dec 2008 06:30:04 +0000
Subject: of/gpio: Implement of_get_gpio_flags()

This adds a new function, of_get_gpio_flags, which is like
of_get_gpio(), but accepts a new "flags" argument.  This new function
will be used by the drivers that need to retrieve additional GPIO
information, such as active-low flag.

Also, this changes the default ("simple") .xlate routine to warn about
bogus (< 2) #gpio-cells usage: the second cell should always be present
for GPIO flags.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/of/gpio.c       | 36 +++++++++++++++++++++++++++++-------
 include/linux/of_gpio.h | 38 ++++++++++++++++++++++++++++++++++----
 2 files changed, 63 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index 7cd7301b5839..a4ba217116eb 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -19,14 +19,17 @@
 #include <asm/prom.h>
 
 /**
- * of_get_gpio - Get a GPIO number from the device tree to use with GPIO API
+ * of_get_gpio_flags - Get a GPIO number and flags to use with GPIO API
  * @np:		device node to get GPIO from
  * @index:	index of the GPIO
+ * @flags:	a flags pointer to fill in
  *
  * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
- * value on the error condition.
+ * value on the error condition. If @flags is not NULL the function also fills
+ * in flags for the GPIO.
  */
-int of_get_gpio(struct device_node *np, int index)
+int of_get_gpio_flags(struct device_node *np, int index,
+		      enum of_gpio_flags *flags)
 {
 	int ret;
 	struct device_node *gc;
@@ -59,7 +62,11 @@ int of_get_gpio(struct device_node *np, int index)
 		goto err1;
 	}
 
-	ret = of_gc->xlate(of_gc, np, gpio_spec);
+	/* .xlate might decide to not fill in the flags, so clear it. */
+	if (flags)
+		*flags = 0;
+
+	ret = of_gc->xlate(of_gc, np, gpio_spec, flags);
 	if (ret < 0)
 		goto err1;
 
@@ -70,26 +77,41 @@ err0:
 	pr_debug("%s exited with status %d\n", __func__, ret);
 	return ret;
 }
-EXPORT_SYMBOL(of_get_gpio);
+EXPORT_SYMBOL(of_get_gpio_flags);
 
 /**
- * of_gpio_simple_xlate - translate gpio_spec to the GPIO number
+ * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
  * @of_gc:	pointer to the of_gpio_chip structure
  * @np:		device node of the GPIO chip
  * @gpio_spec:	gpio specifier as found in the device tree
+ * @flags:	a flags pointer to fill in
  *
  * This is simple translation function, suitable for the most 1:1 mapped
  * gpio chips. This function performs only one sanity check: whether gpio
  * is less than ngpios (that is specified in the gpio_chip).
  */
 int of_gpio_simple_xlate(struct of_gpio_chip *of_gc, struct device_node *np,
-			 const void *gpio_spec)
+			 const void *gpio_spec, enum of_gpio_flags *flags)
 {
 	const u32 *gpio = gpio_spec;
 
+	/*
+	 * We're discouraging gpio_cells < 2, since that way you'll have to
+	 * write your own xlate function (that will have to retrive the GPIO
+	 * number and the flags from a single gpio cell -- this is possible,
+	 * but not recommended).
+	 */
+	if (of_gc->gpio_cells < 2) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
 	if (*gpio > of_gc->gc.ngpio)
 		return -EINVAL;
 
+	if (flags)
+		*flags = gpio[1];
+
 	return *gpio;
 }
 EXPORT_SYMBOL(of_gpio_simple_xlate);
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 67db101d0eb8..e25abf610cb6 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -14,9 +14,22 @@
 #ifndef __LINUX_OF_GPIO_H
 #define __LINUX_OF_GPIO_H
 
+#include <linux/compiler.h>
+#include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
 
+struct device_node;
+
+/*
+ * This is Linux-specific flags. By default controllers' and Linux' mapping
+ * match, but GPIO controllers are free to translate their own flags to
+ * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended.
+ */
+enum of_gpio_flags {
+	OF_GPIO_ACTIVE_LOW = 0x1,
+};
+
 #ifdef CONFIG_OF_GPIO
 
 /*
@@ -26,7 +39,7 @@ struct of_gpio_chip {
 	struct gpio_chip gc;
 	int gpio_cells;
 	int (*xlate)(struct of_gpio_chip *of_gc, struct device_node *np,
-		     const void *gpio_spec);
+		     const void *gpio_spec, enum of_gpio_flags *flags);
 };
 
 static inline struct of_gpio_chip *to_of_gpio_chip(struct gpio_chip *gc)
@@ -50,20 +63,37 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 	return container_of(of_gc, struct of_mm_gpio_chip, of_gc);
 }
 
-extern int of_get_gpio(struct device_node *np, int index);
+extern int of_get_gpio_flags(struct device_node *np, int index,
+			     enum of_gpio_flags *flags);
+
 extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
 extern int of_gpio_simple_xlate(struct of_gpio_chip *of_gc,
 				struct device_node *np,
-				const void *gpio_spec);
+				const void *gpio_spec,
+				enum of_gpio_flags *flags);
 #else
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
-static inline int of_get_gpio(struct device_node *np, int index)
+static inline int of_get_gpio_flags(struct device_node *np, int index,
+				    enum of_gpio_flags *flags)
 {
 	return -ENOSYS;
 }
 
 #endif /* CONFIG_OF_GPIO */
 
+/**
+ * of_get_gpio - Get a GPIO number to use with GPIO API
+ * @np:		device node to get GPIO from
+ * @index:	index of the GPIO
+ *
+ * Returns GPIO number to use with Linux generic GPIO API, or one of the errno
+ * value on the error condition.
+ */
+static inline int of_get_gpio(struct device_node *np, int index)
+{
+	return of_get_gpio_flags(np, index, NULL);
+}
+
 #endif /* __LINUX_OF_GPIO_H */
-- 
cgit v1.2.3


From 384c89e2e4cb5879b86a38414d1b3bb2b23ec8ee Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 3 Dec 2008 17:34:03 +0000
Subject: ASoC: Push debugfs files out of the snd_soc_device structure

This is in preparation for the removal of struct snd_soc_device.

The pop time configuration should really be a property of the card not
the codec but since DAPM currently uses the codec rather than the card
using the codec is fine for now.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  |  8 +++---
 sound/soc/soc-core.c | 73 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 46 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index ad8141acd6b0..3ee608dce2f8 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -279,6 +279,11 @@ struct snd_soc_codec {
 	/* codec DAI's */
 	struct snd_soc_dai *dai;
 	unsigned int num_dai;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_reg;
+	struct dentry *debugfs_pop_time;
+#endif
 };
 
 /* codec device */
@@ -364,9 +369,6 @@ struct snd_soc_device {
 	struct snd_soc_codec *codec;
 	struct snd_soc_codec_device *codec_dev;
 	void *codec_data;
-#ifdef CONFIG_DEBUG_FS
-	struct dentry	*debugfs_root;
-#endif
 };
 
 /* runtime channel data */
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index fe89260c9028..34114398b914 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -39,6 +39,10 @@ static DEFINE_MUTEX(pcm_mutex);
 static DEFINE_MUTEX(io_mutex);
 static DECLARE_WAIT_QUEUE_HEAD(soc_pm_waitq);
 
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *debugfs_root;
+#endif
+
 /*
  * This is a timeout to do a DAPM powerdown after a stream is closed().
  * It can be used to eliminate pops between different playback streams, e.g.
@@ -1002,7 +1006,9 @@ static ssize_t codec_reg_read_file(struct file *file, char __user *user_buf,
 			       size_t count, loff_t *ppos)
 {
 	ssize_t ret;
-	struct snd_soc_device *devdata = file->private_data;
+	struct snd_soc_codec *codec = file->private_data;
+	struct device *card_dev = codec->card->dev;
+	struct snd_soc_device *devdata = card_dev->driver_data;
 	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -1021,8 +1027,7 @@ static ssize_t codec_reg_write_file(struct file *file,
 	char *start = buf;
 	unsigned long reg, value;
 	int step = 1;
-	struct snd_soc_device *devdata = file->private_data;
-	struct snd_soc_codec *codec = devdata->codec;
+	struct snd_soc_codec *codec = file->private_data;
 
 	buf_size = min(count, (sizeof(buf)-1));
 	if (copy_from_user(buf, user_buf, buf_size))
@@ -1051,44 +1056,36 @@ static const struct file_operations codec_reg_fops = {
 	.write = codec_reg_write_file,
 };
 
-static void soc_init_debugfs(struct snd_soc_device *socdev)
+static void soc_init_codec_debugfs(struct snd_soc_codec *codec)
 {
-	struct dentry *root, *file;
-	struct snd_soc_codec *codec = socdev->codec;
-	root = debugfs_create_dir(dev_name(socdev->dev), NULL);
-	if (IS_ERR(root) || !root)
-		goto exit1;
-
-	file = debugfs_create_file("codec_reg", 0644,
-			root, socdev, &codec_reg_fops);
-	if (!file)
-		goto exit2;
-
-	file = debugfs_create_u32("dapm_pop_time", 0744,
-			root, &codec->pop_time);
-	if (!file)
-		goto exit2;
-	socdev->debugfs_root = root;
-	return;
-exit2:
-	debugfs_remove_recursive(root);
-exit1:
-	dev_err(socdev->dev, "debugfs is not available\n");
+	codec->debugfs_reg = debugfs_create_file("codec_reg", 0644,
+						 debugfs_root, codec,
+						 &codec_reg_fops);
+	if (!codec->debugfs_reg)
+		printk(KERN_WARNING
+		       "ASoC: Failed to create codec register debugfs file\n");
+
+	codec->debugfs_pop_time = debugfs_create_u32("dapm_pop_time", 0744,
+						     debugfs_root,
+						     &codec->pop_time);
+	if (!codec->debugfs_pop_time)
+		printk(KERN_WARNING
+		       "Failed to create pop time debugfs file\n");
 }
 
-static void soc_cleanup_debugfs(struct snd_soc_device *socdev)
+static void soc_cleanup_codec_debugfs(struct snd_soc_codec *codec)
 {
-	debugfs_remove_recursive(socdev->debugfs_root);
-	socdev->debugfs_root = NULL;
+	debugfs_remove(codec->debugfs_pop_time);
+	debugfs_remove(codec->debugfs_reg);
 }
 
 #else
 
-static inline void soc_init_debugfs(struct snd_soc_device *socdev)
+static inline void soc_init_codec_debugfs(struct snd_soc_codec *codec)
 {
 }
 
-static inline void soc_cleanup_debugfs(struct snd_soc_device *socdev)
+static inline void soc_cleanup_codec_debugfs(struct snd_soc_codec *codec)
 {
 }
 #endif
@@ -1305,7 +1302,7 @@ int snd_soc_init_card(struct snd_soc_device *socdev)
 	if (err < 0)
 		printk(KERN_WARNING "asoc: failed to add codec sysfs files\n");
 
-	soc_init_debugfs(socdev);
+	soc_init_codec_debugfs(socdev->codec);
 	mutex_unlock(&codec->mutex);
 
 out:
@@ -1329,7 +1326,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev)
 #endif
 
 	mutex_lock(&codec->mutex);
-	soc_cleanup_debugfs(socdev);
+	soc_cleanup_codec_debugfs(socdev->codec);
 #ifdef CONFIG_SND_SOC_AC97_BUS
 	for (i = 0; i < codec->num_dai; i++) {
 		codec_dai = &codec->dai[i];
@@ -1962,11 +1959,23 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute);
 
 static int __devinit snd_soc_init(void)
 {
+#ifdef CONFIG_DEBUG_FS
+	debugfs_root = debugfs_create_dir("asoc", NULL);
+	if (IS_ERR(debugfs_root) || !debugfs_root) {
+		printk(KERN_WARNING
+		       "ASoC: Failed to create debugfs directory\n");
+		debugfs_root = NULL;
+	}
+#endif
+
 	return platform_driver_register(&soc_driver);
 }
 
 static void __exit snd_soc_exit(void)
 {
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(debugfs_root);
+#endif
 	platform_driver_unregister(&soc_driver);
 }
 
-- 
cgit v1.2.3


From 07c84d0409f3551b79d676630d8ee76bb551598d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 3 Dec 2008 18:17:28 +0000
Subject: ASoC: Remove device from platform suspend and resume operations

None of the platforms are actually using the SoC device so remove it
(only atmel actually has a suspend method).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h         | 6 ++----
 sound/soc/atmel/atmel-pcm.c | 6 ++----
 sound/soc/soc-core.c        | 4 ++--
 3 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 3ee608dce2f8..8ec63c02dc10 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -300,10 +300,8 @@ struct snd_soc_platform {
 
 	int (*probe)(struct platform_device *pdev);
 	int (*remove)(struct platform_device *pdev);
-	int (*suspend)(struct platform_device *pdev,
-		struct snd_soc_dai *dai);
-	int (*resume)(struct platform_device *pdev,
-		struct snd_soc_dai *dai);
+	int (*suspend)(struct snd_soc_dai *dai);
+	int (*resume)(struct snd_soc_dai *dai);
 
 	/* pcm creation and destruction */
 	int (*pcm_new)(struct snd_card *, struct snd_soc_dai *,
diff --git a/sound/soc/atmel/atmel-pcm.c b/sound/soc/atmel/atmel-pcm.c
index 394412fb396f..8507aa1cd811 100644
--- a/sound/soc/atmel/atmel-pcm.c
+++ b/sound/soc/atmel/atmel-pcm.c
@@ -417,8 +417,7 @@ static void atmel_pcm_free_dma_buffers(struct snd_pcm *pcm)
 }
 
 #ifdef CONFIG_PM
-static int atmel_pcm_suspend(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int atmel_pcm_suspend(struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = dai->runtime;
 	struct atmel_runtime_data *prtd;
@@ -442,8 +441,7 @@ static int atmel_pcm_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int atmel_pcm_resume(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int atmel_pcm_resume(struct snd_soc_dai *dai)
 {
 	struct snd_pcm_runtime *runtime = dai->runtime;
 	struct atmel_runtime_data *prtd;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 34114398b914..f83852f11463 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -652,7 +652,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 		if (cpu_dai->suspend && !cpu_dai->ac97_control)
 			cpu_dai->suspend(pdev, cpu_dai);
 		if (platform->suspend)
-			platform->suspend(pdev, cpu_dai);
+			platform->suspend(cpu_dai);
 	}
 
 	/* close any waiting streams and save state */
@@ -741,7 +741,7 @@ static void soc_resume_deferred(struct work_struct *work)
 		if (cpu_dai->resume && !cpu_dai->ac97_control)
 			cpu_dai->resume(pdev, cpu_dai);
 		if (platform->resume)
-			platform->resume(pdev, cpu_dai);
+			platform->resume(cpu_dai);
 	}
 
 	if (card->resume_post)
-- 
cgit v1.2.3


From dc7d7b830ee1f4111696e73d1c25da683b461548 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 3 Dec 2008 18:21:52 +0000
Subject: ASoC: Remove platform device from DAI suspend and resume operations

None of the DAIs use it except s3c2412-i2s which only uses it for
dev_() printouts.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h         |  6 ++----
 sound/soc/atmel/atmel_ssc_dai.c |  6 ++----
 sound/soc/au1x/psc-ac97.c       |  6 ++----
 sound/soc/au1x/psc-i2s.c        |  6 ++----
 sound/soc/blackfin/bf5xx-ac97.c |  6 ++----
 sound/soc/blackfin/bf5xx-i2s.c  |  6 ++----
 sound/soc/pxa/pxa-ssp.c         |  6 ++----
 sound/soc/pxa/pxa2xx-ac97.c     |  6 ++----
 sound/soc/pxa/pxa2xx-i2s.c      |  6 ++----
 sound/soc/s3c24xx/s3c2412-i2s.c | 16 +++++++---------
 sound/soc/s3c24xx/s3c24xx-i2s.c |  6 ++----
 sound/soc/soc-core.c            |  8 ++++----
 12 files changed, 31 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index a01a24b10196..e2d5f76838c6 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -191,10 +191,8 @@ struct snd_soc_dai {
 		     struct snd_soc_dai *dai);
 	void (*remove)(struct platform_device *pdev,
 		       struct snd_soc_dai *dai);
-	int (*suspend)(struct platform_device *pdev,
-		struct snd_soc_dai *dai);
-	int (*resume)(struct platform_device *pdev,
-		struct snd_soc_dai *dai);
+	int (*suspend)(struct snd_soc_dai *dai);
+	int (*resume)(struct snd_soc_dai *dai);
 
 	/* ops */
 	struct snd_soc_dai_ops ops;
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index 0bb18dfa9495..d9b874c5bf37 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -628,8 +628,7 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream,
 
 
 #ifdef CONFIG_PM
-static int atmel_ssc_suspend(struct platform_device *pdev,
-			    struct snd_soc_dai *cpu_dai)
+static int atmel_ssc_suspend(struct snd_soc_dai *cpu_dai)
 {
 	struct atmel_ssc_info *ssc_p;
 
@@ -657,8 +656,7 @@ static int atmel_ssc_suspend(struct platform_device *pdev,
 
 
-static int atmel_ssc_resume(struct platform_device *pdev,
-			   struct snd_soc_dai *cpu_dai)
+static int atmel_ssc_resume(struct snd_soc_dai *cpu_dai)
 {
 	struct atmel_ssc_info *ssc_p;
 	u32 cr;
diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c
index a0bcfeaf5f86..a1e824d29cf9 100644
--- a/sound/soc/au1x/psc-ac97.c
+++ b/sound/soc/au1x/psc-ac97.c
@@ -314,8 +314,7 @@ static void au1xpsc_ac97_remove(struct platform_device *pdev,
 	au1xpsc_ac97_workdata = NULL;
 }
 
-static int au1xpsc_ac97_suspend(struct platform_device *pdev,
-				struct snd_soc_dai *dai)
+static int au1xpsc_ac97_suspend(struct snd_soc_dai *dai)
 {
 	/* save interesting registers and disable PSC */
 	au1xpsc_ac97_workdata->pm[0] =
@@ -329,8 +328,7 @@ static int au1xpsc_ac97_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int au1xpsc_ac97_resume(struct platform_device *pdev,
-			       struct snd_soc_dai *dai)
+static int au1xpsc_ac97_resume(struct snd_soc_dai *dai)
 {
 	/* restore PSC clock config */
 	au_writel(au1xpsc_ac97_workdata->pm[0] | PSC_SEL_PS_AC97MODE,
diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c
index f4217e70a787..16f97462ea15 100644
--- a/sound/soc/au1x/psc-i2s.c
+++ b/sound/soc/au1x/psc-i2s.c
@@ -339,8 +339,7 @@ static void au1xpsc_i2s_remove(struct platform_device *pdev,
 	au1xpsc_i2s_workdata = NULL;
 }
 
-static int au1xpsc_i2s_suspend(struct platform_device *pdev,
-			       struct snd_soc_dai *cpu_dai)
+static int au1xpsc_i2s_suspend(struct snd_soc_dai *cpu_dai)
 {
 	/* save interesting register and disable PSC */
 	au1xpsc_i2s_workdata->pm[0] =
@@ -354,8 +353,7 @@ static int au1xpsc_i2s_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int au1xpsc_i2s_resume(struct platform_device *pdev,
-			      struct snd_soc_dai *cpu_dai)
+static int au1xpsc_i2s_resume(struct snd_soc_dai *cpu_dai)
 {
 	/* select I2S mode and PSC clock */
 	au_writel(PSC_CTRL_DISABLE, PSC_CTRL(au1xpsc_i2s_workdata));
diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index 709bdf08e398..c602ce109d52 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -269,8 +269,7 @@ struct snd_ac97_bus_ops soc_ac97_ops = {
 EXPORT_SYMBOL_GPL(soc_ac97_ops);
 
 #ifdef CONFIG_PM
-static int bf5xx_ac97_suspend(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int bf5xx_ac97_suspend(struct snd_soc_dai *dai)
 {
 	struct sport_device *sport =
 		(struct sport_device *)dai->private_data;
@@ -285,8 +284,7 @@ static int bf5xx_ac97_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int bf5xx_ac97_resume(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int bf5xx_ac97_resume(struct snd_soc_dai *dai)
 {
 	int ret;
 	struct sport_device *sport =
diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c
index 6e5036bf9245..9f8ce87cc6c6 100644
--- a/sound/soc/blackfin/bf5xx-i2s.c
+++ b/sound/soc/blackfin/bf5xx-i2s.c
@@ -222,16 +222,14 @@ static int bf5xx_i2s_probe(struct platform_device *pdev,
 	return 0;
 }
 
-static void bf5xx_i2s_remove(struct platform_device *pdev,
-			   struct snd_soc_dai *dai)
+static void bf5xx_i2s_remove(struct snd_soc_dai *dai)
 {
 	pr_debug("%s enter\n", __func__);
 	peripheral_free_list(&sport_req[sport_num][0]);
 }
 
 #ifdef CONFIG_PM
-static int bf5xx_i2s_suspend(struct platform_device *dev,
-			     struct snd_soc_dai *dai)
+static int bf5xx_i2s_suspend(struct snd_soc_dai *dai)
 {
 	struct sport_device *sport =
 		(struct sport_device *)dai->private_data;
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 402fc5ba65e7..73fa10defcca 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -244,8 +244,7 @@ static void pxa_ssp_shutdown(struct snd_pcm_substream *substream,
 
 #ifdef CONFIG_PM
 
-static int pxa_ssp_suspend(struct platform_device *pdev,
-	struct snd_soc_dai *cpu_dai)
+static int pxa_ssp_suspend(struct snd_soc_dai *cpu_dai)
 {
 	struct ssp_priv *priv = cpu_dai->private_data;
 
@@ -257,8 +256,7 @@ static int pxa_ssp_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int pxa_ssp_resume(struct platform_device *pdev,
-	struct snd_soc_dai *cpu_dai)
+static int pxa_ssp_resume(struct snd_soc_dai *cpu_dai)
 {
 	struct ssp_priv *priv = cpu_dai->private_data;
 
diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c
index bffbe288634c..8eed80d5675d 100644
--- a/sound/soc/pxa/pxa2xx-ac97.c
+++ b/sound/soc/pxa/pxa2xx-ac97.c
@@ -87,14 +87,12 @@ static struct pxa2xx_pcm_dma_params pxa2xx_ac97_pcm_mic_mono_in = {
 };
 
 #ifdef CONFIG_PM
-static int pxa2xx_ac97_suspend(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int pxa2xx_ac97_suspend(struct snd_soc_dai *dai)
 {
 	return pxa2xx_ac97_hw_suspend();
 }
 
-static int pxa2xx_ac97_resume(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int pxa2xx_ac97_resume(struct snd_soc_dai *dai)
 {
 	return pxa2xx_ac97_hw_resume();
 }
diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c
index f9a9e2ebafa1..314973ace6dc 100644
--- a/sound/soc/pxa/pxa2xx-i2s.c
+++ b/sound/soc/pxa/pxa2xx-i2s.c
@@ -293,8 +293,7 @@ static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream,
 }
 
 #ifdef CONFIG_PM
-static int pxa2xx_i2s_suspend(struct platform_device *dev,
-	struct snd_soc_dai *dai)
+static int pxa2xx_i2s_suspend(struct snd_soc_dai *dai)
 {
 	if (!dai->active)
 		return 0;
@@ -311,8 +310,7 @@ static int pxa2xx_i2s_suspend(struct platform_device *dev,
 	return 0;
 }
 
-static int pxa2xx_i2s_resume(struct platform_device *pdev,
-	struct snd_soc_dai *dai)
+static int pxa2xx_i2s_resume(struct snd_soc_dai *dai)
 {
 	if (!dai->active)
 		return 0;
diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c
index 1c741047ae35..75f87c3c74d0 100644
--- a/sound/soc/s3c24xx/s3c2412-i2s.c
+++ b/sound/soc/s3c24xx/s3c2412-i2s.c
@@ -649,8 +649,7 @@ static int s3c2412_i2s_probe(struct platform_device *pdev,
 }
 
 #ifdef CONFIG_PM
-static int s3c2412_i2s_suspend(struct platform_device *dev,
-			      struct snd_soc_dai *dai)
+static int s3c2412_i2s_suspend(struct snd_soc_dai *dai)
 {
 	struct s3c2412_i2s_info *i2s = &s3c2412_i2s;
 	u32 iismod;
@@ -665,25 +664,24 @@ static int s3c2412_i2s_suspend(struct platform_device *dev,
 		iismod = readl(i2s->regs + S3C2412_IISMOD);
 
 		if (iismod & S3C2412_IISCON_RXDMA_ACTIVE)
-			dev_warn(&dev->dev, "%s: RXDMA active?\n", __func__);
+			pr_warning("%s: RXDMA active?\n", __func__);
 
 		if (iismod & S3C2412_IISCON_TXDMA_ACTIVE)
-			dev_warn(&dev->dev, "%s: TXDMA active?\n", __func__);
+			pr_warning("%s: TXDMA active?\n", __func__);
 
 		if (iismod & S3C2412_IISCON_IIS_ACTIVE)
-			dev_warn(&dev->dev, "%s: IIS active\n", __func__);
+			pr_warning("%s: IIS active\n", __func__);
 	}
 
 	return 0;
 }
 
-static int s3c2412_i2s_resume(struct platform_device *pdev,
-			      struct snd_soc_dai *dai)
+static int s3c2412_i2s_resume(struct snd_soc_dai *dai)
 {
 	struct s3c2412_i2s_info *i2s = &s3c2412_i2s;
 
-	dev_info(&pdev->dev, "dai_active %d, IISMOD %08x, IISCON %08x\n",
-		 dai->active, i2s->suspend_iismod, i2s->suspend_iiscon);
+	pr_info("dai_active %d, IISMOD %08x, IISCON %08x\n",
+		dai->active, i2s->suspend_iismod, i2s->suspend_iiscon);
 
 	if (dai->active) {
 		writel(i2s->suspend_iiscon, i2s->regs + S3C2412_IISCON);
diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c
index 8d9135f41bc9..45fe8f7c88ab 100644
--- a/sound/soc/s3c24xx/s3c24xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c24xx-i2s.c
@@ -419,8 +419,7 @@ static int s3c24xx_i2s_probe(struct platform_device *pdev,
 }
 
 #ifdef CONFIG_PM
-static int s3c24xx_i2s_suspend(struct platform_device *pdev,
-		struct snd_soc_dai *cpu_dai)
+static int s3c24xx_i2s_suspend(struct snd_soc_dai *cpu_dai)
 {
 	DBG("Entered %s\n", __func__);
 
@@ -434,8 +433,7 @@ static int s3c24xx_i2s_suspend(struct platform_device *pdev,
 	return 0;
 }
 
-static int s3c24xx_i2s_resume(struct platform_device *pdev,
-		struct snd_soc_dai *cpu_dai)
+static int s3c24xx_i2s_resume(struct snd_soc_dai *cpu_dai)
 {
 	DBG("Entered %s\n", __func__);
 	clk_enable(s3c24xx_i2s.iis_clk);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index f83852f11463..2f2a8d93bbf0 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -650,7 +650,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai  *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->suspend && !cpu_dai->ac97_control)
-			cpu_dai->suspend(pdev, cpu_dai);
+			cpu_dai->suspend(cpu_dai);
 		if (platform->suspend)
 			platform->suspend(cpu_dai);
 	}
@@ -676,7 +676,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state)
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->suspend && cpu_dai->ac97_control)
-			cpu_dai->suspend(pdev, cpu_dai);
+			cpu_dai->suspend(cpu_dai);
 	}
 
 	if (card->suspend_post)
@@ -712,7 +712,7 @@ static void soc_resume_deferred(struct work_struct *work)
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->resume && cpu_dai->ac97_control)
-			cpu_dai->resume(pdev, cpu_dai);
+			cpu_dai->resume(cpu_dai);
 	}
 
 	if (codec_dev->resume)
@@ -739,7 +739,7 @@ static void soc_resume_deferred(struct work_struct *work)
 	for (i = 0; i < card->num_links; i++) {
 		struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai;
 		if (cpu_dai->resume && !cpu_dai->ac97_control)
-			cpu_dai->resume(pdev, cpu_dai);
+			cpu_dai->resume(cpu_dai);
 		if (platform->resume)
 			platform->resume(cpu_dai);
 	}
-- 
cgit v1.2.3


From 52404881984e2d447f920a23e3bb63262dfc77f3 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Wed, 3 Dec 2008 15:42:56 -0800
Subject: Phonet: basic net namespace support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/phonet.h |  2 +-
 include/net/phonet/pn_dev.h |  2 +-
 net/phonet/af_phonet.c      |  8 +-------
 net/phonet/pn_dev.c         |  6 ++++--
 net/phonet/socket.c         | 11 +++++++----
 5 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index c6a245184460..057b0a8a2885 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -46,7 +46,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk)
 
 extern const struct proto_ops phonet_dgram_ops;
 
-struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa);
+struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa);
 void phonet_get_local_port_range(int *min, int *max);
 void pn_sock_hash(struct sock *sk);
 void pn_sock_unhash(struct sock *sk);
diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index bbd2a836e04c..aa1c59a1d33f 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -43,7 +43,7 @@ struct net_device *phonet_device_get(struct net *net);
 int phonet_address_add(struct net_device *dev, u8 addr);
 int phonet_address_del(struct net_device *dev, u8 addr);
 u8 phonet_address_get(struct net_device *dev, u8 addr);
-int phonet_address_lookup(u8 addr);
+int phonet_address_lookup(struct net *net, u8 addr);
 
 #define PN_NO_ADDR	0xff
 
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 9d211f12582b..13cb323f8c38 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -67,9 +67,6 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol)
 	struct phonet_protocol *pnp;
 	int err;
 
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
@@ -352,9 +349,6 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct sockaddr_pn sa;
 	u16 len;
 
-	if (dev_net(dev) != &init_net)
-		goto out;
-
 	/* check we have at least a full Phonet header */
 	if (!pskb_pull(skb, sizeof(struct phonethdr)))
 		goto out;
@@ -373,7 +367,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (pn_sockaddr_get_addr(&sa) == 0)
 		goto out; /* currently, we cannot be device 0 */
 
-	sk = pn_find_sock_by_sa(&sa);
+	sk = pn_find_sock_by_sa(dev_net(dev), &sa);
 	if (sk == NULL) {
 		if (can_respond(skb)) {
 			send_obj_unreachable(skb);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index f93ff8ef47d0..5491bf5e354b 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -76,7 +76,7 @@ struct net_device *phonet_device_get(struct net *net)
 		dev = pnd->netdev;
 		BUG_ON(!dev);
 
-		if (dev_net(dev) == net &&
+		if (net_eq(dev_net(dev), net) &&
 			(dev->reg_state == NETREG_REGISTERED) &&
 			((pnd->netdev->flags & IFF_UP)) == IFF_UP)
 			break;
@@ -140,12 +140,14 @@ u8 phonet_address_get(struct net_device *dev, u8 addr)
 	return addr;
 }
 
-int phonet_address_lookup(u8 addr)
+int phonet_address_lookup(struct net *net, u8 addr)
 {
 	struct phonet_device *pnd;
 
 	spin_lock_bh(&pndevs.lock);
 	list_for_each_entry(pnd, &pndevs.list, list) {
+		if (!net_eq(dev_net(pnd->netdev), net))
+			continue;
 		/* Don't allow unregistering devices! */
 		if ((pnd->netdev->reg_state != NETREG_REGISTERED) ||
 				((pnd->netdev->flags & IFF_UP)) != IFF_UP)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d81740187fb4..c75aa5cdead5 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -57,7 +57,7 @@ static struct  {
  * Find address based on socket address, match only certain fields.
  * Also grab sock if it was found. Remember to sock_put it later.
  */
-struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn)
+struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
 {
 	struct hlist_node *node;
 	struct sock *sknode;
@@ -71,6 +71,8 @@ struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn)
 		struct pn_sock *pn = pn_sk(sknode);
 		BUG_ON(!pn->sobject); /* unbound socket */
 
+		if (!net_eq(sock_net(sknode), net))
+			continue;
 		if (pn_port(obj)) {
 			/* Look up socket by port */
 			if (pn_port(pn->sobject) != pn_port(obj))
@@ -130,7 +132,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
 
 	handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr);
 	saddr = pn_addr(handle);
-	if (saddr && phonet_address_lookup(saddr))
+	if (saddr && phonet_address_lookup(sock_net(sk), saddr))
 		return -EADDRNOTAVAIL;
 
 	lock_sock(sk);
@@ -361,6 +363,7 @@ static DEFINE_MUTEX(port_mutex);
 int pn_sock_get_port(struct sock *sk, unsigned short sport)
 {
 	static int port_cur;
+	struct net *net = sock_net(sk);
 	struct pn_sock *pn = pn_sk(sk);
 	struct sockaddr_pn try_sa;
 	struct sock *tmpsk;
@@ -381,7 +384,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
 				port_cur = pmin;
 
 			pn_sockaddr_set_port(&try_sa, port_cur);
-			tmpsk = pn_find_sock_by_sa(&try_sa);
+			tmpsk = pn_find_sock_by_sa(net, &try_sa);
 			if (tmpsk == NULL) {
 				sport = port_cur;
 				goto found;
@@ -391,7 +394,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport)
 	} else {
 		/* try to find specific port */
 		pn_sockaddr_set_port(&try_sa, sport);
-		tmpsk = pn_find_sock_by_sa(&try_sa);
+		tmpsk = pn_find_sock_by_sa(net, &try_sa);
 		if (tmpsk == NULL)
 			/* No sock there! We can use that port... */
 			goto found;
-- 
cgit v1.2.3


From d253eee20195b25e298bf162a6e72f14bf4803e5 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Wed, 3 Dec 2008 15:52:35 -0800
Subject: can: Fix CAN_(EFF|RTR)_FLAG handling in can_filter

Due to a wrong safety check in af_can.c it was not possible to filter
for SFF frames with a specific CAN identifier without getting the
same selected CAN identifier from a received EFF frame also.

This fix has a minimum (but user visible) impact on the CAN filter
API and therefore the CAN version is set to a new date.

Indeed the 'old' API is still working as-is. But when now setting
CAN_(EFF|RTR)_FLAG in can_filter.can_mask you might get less traffic
than before - but still the stuff that you expected to get for your
defined filter ...

Thanks to Kurt Van Dijck for pointing at this issue and for the review.

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Acked-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/core.h |  2 +-
 net/can/af_can.c         | 63 ++++++++++++++++++++++++++++++++++++------------
 net/can/bcm.c            |  7 +++---
 3 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index e9ca210ffa5b..f50785ad4781 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -19,7 +19,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
-#define CAN_VERSION "20071116"
+#define CAN_VERSION "20081130"
 
 /* increment this number each time you change some user-space interface */
 #define CAN_ABI_VERSION "8"
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7d4d2b3c137e..d8173e50cb87 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -319,23 +319,52 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
 	return n ? d : NULL;
 }
 
+/**
+ * find_rcv_list - determine optimal filterlist inside device filter struct
+ * @can_id: pointer to CAN identifier of a given can_filter
+ * @mask: pointer to CAN mask of a given can_filter
+ * @d: pointer to the device filter struct
+ *
+ * Description:
+ *  Returns the optimal filterlist to reduce the filter handling in the
+ *  receive path. This function is called by service functions that need
+ *  to register or unregister a can_filter in the filter lists.
+ *
+ *  A filter matches in general, when
+ *
+ *          <received_can_id> & mask == can_id & mask
+ *
+ *  so every bit set in the mask (even CAN_EFF_FLAG, CAN_RTR_FLAG) describe
+ *  relevant bits for the filter.
+ *
+ *  The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can
+ *  filter for error frames (CAN_ERR_FLAG bit set in mask). For error frames
+ *  there is a special filterlist and a special rx path filter handling.
+ *
+ * Return:
+ *  Pointer to optimal filterlist for the given can_id/mask pair.
+ *  Constistency checked mask.
+ *  Reduced can_id to have a preprocessed filter compare value.
+ */
 static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
 					struct dev_rcv_lists *d)
 {
 	canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */
 
-	/* filter error frames */
+	/* filter for error frames in extra filterlist */
 	if (*mask & CAN_ERR_FLAG) {
-		/* clear CAN_ERR_FLAG in list entry */
+		/* clear CAN_ERR_FLAG in filter entry */
 		*mask &= CAN_ERR_MASK;
 		return &d->rx[RX_ERR];
 	}
 
-	/* ensure valid values in can_mask */
-	if (*mask & CAN_EFF_FLAG)
-		*mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG);
-	else
-		*mask &= (CAN_SFF_MASK | CAN_RTR_FLAG);
+	/* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */
+
+#define CAN_EFF_RTR_FLAGS (CAN_EFF_FLAG | CAN_RTR_FLAG)
+
+	/* ensure valid values in can_mask for 'SFF only' frame filtering */
+	if ((*mask & CAN_EFF_FLAG) && !(*can_id & CAN_EFF_FLAG))
+		*mask &= (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS);
 
 	/* reduce condition testing at receive time */
 	*can_id &= *mask;
@@ -348,15 +377,19 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
 	if (!(*mask))
 		return &d->rx[RX_ALL];
 
-	/* use extra filterset for the subscription of exactly *ONE* can_id */
-	if (*can_id & CAN_EFF_FLAG) {
-		if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) {
-			/* RFC: a use-case for hash-tables in the future? */
-			return &d->rx[RX_EFF];
+	/* extra filterlists for the subscription of a single non-RTR can_id */
+	if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS)
+	    && !(*can_id & CAN_RTR_FLAG)) {
+
+		if (*can_id & CAN_EFF_FLAG) {
+			if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) {
+				/* RFC: a future use-case for hash-tables? */
+				return &d->rx[RX_EFF];
+			}
+		} else {
+			if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS))
+				return &d->rx_sff[*can_id];
 		}
-	} else {
-		if (*mask == CAN_SFF_MASK)
-			return &d->rx_sff[*can_id];
 	}
 
 	/* default: filter via can_id/can_mask */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d0dd382001e2..da0d426c0ce4 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -64,10 +64,11 @@
 #define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */
 
 /* get best masking value for can_rx_register() for a given single can_id */
-#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \
-			(CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK))
+#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \
+		     (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
+		     (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
 
-#define CAN_BCM_VERSION "20080415"
+#define CAN_BCM_VERSION CAN_VERSION
 static __initdata const char banner[] = KERN_INFO
 	"can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n";
 
-- 
cgit v1.2.3


From 8865c418caf4e9dd2c24bdfae3a5a4106e143e60 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Wed, 3 Dec 2008 22:12:38 -0800
Subject: atm: 32-bit ioctl compatibility

We lack compat ioctl support through most of the ATM code. This patch
deals with most of it, and I can now at least use BR2684 and PPPoATM
with 32-bit userspace.

I haven't added a .compat_ioctl method to struct atm_ioctl, because
AFAICT none of the current users need any conversion -- so we can just
call the ->ioctl() method in every case. I looked at br2684, clip, lec,
mpc, pppoatm and atmtcp.

In svc_compat_ioctl() the only mangling which is needed is to change
COMPAT_ATM_ADDPARTY to ATM_ADDPARTY. Although it's defined as
	_IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf)
it doesn't actually _take_ a struct atm_iobuf as an argument -- it takes
a struct sockaddr_atmsvc, which _is_ the same between 32-bit and 64-bit
code, so doesn't need conversion.

Almost all of vcc_ioctl() would have been identical, so I converted that
into a core do_vcc_ioctl() function with an 'int compat' argument.

I've done the same with atm_dev_ioctl(), where there _are_ a few
differences, but still it's relatively contained and there would
otherwise have been a lot of duplication.

I haven't done any of the actual device-specific ioctls, although I've
added a compat_ioctl method to struct atmdev_ops.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atm.h    | 17 ++++++++--
 include/linux/atmdev.h | 15 +++++++++
 net/atm/common.h       |  1 +
 net/atm/ioctl.c        | 49 ++++++++++++++++++++++++----
 net/atm/pvc.c          |  3 ++
 net/atm/resources.c    | 88 ++++++++++++++++++++++++++++++++++++++------------
 net/atm/resources.h    |  2 +-
 net/atm/svc.c          | 19 +++++++++++
 8 files changed, 164 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/atm.h b/include/linux/atm.h
index c791ddd96939..d3b292174aeb 100644
--- a/include/linux/atm.h
+++ b/include/linux/atm.h
@@ -231,10 +231,21 @@ static __inline__ int atmpvc_addr_in_use(struct sockaddr_atmpvc addr)
  */
 
 struct atmif_sioc {
-    int number;
-    int length;
-    void __user *arg;
+	int number;
+	int length;
+	void __user *arg;
 };
 
+#ifdef __KERNEL__
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+struct compat_atmif_sioc {
+	int number;
+	int length;
+	compat_uptr_t arg;
+};
+#endif
+#endif
+
 typedef unsigned short atm_backend_t;
 #endif
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index a3d07c29d16c..086e5c362d3a 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -100,6 +100,10 @@ struct atm_dev_stats {
 					/* use backend to make new if */
 #define ATM_ADDPARTY  	_IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf)
  					/* add party to p2mp call */
+#ifdef CONFIG_COMPAT
+/* It actually takes struct sockaddr_atmsvc, not struct atm_iobuf */
+#define COMPAT_ATM_ADDPARTY  	_IOW('a', ATMIOC_SPECIAL+4,struct compat_atm_iobuf)
+#endif
 #define ATM_DROPPARTY 	_IOW('a', ATMIOC_SPECIAL+5,int)
 					/* drop party from p2mp call */
 
@@ -224,6 +228,13 @@ struct atm_cirange {
 extern struct proc_dir_entry *atm_proc_root;
 #endif
 
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+struct compat_atm_iobuf {
+	int length;
+	compat_uptr_t buffer;
+};
+#endif
 
 struct k_atm_aal_stats {
 #define __HANDLE_ITEM(i) atomic_t i
@@ -379,6 +390,10 @@ struct atmdev_ops { /* only send is required */
 	int (*open)(struct atm_vcc *vcc);
 	void (*close)(struct atm_vcc *vcc);
 	int (*ioctl)(struct atm_dev *dev,unsigned int cmd,void __user *arg);
+#ifdef CONFIG_COMPAT
+	int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd,
+			    void __user *arg);
+#endif
 	int (*getsockopt)(struct atm_vcc *vcc,int level,int optname,
 	    void __user *optval,int optlen);
 	int (*setsockopt)(struct atm_vcc *vcc,int level,int optname,
diff --git a/net/atm/common.h b/net/atm/common.h
index 16f32c1fa1c9..92e2981f479f 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -19,6 +19,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 		size_t total_len);
 unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_setsockopt(struct socket *sock, int level, int optname,
 		   char __user *optval, int optlen);
 int vcc_getsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 7afd8e7754fd..76ed3c8d26e6 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -19,6 +19,7 @@
 #include <linux/atmlec.h>
 #include <linux/mutex.h>
 #include <asm/ioctls.h>
+#include <net/compat.h>
 
 #include "resources.h"
 #include "signaling.h"		/* for WAITING and sigd_attach */
@@ -46,7 +47,7 @@ void deregister_atm_ioctl(struct atm_ioctl *ioctl)
 EXPORT_SYMBOL(register_atm_ioctl);
 EXPORT_SYMBOL(deregister_atm_ioctl);
 
-int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg, int compat)
 {
 	struct sock *sk = sock->sk;
 	struct atm_vcc *vcc;
@@ -80,13 +81,25 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 				goto done;
 			}
 		case SIOCGSTAMP: /* borrowed from IP */
-			error = sock_get_timestamp(sk, argp);
+#ifdef CONFIG_COMPAT
+			if (compat)
+				error = compat_sock_get_timestamp(sk, argp);
+			else
+#endif
+				error = sock_get_timestamp(sk, argp);
 			goto done;
 		case SIOCGSTAMPNS: /* borrowed from IP */
-			error = sock_get_timestampns(sk, argp);
+#ifdef CONFIG_COMPAT
+			if (compat)
+				error = compat_sock_get_timestampns(sk, argp);
+			else
+#endif
+				error = sock_get_timestampns(sk, argp);
 			goto done;
 		case ATM_SETSC:
-			printk(KERN_WARNING "ATM_SETSC is obsolete\n");
+			if (net_ratelimit())
+				printk(KERN_WARNING "ATM_SETSC is obsolete; used by %s:%d\n",
+				       current->comm, task_pid_nr(current));
 			error = 0;
 			goto done;
 		case ATMSIGD_CTRL:
@@ -99,12 +112,23 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			 * info uses kernel pointers as opaque references,
 			 * so the holder of the file descriptor can scribble
 			 * on the kernel... so we should make sure that we
-			 * have the same privledges that /proc/kcore needs
+			 * have the same privileges that /proc/kcore needs
 			 */
 			if (!capable(CAP_SYS_RAWIO)) {
 				error = -EPERM;
 				goto done;
 			}
+#ifdef CONFIG_COMPAT
+			/* WTF? I don't even want to _think_ about making this
+			   work for 32-bit userspace. TBH I don't really want
+			   to think about it at all. dwmw2. */
+			if (compat) {
+				if (net_ratelimit())
+					printk(KERN_WARNING "32-bit task cannot be atmsigd\n");
+				error = -EINVAL;
+				goto done;
+			}
+#endif
 			error = sigd_attach(vcc);
 			if (!error)
 				sock->state = SS_CONNECTED;
@@ -155,8 +179,21 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	if (error != -ENOIOCTLCMD)
 		goto done;
 
-	error = atm_dev_ioctl(cmd, argp);
+	error = atm_dev_ioctl(cmd, argp, compat);
 
 done:
 	return error;
 }
+
+
+int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_vcc_ioctl(sock, cmd, arg, 0);
+}
+
+#ifdef CONFIG_COMPAT
+int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	return do_vcc_ioctl(sock, cmd, arg, 1);
+}
+#endif
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 43e8bf5ed001..e1d22d9430dd 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -113,6 +113,9 @@ static const struct proto_ops pvc_proto_ops = {
 	.getname =	pvc_getname,
 	.poll =		vcc_poll,
 	.ioctl =	vcc_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = vcc_compat_ioctl,
+#endif
 	.listen =	sock_no_listen,
 	.shutdown =	pvc_shutdown,
 	.setsockopt =	pvc_setsockopt,
diff --git a/net/atm/resources.c b/net/atm/resources.c
index a34ba948af96..56b7322ff461 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -195,20 +195,39 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, in
 }
 
 
-int atm_dev_ioctl(unsigned int cmd, void __user *arg)
+int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
 {
 	void __user *buf;
 	int error, len, number, size = 0;
 	struct atm_dev *dev;
 	struct list_head *p;
 	int *tmp_buf, *tmp_p;
-	struct atm_iobuf __user *iobuf = arg;
-	struct atmif_sioc __user *sioc = arg;
+	int __user *sioc_len;
+	int __user *iobuf_len;
+
+#ifndef CONFIG_COMPAT
+	compat = 0; /* Just so the compiler _knows_ */
+#endif
+
 	switch (cmd) {
 		case ATM_GETNAMES:
-			if (get_user(buf, &iobuf->buffer))
-				return -EFAULT;
-			if (get_user(len, &iobuf->length))
+
+			if (compat) {
+#ifdef CONFIG_COMPAT
+				struct compat_atm_iobuf __user *ciobuf = arg;
+				compat_uptr_t cbuf;
+				iobuf_len = &ciobuf->length;
+				if (get_user(cbuf, &ciobuf->buffer))
+					return -EFAULT;
+				buf = compat_ptr(cbuf);
+#endif
+			} else {
+				struct atm_iobuf __user *iobuf = arg;
+				iobuf_len = &iobuf->length;
+				if (get_user(buf, &iobuf->buffer))
+					return -EFAULT;
+			}
+			if (get_user(len, iobuf_len))
 				return -EFAULT;
 			mutex_lock(&atm_dev_mutex);
 			list_for_each(p, &atm_devs)
@@ -229,7 +248,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg)
 			}
 			mutex_unlock(&atm_dev_mutex);
 			error = ((copy_to_user(buf, tmp_buf, size)) ||
-					put_user(size, &iobuf->length))
+					put_user(size, iobuf_len))
 						? -EFAULT : 0;
 			kfree(tmp_buf);
 			return error;
@@ -237,13 +256,32 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg)
 			break;
 	}
 
-	if (get_user(buf, &sioc->arg))
-		return -EFAULT;
-	if (get_user(len, &sioc->length))
-		return -EFAULT;
-	if (get_user(number, &sioc->number))
-		return -EFAULT;
-
+	if (compat) {
+#ifdef CONFIG_COMPAT
+		struct compat_atmif_sioc __user *csioc = arg;
+		compat_uptr_t carg;
+
+		sioc_len = &csioc->length;
+		if (get_user(carg, &csioc->arg))
+			return -EFAULT;
+		buf = compat_ptr(carg);
+
+		if (get_user(len, &csioc->length))
+			return -EFAULT;
+		if (get_user(number, &csioc->number))
+			return -EFAULT;
+#endif
+	} else {
+		struct atmif_sioc __user *sioc = arg;
+
+		sioc_len = &sioc->length;
+		if (get_user(buf, &sioc->arg))
+			return -EFAULT;
+		if (get_user(len, &sioc->length))
+			return -EFAULT;
+		if (get_user(number, &sioc->number))
+			return -EFAULT;
+	}
 	if (!(dev = try_then_request_module(atm_dev_lookup(number),
 					    "atm-device-%d", number)))
 		return -ENODEV;
@@ -358,7 +396,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg)
 			size = error;
 			/* may return 0, but later on size == 0 means "don't
 			   write the length" */
-			error = put_user(size, &sioc->length)
+			error = put_user(size, sioc_len)
 				? -EFAULT : 0;
 			goto done;
 		case ATM_SETLOOP:
@@ -380,11 +418,21 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg)
 			}
 			/* fall through */
 		default:
-			if (!dev->ops->ioctl) {
-				error = -EINVAL;
-				goto done;
+			if (compat) {
+#ifdef CONFIG_COMPAT
+				if (!dev->ops->compat_ioctl) {
+					error = -EINVAL;
+					goto done;
+				}
+				size = dev->ops->compat_ioctl(dev, cmd, buf);
+#endif
+			} else {
+				if (!dev->ops->ioctl) {
+					error = -EINVAL;
+					goto done;
+				}
+				size = dev->ops->ioctl(dev, cmd, buf);
 			}
-			size = dev->ops->ioctl(dev, cmd, buf);
 			if (size < 0) {
 				error = (size == -ENOIOCTLCMD ? -EINVAL : size);
 				goto done;
@@ -392,7 +440,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg)
 	}
 
 	if (size)
-		error = put_user(size, &sioc->length)
+		error = put_user(size, sioc_len)
 			? -EFAULT : 0;
 	else
 		error = 0;
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 1d004aaaeec1..126fb1840dfb 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -13,7 +13,7 @@
 extern struct list_head atm_devs;
 extern struct mutex atm_dev_mutex;
 
-int atm_dev_ioctl(unsigned int cmd, void __user *arg);
+int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
 
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/atm/svc.c b/net/atm/svc.c
index de1e4f2f3a43..e9c65500f84e 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -604,6 +604,22 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return error;
 }
 
+#ifdef CONFIG_COMPAT
+static int svc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	/* The definition of ATM_ADDPARTY uses the size of struct atm_iobuf.
+	   But actually it takes a struct sockaddr_atmsvc, which doesn't need
+	   compat handling. So all we have to do is fix up cmd... */
+	if (cmd == COMPAT_ATM_ADDPARTY)
+		cmd = ATM_ADDPARTY;
+
+	if (cmd == ATM_ADDPARTY || cmd == ATM_DROPPARTY)
+		return svc_ioctl(sock, cmd, arg);
+	else
+		return vcc_compat_ioctl(sock, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
 static const struct proto_ops svc_proto_ops = {
 	.family =	PF_ATMSVC,
 	.owner =	THIS_MODULE,
@@ -616,6 +632,9 @@ static const struct proto_ops svc_proto_ops = {
 	.getname =	svc_getname,
 	.poll =		vcc_poll,
 	.ioctl =	svc_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	svc_compat_ioctl,
+#endif
 	.listen =	svc_listen,
 	.shutdown =	svc_shutdown,
 	.setsockopt =	svc_setsockopt,
-- 
cgit v1.2.3


From ea4e2bc4d9f7370e57a343ccb5e7c0ad3222ec3c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 3 Dec 2008 15:36:57 -0500
Subject: ftrace: graph of a single function

This patch adds the file:

   /debugfs/tracing/set_graph_function

which can be used along with the function graph tracer.

When this file is empty, the function graph tracer will act as
usual. When the file has a function in it, the function graph
tracer will only trace that function.

For example:

 # echo blk_unplug > /debugfs/tracing/set_graph_function
 # cat /debugfs/tracing/trace
 [...]
 ------------------------------------------
 | 2)  make-19003  =>  kjournald-2219
 ------------------------------------------

 2)               |  blk_unplug() {
 2)               |    dm_unplug_all() {
 2)               |      dm_get_table() {
 2)      1.381 us |        _read_lock();
 2)      0.911 us |        dm_table_get();
 2)      1. 76 us |        _read_unlock();
 2) +   12.912 us |      }
 2)               |      dm_table_unplug_all() {
 2)               |        blk_unplug() {
 2)      0.778 us |          generic_unplug_device();
 2)      2.409 us |        }
 2)      5.992 us |      }
 2)      0.813 us |      dm_table_put();
 2) +   29. 90 us |    }
 2) +   34.532 us |  }

You can add up to 32 functions into this file. Currently we limit it
to 32, but this may change with later improvements.

To add another function, use the append '>>':

  # echo sys_read >> /debugfs/tracing/set_graph_function
  # cat /debugfs/tracing/set_graph_function
  blk_unplug
  sys_read

Using the '>' will clear out the function and write anew:

  # echo sys_write > /debug/tracing/set_graph_function
  # cat /debug/tracing/set_graph_function
  sys_write

Note, if you have function graph running while doing this, the small
time between clearing it and updating it will cause the graph to
record all functions. This should not be an issue because after
it sets the filter, only those functions will be recorded from then on.
If you need to only record a particular function then set this
file first before starting the function graph tracer. In the future
this side effect may be corrected.

The set_graph_function file is similar to the set_ftrace_filter but
it does not take wild cards nor does it allow for more than one
function to be set with a single write. There is no technical reason why
this is the case, I just do not have the time yet to implement that.

Note, dynamic ftrace must be enabled for this to appear because it
uses the dynamic ftrace records to match the name to the mcount
call sites.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  46 ++++++++++
 include/linux/sched.h  |   4 +
 kernel/trace/ftrace.c  | 227 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.c   |   8 ++
 kernel/trace/trace.h   |  30 ++++++-
 5 files changed, 314 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 469ceb3e85ba..b295d3106bfe 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kallsyms.h>
+#include <linux/bitops.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
 
@@ -391,4 +392,49 @@ static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
 #endif
 
+#ifdef CONFIG_TRACING
+#include <linux/sched.h>
+
+/* flags for current->trace */
+enum {
+	TSK_TRACE_FL_TRACE_BIT	= 0,
+	TSK_TRACE_FL_GRAPH_BIT	= 1,
+};
+enum {
+	TSK_TRACE_FL_TRACE	= 1 << TSK_TRACE_FL_TRACE_BIT,
+	TSK_TRACE_FL_GRAPH	= 1 << TSK_TRACE_FL_GRAPH_BIT,
+};
+
+static inline void set_tsk_trace_trace(struct task_struct *tsk)
+{
+	set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
+}
+
+static inline void clear_tsk_trace_trace(struct task_struct *tsk)
+{
+	clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
+}
+
+static inline int test_tsk_trace_trace(struct task_struct *tsk)
+{
+	return tsk->trace & TSK_TRACE_FL_TRACE;
+}
+
+static inline void set_tsk_trace_graph(struct task_struct *tsk)
+{
+	set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
+}
+
+static inline void clear_tsk_trace_graph(struct task_struct *tsk)
+{
+	clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
+}
+
+static inline int test_tsk_trace_graph(struct task_struct *tsk)
+{
+	return tsk->trace & TSK_TRACE_FL_GRAPH;
+}
+
+#endif /* CONFIG_TRACING */
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2d0a93c31228..4c152e0acc9e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1380,6 +1380,10 @@ struct task_struct {
 	 */
 	atomic_t trace_overrun;
 #endif
+#ifdef CONFIG_TRACING
+	/* state flags for use by tracers */
+	unsigned long trace;
+#endif
 };
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 65b9e863056b..b17a30350f06 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1320,6 +1320,224 @@ static struct file_operations ftrace_notrace_fops = {
 	.release = ftrace_notrace_release,
 };
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+static DEFINE_MUTEX(graph_lock);
+
+int ftrace_graph_count;
+unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+
+static void *
+g_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	unsigned long *array = m->private;
+	int index = *pos;
+
+	(*pos)++;
+
+	if (index >= ftrace_graph_count)
+		return NULL;
+
+	return &array[index];
+}
+
+static void *g_start(struct seq_file *m, loff_t *pos)
+{
+	void *p = NULL;
+
+	mutex_lock(&graph_lock);
+
+	p = g_next(m, p, pos);
+
+	return p;
+}
+
+static void g_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&graph_lock);
+}
+
+static int g_show(struct seq_file *m, void *v)
+{
+	unsigned long *ptr = v;
+	char str[KSYM_SYMBOL_LEN];
+
+	if (!ptr)
+		return 0;
+
+	kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
+
+	seq_printf(m, "%s\n", str);
+
+	return 0;
+}
+
+static struct seq_operations ftrace_graph_seq_ops = {
+	.start = g_start,
+	.next = g_next,
+	.stop = g_stop,
+	.show = g_show,
+};
+
+static int
+ftrace_graph_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	mutex_lock(&graph_lock);
+	if ((file->f_mode & FMODE_WRITE) &&
+	    !(file->f_flags & O_APPEND)) {
+		ftrace_graph_count = 0;
+		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
+	}
+
+	if (file->f_mode & FMODE_READ) {
+		ret = seq_open(file, &ftrace_graph_seq_ops);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = ftrace_graph_funcs;
+		}
+	} else
+		file->private_data = ftrace_graph_funcs;
+	mutex_unlock(&graph_lock);
+
+	return ret;
+}
+
+static ssize_t
+ftrace_graph_read(struct file *file, char __user *ubuf,
+		       size_t cnt, loff_t *ppos)
+{
+	if (file->f_mode & FMODE_READ)
+		return seq_read(file, ubuf, cnt, ppos);
+	else
+		return -EPERM;
+}
+
+static int
+ftrace_set_func(unsigned long *array, int idx, char *buffer)
+{
+	char str[KSYM_SYMBOL_LEN];
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	int found = 0;
+	int i;
+
+	if (ftrace_disabled)
+		return -ENODEV;
+
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		for (i = 0; i < pg->index; i++) {
+			rec = &pg->records[i];
+
+			if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
+				continue;
+
+			kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+			if (strcmp(str, buffer) == 0) {
+				found = 1;
+				array[idx] = rec->ip;
+				break;
+			}
+		}
+	}
+	spin_unlock(&ftrace_lock);
+
+	return found ? 0 : -EINVAL;
+}
+
+static ssize_t
+ftrace_graph_write(struct file *file, const char __user *ubuf,
+		   size_t cnt, loff_t *ppos)
+{
+	unsigned char buffer[FTRACE_BUFF_MAX+1];
+	unsigned long *array;
+	size_t read = 0;
+	ssize_t ret;
+	int index = 0;
+	char ch;
+
+	if (!cnt || cnt < 0)
+		return 0;
+
+	mutex_lock(&graph_lock);
+
+	if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (file->f_mode & FMODE_READ) {
+		struct seq_file *m = file->private_data;
+		array = m->private;
+	} else
+		array = file->private_data;
+
+	ret = get_user(ch, ubuf++);
+	if (ret)
+		goto out;
+	read++;
+	cnt--;
+
+	/* skip white space */
+	while (cnt && isspace(ch)) {
+		ret = get_user(ch, ubuf++);
+		if (ret)
+			goto out;
+		read++;
+		cnt--;
+	}
+
+	if (isspace(ch)) {
+		*ppos += read;
+		ret = read;
+		goto out;
+	}
+
+	while (cnt && !isspace(ch)) {
+		if (index < FTRACE_BUFF_MAX)
+			buffer[index++] = ch;
+		else {
+			ret = -EINVAL;
+			goto out;
+		}
+		ret = get_user(ch, ubuf++);
+		if (ret)
+			goto out;
+		read++;
+		cnt--;
+	}
+	buffer[index] = 0;
+
+	/* we allow only one at a time */
+	ret = ftrace_set_func(array, ftrace_graph_count, buffer);
+	if (ret)
+		goto out;
+
+	ftrace_graph_count++;
+
+	file->f_pos += read;
+
+	ret = read;
+ out:
+	mutex_unlock(&graph_lock);
+
+	return ret;
+}
+
+static const struct file_operations ftrace_graph_fops = {
+	.open = ftrace_graph_open,
+	.read = ftrace_graph_read,
+	.write = ftrace_graph_write,
+};
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 {
 	struct dentry *entry;
@@ -1347,6 +1565,15 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 		pr_warning("Could not create debugfs "
 			   "'set_ftrace_notrace' entry\n");
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+				    NULL,
+				    &ftrace_graph_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'set_graph_function' entry\n");
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8b6409a62b54..710b39acd81b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1209,6 +1209,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 	int cpu;
 	int pc;
 
+	if (!ftrace_graph_addr(trace->func))
+		return 0;
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
@@ -1217,6 +1220,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 		pc = preempt_count();
 		__trace_graph_entry(tr, data, trace, flags, pc);
 	}
+	/* Only do the atomic if it is not already set */
+	if (!test_tsk_trace_graph(current))
+		set_tsk_trace_graph(current);
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 
@@ -1240,6 +1246,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 		pc = preempt_count();
 		__trace_graph_return(tr, data, trace, flags, pc);
 	}
+	if (!trace->depth)
+		clear_tsk_trace_graph(current);
 	atomic_dec(&data->disabled);
 	local_irq_restore(flags);
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0565ae9a2210..41f026bfc9ed 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -505,13 +505,41 @@ extern unsigned long trace_flags;
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* TODO: make this variable */
+#define FTRACE_GRAPH_MAX_FUNCS		32
+extern int ftrace_graph_count;
+extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
+
+static inline int ftrace_graph_addr(unsigned long addr)
+{
+	int i;
+
+	if (!ftrace_graph_count || test_tsk_trace_graph(current))
+		return 1;
+
+	for (i = 0; i < ftrace_graph_count; i++) {
+		if (addr == ftrace_graph_funcs[i])
+			return 1;
+	}
+
+	return 0;
+}
 #else
+static inline int ftrace_trace_addr(unsigned long addr)
+{
+	return 1
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function(struct trace_iterator *iter)
 {
 	return TRACE_TYPE_UNHANDLED;
 }
-#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 /*
  * trace_iterator_flags is an enumeration that defines bit
-- 
cgit v1.2.3


From 5ef6476190d24419a9a537baa0b5641845136989 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 4 Dec 2008 00:26:39 -0500
Subject: pid: fix the do_each_pid_task() macro

Impact: macro side-effects fix

This patch adds parenthesis around 'pid' in the do_each_pid_task
macro to allow callers to pass in more complex parameters.

e.g.  do_each_pid_task(*pid, type, task)

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/pid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index d7e98ff8021e..bb206c56d1f0 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -147,9 +147,9 @@ pid_t pid_vnr(struct pid *pid);
 #define do_each_pid_task(pid, type, task)				\
 	do {								\
 		struct hlist_node *pos___;				\
-		if (pid != NULL)					\
+		if ((pid) != NULL)					\
 			hlist_for_each_entry_rcu((task), pos___,	\
-				&pid->tasks[type], pids[type].node) {
+				&(pid)->tasks[type], pids[type].node) {
 
 			/*
 			 * Both old and new leaders may be attached to
-- 
cgit v1.2.3


From 00ef9f7348dfd2fc223ec42aceb30836e86b367f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Dec 2008 09:00:17 +0100
Subject: lockdep: change a held lock's class

Impact: introduce new lockdep API

Allow to change a held lock's class. Basically the same as the existing
code to change a subclass therefore reuse all that.

The XFS code will be able to use this to annotate their inode locking.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 12 ++++++++++--
 kernel/lockdep.c        | 24 +++++++++---------------
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8956daf64abd..37a0361f4685 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -314,8 +314,15 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 extern void lock_release(struct lockdep_map *lock, int nested,
 			 unsigned long ip);
 
-extern void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass,
-			      unsigned long ip);
+extern void lock_set_class(struct lockdep_map *lock, const char *name,
+			   struct lock_class_key *key, unsigned int subclass,
+			   unsigned long ip);
+
+static inline void lock_set_subclass(struct lockdep_map *lock,
+		unsigned int subclass, unsigned long ip)
+{
+	lock_set_class(lock, lock->name, lock->key, subclass, ip);
+}
 
 # define INIT_LOCKDEP				.lockdep_recursion = 0,
 
@@ -333,6 +340,7 @@ static inline void lockdep_on(void)
 
 # define lock_acquire(l, s, t, r, c, n, i)	do { } while (0)
 # define lock_release(l, n, i)			do { } while (0)
+# define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 90f3fb64dbce..4fa6eeb4e8a7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -291,14 +291,12 @@ void lockdep_off(void)
 {
 	current->lockdep_recursion++;
 }
-
 EXPORT_SYMBOL(lockdep_off);
 
 void lockdep_on(void)
 {
 	current->lockdep_recursion--;
 }
-
 EXPORT_SYMBOL(lockdep_on);
 
 /*
@@ -2513,7 +2511,6 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 	if (subclass)
 		register_lock_class(lock, subclass, 1);
 }
-
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
 /*
@@ -2694,8 +2691,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
 }
 
 static int
-__lock_set_subclass(struct lockdep_map *lock,
-		    unsigned int subclass, unsigned long ip)
+__lock_set_class(struct lockdep_map *lock, const char *name,
+		 struct lock_class_key *key, unsigned int subclass,
+		 unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
@@ -2722,6 +2720,7 @@ __lock_set_subclass(struct lockdep_map *lock,
 	return print_unlock_inbalance_bug(curr, lock, ip);
 
 found_it:
+	lockdep_init_map(lock, name, key, 0);
 	class = register_lock_class(lock, subclass, 0);
 	hlock->class_idx = class - lock_classes + 1;
 
@@ -2906,9 +2905,9 @@ static void check_flags(unsigned long flags)
 #endif
 }
 
-void
-lock_set_subclass(struct lockdep_map *lock,
-		  unsigned int subclass, unsigned long ip)
+void lock_set_class(struct lockdep_map *lock, const char *name,
+		    struct lock_class_key *key, unsigned int subclass,
+		    unsigned long ip)
 {
 	unsigned long flags;
 
@@ -2918,13 +2917,12 @@ lock_set_subclass(struct lockdep_map *lock,
 	raw_local_irq_save(flags);
 	current->lockdep_recursion = 1;
 	check_flags(flags);
-	if (__lock_set_subclass(lock, subclass, ip))
+	if (__lock_set_class(lock, name, key, subclass, ip))
 		check_chain_key(current);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL_GPL(lock_set_subclass);
+EXPORT_SYMBOL_GPL(lock_set_class);
 
 /*
  * We are not always called with irqs disabled - do that here,
@@ -2948,7 +2946,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-
 EXPORT_SYMBOL_GPL(lock_acquire);
 
 void lock_release(struct lockdep_map *lock, int nested,
@@ -2966,7 +2963,6 @@ void lock_release(struct lockdep_map *lock, int nested,
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-
 EXPORT_SYMBOL_GPL(lock_release);
 
 #ifdef CONFIG_LOCK_STAT
@@ -3451,7 +3447,6 @@ retry:
 	if (unlock)
 		read_unlock(&tasklist_lock);
 }
-
 EXPORT_SYMBOL_GPL(debug_show_all_locks);
 
 /*
@@ -3472,7 +3467,6 @@ void debug_show_held_locks(struct task_struct *task)
 {
 		__debug_show_held_locks(task);
 }
-
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
 
 void lockdep_sys_exit(void)
-- 
cgit v1.2.3


From 32c8dabc97d436582298ebd0e33af041c69f5a4b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 3 Dec 2008 19:41:13 +0000
Subject: ASoC: Remove obsolete declaration of struct snd_soc_clock_info

The struct is never defined.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 8ec63c02dc10..79d855d2bddd 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -152,7 +152,6 @@ struct snd_soc_dai;
 struct snd_soc_codec;
 struct soc_enum;
 struct snd_soc_ac97_ops;
-struct snd_soc_clock_info;
 
 typedef int (*hw_write_t)(void *,const char* ,int);
 typedef int (*hw_read_t)(void *,char* ,int);
-- 
cgit v1.2.3


From c9bb6003dd096ad190e1594a7d835ae1c39fae8f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 29 Oct 2008 23:46:09 -0700
Subject: of: Fix comment, sparc no longer uses of_device objects on special
 busses.

It only uses of_platform_bus_type.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_platform.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index a8efcfeea732..3d327b67d7e2 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -26,8 +26,7 @@ extern struct bus_type of_platform_bus_type;
 
 /*
  * An of_platform_driver driver is attached to a basic of_device on
- * the "platform bus" (of_platform_bus_type) (or ISA, EBUS and SBUS
- * busses on sparc).
+ * the "platform bus" (of_platform_bus_type).
  */
 struct of_platform_driver
 {
-- 
cgit v1.2.3


From 21a8c466f99063eeb8567318b4e305eda9015408 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 4 Dec 2008 23:51:23 +0100
Subject: tracing/ftrace: provide the macro task_curr_ret_stack()

Impact: cleanup

As suggested by Steven Rostedt, this patch provide a new macro
task_curr_ret_stack() to move the cpp conditionnal CONFIG into
the linux/ftrace.h headers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 11 +++++++++++
 kernel/trace/trace.c   |  8 +-------
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b295d3106bfe..b9b4d0a22d10 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -8,6 +8,7 @@
 #include <linux/types.h>
 #include <linux/kallsyms.h>
 #include <linux/bitops.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
 
@@ -387,9 +388,19 @@ extern void unregister_ftrace_graph(void);
 
 extern void ftrace_graph_init_task(struct task_struct *t);
 extern void ftrace_graph_exit_task(struct task_struct *t);
+
+static inline int task_curr_ret_stack(struct task_struct *t)
+{
+	return t->curr_ret_stack;
+}
 #else
 static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
+
+static inline int task_curr_ret_stack(struct task_struct *tsk)
+{
+	return -1;
+}
 #endif
 
 #ifdef CONFIG_TRACING
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5dca6ef1fbeb..7a93c663e52a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3657,13 +3657,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 		return 0;
 
 	va_start(ap, fmt);
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	ret = trace_vprintk(ip, current->curr_ret_stack, fmt, ap);
-#else
-	ret = trace_vprintk(ip, -1, fmt, ap);
-#endif
-
+	ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
 	va_end(ap);
 	return ret;
 }
-- 
cgit v1.2.3


From 72bdcf34380917260da41e3c49e10edee04bc5cd Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Wed, 26 Nov 2008 16:15:24 +0200
Subject: nl80211: Add frequency configuration (including HT40)

This patch adds new NL80211_CMD_SET_WIPHY attributes
NL80211_ATTR_WIPHY_FREQ and NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET to allow
userspace to set the operating channel (e.g., hostapd for AP mode).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    | 23 +++++++++++++++++--
 include/net/cfg80211.h     |  9 ++++++++
 include/net/mac80211.h     |  3 +++
 net/mac80211/cfg.c         | 13 +++++++++++
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/main.c        | 30 ++++++++++++++++++++----
 net/mac80211/util.c        |  1 +
 net/wireless/nl80211.c     | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 131 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index e08c8bcfb78d..92f79d2bdd8c 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -26,8 +26,9 @@
  * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request
  *	to get a list of all present wiphys.
  * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
- *	%NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME
- *	and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS.
+ *	%NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME,
+ *	%NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or
+ *	%NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -180,6 +181,14 @@ enum nl80211_commands {
  *	/sys/class/ieee80211/<phyname>/index
  * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming)
  * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters
+ * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz
+ * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ
+ *	if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included):
+ *	NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including
+ *		this attribute)
+ *	NL80211_SEC_CHAN_DISABLED = HT20 only
+ *	NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel
+ *	NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel
  *
  * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
  * @NL80211_ATTR_IFNAME: network interface name
@@ -315,6 +324,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_BSS_BASIC_RATES,
 
 	NL80211_ATTR_WIPHY_TXQ_PARAMS,
+	NL80211_ATTR_WIPHY_FREQ,
+	NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -329,6 +340,8 @@ enum nl80211_attrs {
 #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
 #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES
 #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS
+#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ
+#define NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
@@ -742,4 +755,10 @@ enum nl80211_txq_q {
 	NL80211_TXQ_Q_BK
 };
 
+enum nl80211_sec_chan_offset {
+	NL80211_SEC_CHAN_NO_HT /* No HT */,
+	NL80211_SEC_CHAN_DISABLED /* HT20 only */,
+	NL80211_SEC_CHAN_BELOW /* HT40- */,
+	NL80211_SEC_CHAN_ABOVE /* HT40+ */
+};
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1d57835d73f2..53b06f6c62ca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -392,6 +392,9 @@ struct ieee80211_txq_params {
 /* from net/wireless.h */
 struct wiphy;
 
+/* from net/ieee80211.h */
+struct ieee80211_channel;
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -450,6 +453,8 @@ struct wiphy;
  * @change_bss: Modify parameters for a given BSS.
  *
  * @set_txq_params: Set TX queue parameters
+ *
+ * @set_channel: Set channel
  */
 struct cfg80211_ops {
 	int	(*add_virtual_intf)(struct wiphy *wiphy, char *name,
@@ -513,6 +518,10 @@ struct cfg80211_ops {
 
 	int	(*set_txq_params)(struct wiphy *wiphy,
 				  struct ieee80211_txq_params *params);
+
+	int	(*set_channel)(struct wiphy *wiphy,
+			       struct ieee80211_channel *chan,
+			       enum nl80211_sec_chan_offset);
 };
 
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6a1d4ea18186..6e823cc6a4b1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -507,6 +507,9 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
 
 struct ieee80211_ht_conf {
 	bool enabled;
+	int sec_chan_offset; /* 0 = HT40 disabled; -1 = HT40 enabled, secondary
+			      * channel below primary; 1 = HT40 enabled,
+			      * secondary channel above primary */
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 16423f94801b..7a7a6c176dc5 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1095,6 +1095,18 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_set_channel(struct wiphy *wiphy,
+				 struct ieee80211_channel *chan,
+				 enum nl80211_sec_chan_offset sec_chan_offset)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	local->oper_channel = chan;
+	local->oper_sec_chan_offset = sec_chan_offset;
+
+	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1122,4 +1134,5 @@ struct cfg80211_ops mac80211_config_ops = {
 #endif
 	.change_bss = ieee80211_change_bss,
 	.set_txq_params = ieee80211_set_txq_params,
+	.set_channel = ieee80211_set_channel,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 155a20410017..527205f8c1a1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -626,6 +626,7 @@ struct ieee80211_local {
 	struct delayed_work scan_work;
 	struct ieee80211_sub_if_data *scan_sdata;
 	struct ieee80211_channel *oper_channel, *scan_channel;
+	enum nl80211_sec_chan_offset oper_sec_chan_offset;
 	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
 	size_t scan_ssid_len;
 	struct list_head bss_list;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index cec9b6d3e1ce..29c3ecf7e914 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -195,20 +195,42 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 	struct ieee80211_channel *chan;
 	int ret = 0;
 	int power;
+	enum nl80211_sec_chan_offset sec_chan_offset;
 
 	might_sleep();
 
-	if (local->sw_scanning)
+	if (local->sw_scanning) {
 		chan = local->scan_channel;
-	else
+		sec_chan_offset = NL80211_SEC_CHAN_NO_HT;
+	} else {
 		chan = local->oper_channel;
+		sec_chan_offset = local->oper_sec_chan_offset;
+	}
 
-	if (chan != local->hw.conf.channel) {
+	if (chan != local->hw.conf.channel ||
+	    sec_chan_offset != local->hw.conf.ht.sec_chan_offset) {
 		local->hw.conf.channel = chan;
+		switch (sec_chan_offset) {
+		case NL80211_SEC_CHAN_NO_HT:
+			local->hw.conf.ht.enabled = false;
+			local->hw.conf.ht.sec_chan_offset = 0;
+			break;
+		case NL80211_SEC_CHAN_DISABLED:
+			local->hw.conf.ht.enabled = true;
+			local->hw.conf.ht.sec_chan_offset = 0;
+			break;
+		case NL80211_SEC_CHAN_BELOW:
+			local->hw.conf.ht.enabled = true;
+			local->hw.conf.ht.sec_chan_offset = -1;
+			break;
+		case NL80211_SEC_CHAN_ABOVE:
+			local->hw.conf.ht.enabled = true;
+			local->hw.conf.ht.sec_chan_offset = 1;
+			break;
+		}
 		changed |= IEEE80211_CONF_CHANGE_CHANNEL;
 	}
 
-
 	if (!local->hw.conf.power_level)
 		power = chan->max_power;
 	else
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0f841317c7e9..505d68f344ce 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -641,6 +641,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 		    chan->flags & IEEE80211_CHAN_NO_IBSS)
 			return ret;
 		local->oper_channel = chan;
+		local->oper_sec_chan_offset = NL80211_SEC_CHAN_NO_HT;
 
 		if (local->sw_scanning || local->hw_scanning)
 			ret = 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c9141e3df9ba..9caee6022e3f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -59,6 +59,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
 				      .len = BUS_ID_SIZE-1 },
 	[NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED },
+	[NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET] = { .type = NLA_U32 },
 
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -359,6 +361,61 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
+		enum nl80211_sec_chan_offset sec_chan_offset =
+			NL80211_SEC_CHAN_NO_HT;
+		struct ieee80211_channel *chan;
+		u32 freq, sec_freq;
+
+		if (!rdev->ops->set_channel) {
+			result = -EOPNOTSUPP;
+			goto bad_res;
+		}
+
+		if (info->attrs[NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]) {
+			sec_chan_offset = nla_get_u32(
+				info->attrs[
+					NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]);
+			if (sec_chan_offset != NL80211_SEC_CHAN_NO_HT &&
+			    sec_chan_offset != NL80211_SEC_CHAN_DISABLED &&
+			    sec_chan_offset != NL80211_SEC_CHAN_BELOW &&
+			    sec_chan_offset != NL80211_SEC_CHAN_ABOVE) {
+				result = -EINVAL;
+				goto bad_res;
+			}
+		}
+
+		freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+		chan = ieee80211_get_channel(&rdev->wiphy, freq);
+		if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) {
+			/* Primary channel not allowed */
+			result = -EINVAL;
+			goto bad_res;
+		}
+		if (sec_chan_offset == NL80211_SEC_CHAN_BELOW)
+			sec_freq = freq - 20;
+		else if (sec_chan_offset == NL80211_SEC_CHAN_ABOVE)
+			sec_freq = freq + 20;
+		else
+			sec_freq = 0;
+
+		if (sec_freq) {
+			struct ieee80211_channel *schan;
+			schan = ieee80211_get_channel(&rdev->wiphy, sec_freq);
+			if (!schan || schan->flags & IEEE80211_CHAN_DISABLED) {
+				/* Secondary channel not allowed */
+				result = -EINVAL;
+				goto bad_res;
+			}
+		}
+
+		result = rdev->ops->set_channel(&rdev->wiphy, chan,
+						sec_chan_offset);
+		if (result)
+			goto bad_res;
+	}
+
+
 bad_res:
 	cfg80211_put_dev(rdev);
 	return result;
-- 
cgit v1.2.3


From 10ec4f1d0851eb97cd53db66150835dd7f64829d Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 26 Nov 2008 13:03:08 -0800
Subject: nl80211: relicense nl80211.h under the ISC

We have a few BSD/ISC licensed userspace applications which
include nl80211.h from the kernel. To avoid legal ambiguity
for usage of the header file in these projects we rather simply
relicense the header file under the ISC. We've received consent
from all contributors to it.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Michael Wu <flamingice@sourmilk.net>
Acked-by: Luis Carlos Cobo <luisca@cozybit.com>
Acked-by: Michael Buesch <mb@bu3sch.de>
Acked-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Colin McCabe <colin@cozybit.com>
Acked-by: Javier Cardona <javier@cozybit.com>
Cc: johannes@sipsolutions.net
Cc: altape@eden.rutgers.edu
Cc: luisca@cozybit.com
Cc: mb@bu3sch.de
Cc: jouni.malinen@atheros.com
Cc: colin@cozybit.com
Cc: javier@cozybit.com
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 92f79d2bdd8c..04d4516f9c71 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -3,7 +3,26 @@
 /*
  * 802.11 netlink interface public header
  *
- * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006, 2007, 2008 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2008 Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2008 Luis Carlos Cobo <luisca@cozybit.com>
+ * Copyright 2008 Michael Buesch <mb@bu3sch.de>
+ * Copyright 2008 Luis R. Rodriguez <lrodriguez@atheros.com>
+ * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
+ * Copyright 2008 Colin McCabe <colin@cozybit.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
  */
 
 /**
-- 
cgit v1.2.3


From fee52678dbda2099a25243e79da98dc390e1939a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 26 Nov 2008 22:36:31 +0100
Subject: cfg80211: handle SIOCGIWNAME

This patch moves the SIOCGIWNAME handling from mac80211 to cfg80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     |  7 ++++++
 net/mac80211/wext.c        | 44 +---------------------------------
 net/wireless/Makefile      |  1 +
 net/wireless/wext-compat.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 43 deletions(-)
 create mode 100644 net/wireless/wext-compat.c

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 53b06f6c62ca..c97eac34ec01 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5,6 +5,8 @@
 #include <linux/skbuff.h>
 #include <linux/nl80211.h>
 #include <net/genetlink.h>
+/* remove once we remove the wext stuff */
+#include <net/iw_handler.h>
 
 /*
  * 802.11 configuration in-kernel interface
@@ -524,4 +526,9 @@ struct cfg80211_ops {
 			       enum nl80211_sec_chan_offset);
 };
 
+/* temporary wext handlers */
+int cfg80211_wext_giwname(struct net_device *dev,
+			  struct iw_request_info *info,
+			  char *name, char *extra);
+
 #endif /* __NET_CFG80211_H */
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index b3ce28d35611..b9eee3c903de 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -135,48 +135,6 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 	return -EOPNOTSUPP;
 }
 
-static int ieee80211_ioctl_giwname(struct net_device *dev,
-				   struct iw_request_info *info,
-				   char *name, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_supported_band *sband;
-	u8 is_ht = 0, is_a = 0, is_b = 0, is_g = 0;
-
-
-	sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ];
-	if (sband) {
-		is_a = 1;
-		is_ht |= sband->ht_cap.ht_supported;
-	}
-
-	sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ];
-	if (sband) {
-		int i;
-		/* Check for mandatory rates */
-		for (i = 0; i < sband->n_bitrates; i++) {
-			if (sband->bitrates[i].bitrate == 10)
-				is_b = 1;
-			if (sband->bitrates[i].bitrate == 60)
-				is_g = 1;
-		}
-		is_ht |= sband->ht_cap.ht_supported;
-	}
-
-	strcpy(name, "IEEE 802.11");
-	if (is_a)
-		strcat(name, "a");
-	if (is_b)
-		strcat(name, "b");
-	if (is_g)
-		strcat(name, "g");
-	if (is_ht)
-		strcat(name, "n");
-
-	return 0;
-}
-
-
 static int ieee80211_ioctl_giwrange(struct net_device *dev,
 				 struct iw_request_info *info,
 				 struct iw_point *data, char *extra)
@@ -1146,7 +1104,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
 static const iw_handler ieee80211_handler[] =
 {
 	(iw_handler) NULL,				/* SIOCSIWCOMMIT */
-	(iw_handler) ieee80211_ioctl_giwname,		/* SIOCGIWNAME */
+	(iw_handler) cfg80211_wext_giwname,		/* SIOCGIWNAME */
 	(iw_handler) NULL,				/* SIOCSIWNWID */
 	(iw_handler) NULL,				/* SIOCGIWNWID */
 	(iw_handler) ieee80211_ioctl_siwfreq,		/* SIOCSIWFREQ */
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index cc547edb111f..9bc412c83430 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
 obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o
+cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o
 cfg80211-$(CONFIG_NL80211) += nl80211.o
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
new file mode 100644
index 000000000000..a5db4551a31a
--- /dev/null
+++ b/net/wireless/wext-compat.c
@@ -0,0 +1,60 @@
+/*
+ * cfg80211 - wext compat code
+ *
+ * This is temporary code until all wireless functionality is migrated
+ * into cfg80211, when that happens all the exports here go away and
+ * we directly assign the wireless handlers of wireless interfaces.
+ *
+ * Copyright 2008	Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/wireless.h>
+#include <linux/nl80211.h>
+#include <net/iw_handler.h>
+#include <net/wireless.h>
+#include <net/cfg80211.h>
+#include "core.h"
+
+int cfg80211_wext_giwname(struct net_device *dev,
+			  struct iw_request_info *info,
+			  char *name, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct ieee80211_supported_band *sband;
+	bool is_ht = false, is_a = false, is_b = false, is_g = false;
+
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	sband = wdev->wiphy->bands[IEEE80211_BAND_5GHZ];
+	if (sband) {
+		is_a = true;
+		is_ht |= sband->ht_cap.ht_supported;
+	}
+
+	sband = wdev->wiphy->bands[IEEE80211_BAND_2GHZ];
+	if (sband) {
+		int i;
+		/* Check for mandatory rates */
+		for (i = 0; i < sband->n_bitrates; i++) {
+			if (sband->bitrates[i].bitrate == 10)
+				is_b = true;
+			if (sband->bitrates[i].bitrate == 60)
+				is_g = true;
+		}
+		is_ht |= sband->ht_cap.ht_supported;
+	}
+
+	strcpy(name, "IEEE 802.11");
+	if (is_a)
+		strcat(name, "a");
+	if (is_b)
+		strcat(name, "b");
+	if (is_g)
+		strcat(name, "g");
+	if (is_ht)
+		strcat(name, "n");
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_wext_giwname);
-- 
cgit v1.2.3


From e60c7744f8aa77bcbcb0b294596d6c87445d1200 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 26 Nov 2008 23:31:40 +0100
Subject: cfg80211: handle SIOCGIWMODE/SIOCSIWMODE

further reducing wext code in mac80211.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     |  4 +++
 net/mac80211/iface.c       |  4 +++
 net/mac80211/wext.c        | 76 ++------------------------------------------
 net/wireless/wext-compat.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c97eac34ec01..a0c0bf19496c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -530,5 +530,9 @@ struct cfg80211_ops {
 int cfg80211_wext_giwname(struct net_device *dev,
 			  struct iw_request_info *info,
 			  char *name, char *extra);
+int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
+			  u32 *mode, char *extra);
+int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
+			  u32 *mode, char *extra);
 
 #endif /* __NET_CFG80211_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9ab772ac74ae..5abbc3f07dd6 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -698,6 +698,10 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	if (type == sdata->vif.type)
 		return 0;
 
+	/* Setting ad-hoc mode on non-IBSS channel is not supported. */
+	if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)
+		return -EOPNOTSUPP;
+
 	/*
 	 * We could, here, on changes between IBSS/STA/MESH modes,
 	 * invoke an MLME function instead that disassociates etc.
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index b9eee3c903de..4e1fdcfacb0c 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -224,78 +224,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
 }
 
 
-static int ieee80211_ioctl_siwmode(struct net_device *dev,
-				   struct iw_request_info *info,
-				   __u32 *mode, char *extra)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_local *local = sdata->local;
-	int type;
-
-	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-		return -EOPNOTSUPP;
-
-	switch (*mode) {
-	case IW_MODE_INFRA:
-		type = NL80211_IFTYPE_STATION;
-		break;
-	case IW_MODE_ADHOC:
-		/* Setting ad-hoc mode on non ibss channel is not
-		 * supported.
-		 */
-		if (local->oper_channel &&
-		    (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS))
-			return -EOPNOTSUPP;
-
-		type = NL80211_IFTYPE_ADHOC;
-		break;
-	case IW_MODE_REPEAT:
-		type = NL80211_IFTYPE_WDS;
-		break;
-	case IW_MODE_MONITOR:
-		type = NL80211_IFTYPE_MONITOR;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return ieee80211_if_change_type(sdata, type);
-}
-
-
-static int ieee80211_ioctl_giwmode(struct net_device *dev,
-				   struct iw_request_info *info,
-				   __u32 *mode, char *extra)
-{
-	struct ieee80211_sub_if_data *sdata;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_AP:
-		*mode = IW_MODE_MASTER;
-		break;
-	case NL80211_IFTYPE_STATION:
-		*mode = IW_MODE_INFRA;
-		break;
-	case NL80211_IFTYPE_ADHOC:
-		*mode = IW_MODE_ADHOC;
-		break;
-	case NL80211_IFTYPE_MONITOR:
-		*mode = IW_MODE_MONITOR;
-		break;
-	case NL80211_IFTYPE_WDS:
-		*mode = IW_MODE_REPEAT;
-		break;
-	case NL80211_IFTYPE_AP_VLAN:
-		*mode = IW_MODE_SECOND;		/* FIXME */
-		break;
-	default:
-		*mode = IW_MODE_AUTO;
-		break;
-	}
-	return 0;
-}
-
 static int ieee80211_ioctl_siwfreq(struct net_device *dev,
 				   struct iw_request_info *info,
 				   struct iw_freq *freq, char *extra)
@@ -1109,8 +1037,8 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) NULL,				/* SIOCGIWNWID */
 	(iw_handler) ieee80211_ioctl_siwfreq,		/* SIOCSIWFREQ */
 	(iw_handler) ieee80211_ioctl_giwfreq,		/* SIOCGIWFREQ */
-	(iw_handler) ieee80211_ioctl_siwmode,		/* SIOCSIWMODE */
-	(iw_handler) ieee80211_ioctl_giwmode,		/* SIOCGIWMODE */
+	(iw_handler) cfg80211_wext_siwmode,		/* SIOCSIWMODE */
+	(iw_handler) cfg80211_wext_giwmode,		/* SIOCGIWMODE */
 	(iw_handler) NULL,				/* SIOCSIWSENS */
 	(iw_handler) NULL,				/* SIOCGIWSENS */
 	(iw_handler) NULL /* not used */,		/* SIOCSIWRANGE */
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a5db4551a31a..58e489fd4aed 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -58,3 +58,82 @@ int cfg80211_wext_giwname(struct net_device *dev,
 	return 0;
 }
 EXPORT_SYMBOL(cfg80211_wext_giwname);
+
+int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
+			  u32 *mode, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev;
+	struct vif_params vifparams;
+	enum nl80211_iftype type;
+
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	rdev = wiphy_to_dev(wdev->wiphy);
+
+	if (!rdev->ops->change_virtual_intf)
+		return -EOPNOTSUPP;
+
+	/* don't support changing VLANs, you just re-create them */
+	if (wdev->iftype == NL80211_IFTYPE_AP_VLAN)
+		return -EOPNOTSUPP;
+
+	switch (*mode) {
+	case IW_MODE_INFRA:
+		type = NL80211_IFTYPE_STATION;
+		break;
+	case IW_MODE_ADHOC:
+		type = NL80211_IFTYPE_ADHOC;
+		break;
+	case IW_MODE_REPEAT:
+		type = NL80211_IFTYPE_WDS;
+		break;
+	case IW_MODE_MONITOR:
+		type = NL80211_IFTYPE_MONITOR;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memset(&vifparams, 0, sizeof(vifparams));
+
+	return rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type,
+					      NULL, &vifparams);
+}
+EXPORT_SYMBOL(cfg80211_wext_siwmode);
+
+int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
+			  u32 *mode, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_AP:
+		*mode = IW_MODE_MASTER;
+		break;
+	case NL80211_IFTYPE_STATION:
+		*mode = IW_MODE_INFRA;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		*mode = IW_MODE_ADHOC;
+		break;
+	case NL80211_IFTYPE_MONITOR:
+		*mode = IW_MODE_MONITOR;
+		break;
+	case NL80211_IFTYPE_WDS:
+		*mode = IW_MODE_REPEAT;
+		break;
+	case NL80211_IFTYPE_AP_VLAN:
+		*mode = IW_MODE_SECOND;		/* FIXME */
+		break;
+	default:
+		*mode = IW_MODE_AUTO;
+		break;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_wext_giwmode);
-- 
cgit v1.2.3


From 007e5ddddfed4ba039899754936e89b27d5cb551 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 27 Nov 2008 23:13:38 +0100
Subject: wireless: clean up radiotap a bit

No need to pad the header so no constant needed for that,
no need to carry any version number from netbsd nor CVS
IDs from them.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ipw2x00/ipw2200.c   | 24 +++---------------------
 drivers/net/wireless/libertas/radiotap.h |  3 ---
 include/net/ieee80211_radiotap.h         | 15 +++------------
 3 files changed, 6 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index d2a2b7586d08..48fa6df3a774 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -7797,15 +7797,6 @@ static void ipw_handle_data_packet_monitor(struct ipw_priv *priv,
 	memmove(rxb->skb->data + sizeof(struct ipw_rt_hdr),
 		rxb->skb->data + IPW_RX_FRAME_SIZE, len);
 
-	/* Zero the radiotap static buffer  ...  We only need to zero the bytes NOT
-	 * part of our real header, saves a little time.
-	 *
-	 * No longer necessary since we fill in all our data.  Purge before merging
-	 * patch officially.
-	 * memset(rxb->skb->data + sizeof(struct ipw_rt_hdr), 0,
-	 *        IEEE80211_RADIOTAP_HDRLEN - sizeof(struct ipw_rt_hdr));
-	 */
-
 	ipw_rt = (struct ipw_rt_hdr *)rxb->skb->data;
 
 	ipw_rt->rt_hdr.it_version = PKTHDR_RADIOTAP_VERSION;
@@ -8013,15 +8004,6 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv,
 
 	memcpy(ipw_rt->payload, hdr, len);
 
-	/* Zero the radiotap static buffer  ...  We only need to zero the bytes
-	 * NOT part of our real header, saves a little time.
-	 *
-	 * No longer necessary since we fill in all our data.  Purge before
-	 * merging patch officially.
-	 * memset(rxb->skb->data + sizeof(struct ipw_rt_hdr), 0,
-	 *        IEEE80211_RADIOTAP_HDRLEN - sizeof(struct ipw_rt_hdr));
-	 */
-
 	ipw_rt->rt_hdr.it_version = PKTHDR_RADIOTAP_VERSION;
 	ipw_rt->rt_hdr.it_pad = 0;	/* always good to zero */
 	ipw_rt->rt_hdr.it_len = cpu_to_le16(sizeof(*ipw_rt));	/* total header+data */
@@ -10409,9 +10391,9 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
 		} else
 			len = src->len;
 
-		dst = alloc_skb(
-			len + IEEE80211_RADIOTAP_HDRLEN, GFP_ATOMIC);
-		if (!dst) continue;
+		dst = alloc_skb(len + sizeof(*rt_hdr), GFP_ATOMIC);
+		if (!dst)
+			continue;
 
 		rt_hdr = (void *)skb_put(dst, sizeof(*rt_hdr));
 
diff --git a/drivers/net/wireless/libertas/radiotap.h b/drivers/net/wireless/libertas/radiotap.h
index 5d118f40cfbc..f8eb9097ff0a 100644
--- a/drivers/net/wireless/libertas/radiotap.h
+++ b/drivers/net/wireless/libertas/radiotap.h
@@ -6,9 +6,6 @@ struct tx_radiotap_hdr {
 	u8 txpower;
 	u8 rts_retries;
 	u8 data_retries;
-#if 0
-	u8 pad[IEEE80211_RADIOTAP_HDRLEN - 12];
-#endif
 } __attribute__ ((packed));
 
 #define TX_RADIOTAP_PRESENT (				\
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index d364fd594ea4..384698cb773a 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,7 +1,4 @@
-/* $FreeBSD: src/sys/net80211/ieee80211_radiotap.h,v 1.5 2005/01/22 20:12:05 sam Exp $ */
-/* $NetBSD: ieee80211_radiotap.h,v 1.11 2005/06/22 06:16:02 dyoung Exp $ */
-
-/*-
+/*
  * Copyright (c) 2003, 2004 David Young.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -42,8 +39,6 @@
 #include <linux/kernel.h>
 #include <asm/unaligned.h>
 
-/* Radiotap header version (from official NetBSD feed) */
-#define IEEE80211RADIOTAP_VERSION	"1.5"
 /* Base version of the radiotap packet header data */
 #define PKTHDR_RADIOTAP_VERSION		0
 
@@ -62,12 +57,8 @@
  * readers.
  */
 
-/* XXX tcpdump/libpcap do not tolerate variable-length headers,
- * yet, so we pad every radiotap header to 64 bytes. Ugh.
- */
-#define IEEE80211_RADIOTAP_HDRLEN	64
-
-/* The radio capture header precedes the 802.11 header.
+/*
+ * The radio capture header precedes the 802.11 header.
  * All data in the header is little endian on all platforms.
  */
 struct ieee80211_radiotap_header {
-- 
cgit v1.2.3


From 4571d3bf87b76eae875283ff9f7243984b5ddcae Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@web.de>
Date: Sun, 30 Nov 2008 00:48:41 +0100
Subject: mac80211: add sta_notify_ps callback

This patch is necessary in order to provide a proper Access point support for p54.
Unfortunately for us, there is no documented way to disable the interfering
power save buffering mechanism in firmware completely.

Therefore we give in and notify the driver through our new sta_notify_ps callback,
so that we can update the filter state.

Signed-off-by: Christian Lamparter <chunkeey@web.de>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 18 ++++++++++++++++++
 net/mac80211/rx.c      |  7 +++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6e823cc6a4b1..0a0c59365db6 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -781,6 +781,19 @@ enum sta_notify_cmd {
 	STA_NOTIFY_ADD, STA_NOTIFY_REMOVE
 };
 
+/**
+ * enum sta_notify_ps_cmd - sta power save notify command
+ *
+ * Used with the sta_notify_ps() callback in &struct ieee80211_ops to
+ * notify the driver if a station made a power state transition.
+ *
+ * @STA_NOTIFY_SLEEP: a station is now sleeping
+ * @STA_NOTIFY_AWAKE: a sleeping station woke up
+ */
+enum sta_notify_ps_cmd {
+	STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE,
+};
+
 /**
  * enum ieee80211_tkip_key_type - get tkip key
  *
@@ -1251,6 +1264,9 @@ enum ieee80211_ampdu_mlme_action {
  * @sta_notify: Notifies low level driver about addition or removal
  *	of associated station or AP.
  *
+ * @sta_ps_notify: Notifies low level driver about the power state transition
+ *	of a associated station. Must be atomic.
+ *
  * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max),
  *	bursting) for a hardware TX queue.
  *
@@ -1317,6 +1333,8 @@ struct ieee80211_ops {
 	int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value);
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			enum sta_notify_cmd, struct ieee80211_sta *sta);
+	void (*sta_notify_ps)(struct ieee80211_hw *hw,
+			enum sta_notify_ps_cmd, struct ieee80211_sta *sta);
 	int (*conf_tx)(struct ieee80211_hw *hw, u16 queue,
 		       const struct ieee80211_tx_queue_params *params);
 	int (*get_tx_stats)(struct ieee80211_hw *hw,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 68a6e973c72c..14be095b8528 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -654,9 +654,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 static void ap_sta_ps_start(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
 
 	atomic_inc(&sdata->bss->num_sta_ps);
 	set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL);
+	if (local->ops->sta_notify_ps)
+		local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_SLEEP,
+					  &sta->sta);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n",
 	       sdata->dev->name, sta->sta.addr, sta->sta.aid);
@@ -673,6 +677,9 @@ static int ap_sta_ps_end(struct sta_info *sta)
 	atomic_dec(&sdata->bss->num_sta_ps);
 
 	clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL);
+	if (local->ops->sta_notify_ps)
+		local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_AWAKE,
+					  &sta->sta);
 
 	if (!skb_queue_empty(&sta->ps_tx_buf))
 		sta_info_clear_tim_bit(sta);
-- 
cgit v1.2.3


From 8bef7a10014c4579c66579ab47fc1bb9563ac42a Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Sun, 30 Nov 2008 20:56:28 +0200
Subject: mac80211: document ieee80211_tx_info.pad

Fixes htmldocs warning:

Warning(mac80211.h:379): No description found for parameter 'pad[2]'

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0a0c59365db6..e84c922a1b16 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -323,6 +323,7 @@ struct ieee80211_tx_rate {
  * @flags: transmit info flags, defined above
  * @band: the band to transmit on (use for checking for races)
  * @antenna_sel_tx: antenna to use, 0 for automatic diversity
+ * @pad: padding, ignore
  * @control: union for control data
  * @status: union for status data
  * @driver_data: array of driver_data pointers
-- 
cgit v1.2.3


From b74ca3a896b9ab5f952bc440154758e708c48884 Mon Sep 17 00:00:00 2001
From: Wang Chen <wangchen@cn.fujitsu.com>
Date: Mon, 8 Dec 2008 01:14:16 -0800
Subject: netdevice: Kill netdev->priv

This is the last shoot of this series.
After I removing all directly reference of netdev->priv, I am killing
"priv" of "struct net_device" and fixing relative comments/docs.

Anyone will not be allowed to reference netdev->priv directly.
If you want to reference the memory of private data, use netdev_priv()
instead.
If the private data is not allocted when alloc_netdev(), use
netdev->ml_priv to point that memory after you creating that private
data.

Signed-off-by: Wang Chen <wangchen@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/driver.txt     | 2 +-
 Documentation/networking/netdevices.txt | 2 +-
 drivers/net/3c501.h                     | 2 +-
 drivers/net/atp.c                       | 2 +-
 drivers/net/eexpress.c                  | 2 +-
 drivers/net/forcedeth.c                 | 4 ++--
 drivers/net/lance.c                     | 2 +-
 drivers/net/myri_sbus.c                 | 2 +-
 drivers/net/pci-skeleton.c              | 2 +-
 drivers/net/sun3_82586.c                | 2 +-
 drivers/net/sunbmac.c                   | 2 +-
 drivers/net/tokenring/3c359.c           | 5 +++--
 drivers/net/via-rhine.c                 | 9 +++++----
 drivers/net/wireless/strip.c            | 2 +-
 include/linux/hdlc.h                    | 2 +-
 include/linux/netdevice.h               | 1 -
 net/atm/mpc.c                           | 4 ++--
 net/core/dev.c                          | 6 ------
 18 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt
index ea72d2e66ca8..03283daa64fe 100644
--- a/Documentation/networking/driver.txt
+++ b/Documentation/networking/driver.txt
@@ -13,7 +13,7 @@ Transmit path guidelines:
 	static int drv_hard_start_xmit(struct sk_buff *skb,
 		   		       struct net_device *dev)
 	{
-		struct drv *dp = dev->priv;
+		struct drv *dp = netdev_priv(dev);
 
 		lock_tx(dp);
 		...
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index d0f71fc7f782..a2ab6a0b116d 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -18,7 +18,7 @@ There are routines in net_init.c to handle the common cases of
 alloc_etherdev, alloc_netdev.  These reserve extra space for driver
 private data which gets freed when the network device is freed. If
 separately allocated data is attached to the network device
-(dev->priv) then it is up to the module exit handler to free that.
+(netdev_priv(dev)) then it is up to the module exit handler to free that.
 
 MTU
 ===
diff --git a/drivers/net/3c501.h b/drivers/net/3c501.h
index cfec64efff78..f40b0493337a 100644
--- a/drivers/net/3c501.h
+++ b/drivers/net/3c501.h
@@ -23,7 +23,7 @@ static const struct ethtool_ops netdev_ethtool_ops;
 static int el_debug = EL_DEBUG;
 
 /*
- *	Board-specific info in dev->priv.
+ *	Board-specific info in netdev_priv(dev).
  */
 
 struct net_local
diff --git a/drivers/net/atp.c b/drivers/net/atp.c
index 7028b276dfd3..1d6b74c5d6c9 100644
--- a/drivers/net/atp.c
+++ b/drivers/net/atp.c
@@ -420,7 +420,7 @@ static unsigned short __init eeprom_op(long ioaddr, u32 cmd)
    registers that "should" only need to be set once at boot, so that
    there is non-reboot way to recover if something goes wrong.
 
-   This is an attachable device: if there is no dev->priv entry then it wasn't
+   This is an attachable device: if there is no private entry then it wasn't
    probed for at boot-time, and we need to probe for it again.
    */
 static int net_open(struct net_device *dev)
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index a125e41240f5..9ff3f2f5e382 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -1046,7 +1046,7 @@ static void eexp_hw_tx_pio(struct net_device *dev, unsigned short *buf,
 /*
  * Sanity check the suspected EtherExpress card
  * Read hardware address, reset card, size memory and initialize buffer
- * memory pointers. These are held in dev->priv, in case someone has more
+ * memory pointers. These are held in netdev_priv(), in case someone has more
  * than one card in a machine.
  */
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 12384df8cb2b..1f2b24743ee9 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -712,12 +712,12 @@ struct nv_skb_map {
 
 /*
  * SMP locking:
- * All hardware access under dev->priv->lock, except the performance
+ * All hardware access under netdev_priv(dev)->lock, except the performance
  * critical parts:
  * - rx is (pseudo-) lockless: it relies on the single-threading provided
  *	by the arch code for interrupts.
  * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
- *	needs dev->priv->lock :-(
+ *	needs netdev_priv(dev)->lock :-(
  * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
  */
 
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index e81b6113ed94..d7afb938ea62 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -519,7 +519,7 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int
 		}
 	}
 
-	/* We can't allocate dev->priv from alloc_etherdev() because it must
+	/* We can't allocate private data from alloc_etherdev() because it must
 	   a ISA DMA-able region. */
 	chipname = chip_table[lance_version].name;
 	printk("%s: %s at %#3x, ", dev->name, chipname, ioaddr);
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index 6833f65f8aec..899ed065a147 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -1091,7 +1091,7 @@ static int __devinit myri_sbus_probe(struct of_device *op, const struct of_devic
 err_free_irq:
 	free_irq(dev->irq, dev);
 err:
-	/* This will also free the co-allocated 'dev->priv' */
+	/* This will also free the co-allocated private data*/
 	free_netdev(dev);
 	return -ENODEV;
 }
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index b23b5c397b1d..c95fd72c3bb9 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -781,7 +781,7 @@ static int __devinit netdrv_init_one (struct pci_dev *pdev,
 	dev->irq = pdev->irq;
 	dev->base_addr = (unsigned long) ioaddr;
 
-	/* dev->priv/tp zeroed and aligned in alloc_etherdev */
+	/* netdev_priv()/tp zeroed and aligned in alloc_etherdev */
 	tp = netdev_priv(dev);
 
 	/* note: tp->chipset set in netdrv_init_board */
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index e8f97d5c9c23..e0d84772771c 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -209,7 +209,7 @@ static int sun3_82586_open(struct net_device *dev)
 static int check586(struct net_device *dev,char *where,unsigned size)
 {
 	struct priv pb;
-	struct priv *p = /* (struct priv *) dev->priv*/ &pb;
+	struct priv *p = &pb;
 	char *iscp_addr;
 	int i;
 
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 977b3e08bbfc..7f69c7f176c4 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -1233,7 +1233,7 @@ fail_and_cleanup:
 				  bp->bmac_block,
 				  bp->bblock_dvma);
 
-	/* This also frees the co-located 'dev->priv' */
+	/* This also frees the co-located private data */
 	free_netdev(dev);
 	return -ENODEV;
 }
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index e7a944657cf8..43853e3b210e 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -296,8 +296,9 @@ static int __devinit xl_probe(struct pci_dev *pdev,
 	} ; 
 
 	/* 
-	 * Allowing init_trdev to allocate the dev->priv structure will align xl_private
-   	 * on a 32 bytes boundary which we need for the rx/tx descriptors
+	 * Allowing init_trdev to allocate the private data will align
+	 * xl_private on a 32 bytes boundary which we need for the rx/tx
+	 * descriptors
 	 */
 
 	dev = alloc_trdev(sizeof(struct xl_private)) ; 
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index 93b74b7b7077..8d405c83df8b 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -191,12 +191,13 @@ IIId. Synchronization
 
 The driver runs as two independent, single-threaded flows of control. One
 is the send-packet routine, which enforces single-threaded use by the
-dev->priv->lock spinlock. The other thread is the interrupt handler, which
-is single threaded by the hardware and interrupt handling software.
+netdev_priv(dev)->lock spinlock. The other thread is the interrupt handler,
+which is single threaded by the hardware and interrupt handling software.
 
 The send packet thread has partial control over the Tx ring. It locks the
-dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring
-is not available it stops the transmit queue by calling netif_stop_queue.
+netdev_priv(dev)->lock whenever it's queuing a Tx packet. If the next slot in
+the ring is not available it stops the transmit queue by
+calling netif_stop_queue.
 
 The interrupt handler has exclusive control over the Rx ring and records stats
 from the Tx ring. After reaping the stats, it marks the Tx queue entry as
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index 692e6c5e009a..dd0de3a9ed4e 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -2494,7 +2494,7 @@ static void strip_dev_setup(struct net_device *dev)
 	dev->type = ARPHRD_METRICOM;	/* dtang */
 	dev->hard_header_len = sizeof(STRIP_Header);
 	/*
-	 *  dev->priv             Already holds a pointer to our struct strip
+	 *  netdev_priv(dev) Already holds a pointer to our struct strip
 	 */
 
 	*(MetricomAddress *) & dev->broadcast = broadcast_address;
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index e960faac609d..fd47a151665e 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -43,7 +43,7 @@ struct hdlc_proto {
 };
 
 
-/* Pointed to by dev->priv */
+/* Pointed to by netdev_priv(dev) */
 typedef struct hdlc_device {
 	/* used by HDLC layer to take control over HDLC device from hw driver*/
 	int (*attach)(struct net_device *dev,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0df0db068ac3..47e731528315 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,7 +785,6 @@ struct net_device
 /*
  * One part is mostly used on xmit path (device)
  */
-	void			*priv;	/* pointer to private data	*/
 	/* These may be needed for future network-power-down code. */
 	unsigned long		trans_start;	/* Time (in jiffies) of last Tx	*/
 
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 12e9ea371db1..039d5cc72c3d 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -341,8 +341,8 @@ static const char *mpoa_device_type_string(char type)
 }
 
 /*
- * lec device calls this via its dev->priv->lane2_ops->associate_indicator()
- * when it sees a TLV in LE_ARP packet.
+ * lec device calls this via its netdev_priv(dev)->lane2_ops
+ * ->associate_indicator() when it sees a TLV in LE_ARP packet.
  * We fill in the pointer above when we see a LANE2 lec initializing
  * See LANE2 spec 3.1.5
  *
diff --git a/net/core/dev.c b/net/core/dev.c
index 4615e9a443aa..f54cac76438a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4378,12 +4378,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
-	if (sizeof_priv) {
-		dev->priv = ((char *)dev +
-			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
-			      & ~NETDEV_ALIGN_CONST));
-	}
-
 	dev->gso_max_size = GSO_MAX_SIZE;
 
 	netdev_init_queues(dev);
-- 
cgit v1.2.3


From 0049bab5e765aa74cf767a834fa336e19453fc5e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 8 Dec 2008 01:18:05 -0800
Subject: dccp: Remove obsolete parts of the old CCID interface

The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector
case, their value equals initially that of the sysctl, but at the end of
feature negotiation may be something different.

The old interface removed by this patch thus has been replaced by the newer
interface to dynamically query the currently loaded CCIDs.

Also removed are the constructors for the TX CCID and the RX CCID, since the
switch "rx <-> non-rx" is done by the handler in minisocks.c (and the handler
is the only place in the code where CCIDs are loaded).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt |  5 +++--
 include/linux/dccp.h              |  3 ---
 net/dccp/ccid.c                   | 14 --------------
 net/dccp/ccid.h                   |  5 -----
 net/dccp/feat.c                   | 13 -------------
 net/dccp/minisocks.c              |  2 --
 6 files changed, 3 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 610083ff73f6..a203d132dbef 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -140,10 +140,11 @@ send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
 tx_ccid = 2
-	Default CCID for the sender-receiver half-connection.
+	Default CCID for the sender-receiver half-connection. Depending on the
+	choice of CCID, the Send Ack Vector feature is enabled automatically.
 
 rx_ccid = 2
-	Default CCID for the receiver-sender half-connection.
+	Default CCID for the receiver-sender half-connection; see tx_ccid.
 
 seq_window = 100
 	The initial sequence window (sec. 7.5.2).
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 6a72ff52a8a4..46daea312d92 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * Will be used to pass the state from dccp_request_sock to dccp_sock.
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  * @dccpms_ccid - Congestion Control Id (CCID) (section 10)
   * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
   * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
   * @dccpms_pending - List of features being negotiated
@@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
-	__u8			dccpms_rx_ccid;
-	__u8			dccpms_tx_ccid;
 	__u8			dccpms_send_ack_vector;
 	__u8			dccpms_send_ndp_count;
 	struct list_head	dccpms_pending;
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 647cb0614f84..bcc643f992ae 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -253,20 +253,6 @@ out_module_put:
 
 EXPORT_SYMBOL_GPL(ccid_new);
 
-struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
-{
-	return ccid_new(id, sk, 1, gfp);
-}
-
-EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
-
-struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk,  gfp_t gfp)
-{
-	return ccid_new(id, sk, 0, gfp);
-}
-
-EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
-
 static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
 {
 	struct ccid_operations *ccid_ops;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 803343aed004..18f69423a708 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -111,11 +111,6 @@ extern int  ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
 extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
 			     gfp_t gfp);
 
-extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
-				   gfp_t gfp);
-extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
-				   gfp_t gfp);
-
 static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
 {
 	struct ccid *ccid = dp->dccps_hc_rx_ccid;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 077f78d579c4..a0d5891a37bf 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1124,22 +1124,9 @@ int dccp_feat_init(struct sock *sk)
 	INIT_LIST_HEAD(&dmsk->dccpms_pending);	/* XXX no longer used */
 	INIT_LIST_HEAD(&dmsk->dccpms_conf);	/* XXX no longer used */
 
-	/* CCID L */
-	rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 1, 0,
-				&dmsk->dccpms_tx_ccid, 1);
-	if (rc)
-		goto out;
-
-	/* CCID R */
-	rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 0, 0,
-				&dmsk->dccpms_rx_ccid, 1);
-	if (rc)
-		goto out;
-
 	/* Ack ratio */
 	rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0,
 				dp->dccps_l_ack_ratio);
-out:
 	return rc;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 308b6b928c3d..210c346899ba 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -45,8 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row);
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
-	dmsk->dccpms_rx_ccid	     = sysctl_dccp_feat_rx_ccid;
-	dmsk->dccpms_tx_ccid	     = sysctl_dccp_feat_tx_ccid;
 	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
 	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
 }
-- 
cgit v1.2.3


From 4098dce5be537a157eed4a326efd464109825b8b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 8 Dec 2008 01:18:37 -0800
Subject: dccp: Remove manual influence on NDP Count feature

Updating the NDP count feature is handled automatically now:
 * for CCID-2 it is disabled, since the code does not use NDP counts;
 * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths.

Allowing the user to change NDP values leads to unpredictable and failing
behaviour, since it is then possible to disable NDP counts even when they
are needed (e.g. in CCID-3).

This means that only those user settings are sensible that agree with the
values for Send NDP Count implied by the choice of CCID. But those settings
are already activated by the feature negotiation (CCID dependency tracking),
hence this form of support is redundant.

At startup the initialisation of the NDP count feature uses the default
value of 0, which is done implicitly by the zeroing-out of the socket when
it is allocated. If the choice of CCID or feature negotiation enables NDP
count, this will then be updated via the NDP activation handler.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt | 3 ---
 include/linux/dccp.h              | 4 ++--
 net/dccp/dccp.h                   | 1 -
 net/dccp/feat.c                   | 2 +-
 net/dccp/minisocks.c              | 1 -
 net/dccp/options.c                | 4 +---
 net/dccp/sysctl.c                 | 7 -------
 7 files changed, 4 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index a203d132dbef..1403745ab406 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -133,9 +133,6 @@ retries2
 	importance for retransmitted acknowledgments and feature negotiation,
 	data packets are never retransmitted. Analogue of tcp_retries2.
 
-send_ndp = 1
-	Whether or not to send NDP count options (sec. 7.7.2).
-
 send_ackvec = 1
 	Whether or not to send Ack Vector options (sec. 11.5).
 
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 46daea312d92..60e94438eadd 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
   * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
-  * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2)
   * @dccpms_pending - List of features being negotiated
   * @dccpms_conf -
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
 	__u8			dccpms_send_ack_vector;
-	__u8			dccpms_send_ndp_count;
 	struct list_head	dccpms_pending;
 	struct list_head	dccpms_conf;
 };
@@ -490,6 +488,7 @@ struct dccp_ackvec;
  * @dccps_r_ack_ratio - feature-remote Ack Ratio
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
+ * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
  * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
@@ -529,6 +528,7 @@ struct dccp_sock {
 	__u16				dccps_r_ack_ratio;
 	__u8				dccps_pcslen:4;
 	__u8				dccps_pcrlen:4;
+	__u8				dccps_send_ndp_count:1;
 	__u64				dccps_ndp_count:48;
 	unsigned long			dccps_rate_last;
 	struct dccp_minisock		dccps_minisock;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 94f6785f81ec..e0759d098b9a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -99,7 +99,6 @@ extern int  sysctl_dccp_feat_sequence_window;
 extern int  sysctl_dccp_feat_rx_ccid;
 extern int  sysctl_dccp_feat_tx_ccid;
 extern int  sysctl_dccp_feat_send_ack_vector;
-extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index a0d5891a37bf..30f9fb76b921 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -85,7 +85,7 @@ static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
 static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
 {
 	if (!rx)
-		dccp_msk(sk)->dccpms_send_ndp_count = (enable > 0);
+		dccp_sk(sk)->dccps_send_ndp_count = (enable > 0);
 	return 0;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 210c346899ba..02b14812464a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -46,7 +46,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
 	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
-	dmsk->dccpms_send_ndp_count  = sysctl_dccp_feat_send_ndp_count;
 }
 
 void dccp_time_wait(struct sock *sk, int state, int timeo)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index debb1008c7ad..e9d674874b4e 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -27,7 +27,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
 int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
-int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
 u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
@@ -531,8 +530,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
-	if (dmsk->dccpms_send_ndp_count &&
-	    dccp_insert_option_ndp(sk, skb))
+	if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
 		return -1;
 
 	if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index f6e54f433e29..587c12f915c1 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -47,13 +47,6 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "send_ndp",
-		.data		= &sysctl_dccp_feat_send_ndp_count,
-		.maxlen		= sizeof(sysctl_dccp_feat_send_ndp_count),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
-- 
cgit v1.2.3


From 6fdd34d43bff8be9bb925b49d87a0ee144d2ab07 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Mon, 8 Dec 2008 01:19:06 -0800
Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl

This removes the use of the sysctl and the minisock variable for the Send Ack
Vector feature, as it now is handled fully dynamically via feature negotiation
(i.e. when CCID-2 is enabled, Ack Vectors are automatically enabled as per
 RFC 4341, 4.).

Using a sysctl in parallel to this implementation would open the door to
crashes, since much of the code relies on tests of the boolean minisock /
sysctl variable. Thus, this patch replaces all tests of type

	if (dccp_msk(sk)->dccpms_send_ack_vector)
		/* ... */
with
	if (dp->dccps_hc_rx_ackvec != NULL)
		/* ... */

The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature
negotiation concluded that Ack Vectors are to be used on the half-connection.
Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child),
so that the test is a valid one.

The activation handler for Ack Vectors is called as soon as the feature
negotiation has concluded at the
 * server when the Ack marking the transition RESPOND => OPEN arrives;
 * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN.

Adding the sequence number of the Response packet to the Ack Vector has been
removed, since
 (a) connection establishment implies that the Response has been received;
 (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e.
     this entry will always be ignored;
 (c) it can not be used for anything useful - to detect loss for instance, only
     packets received after the loss can serve as pseudo-dupacks.

There was a FIXME to change the error code when dccp_ackvec_add() fails.
I removed this after finding out that:
 * the check whether ackno < ISN is already made earlier,
 * this Response is likely the 1st packet with an Ackno that the client gets,
 * so when dccp_ackvec_add() fails, the reason is likely not a packet error.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt |  3 ---
 include/linux/dccp.h              |  3 ---
 net/dccp/dccp.h                   |  3 +--
 net/dccp/diag.c                   |  2 +-
 net/dccp/input.c                  | 12 +++---------
 net/dccp/minisocks.c              |  1 -
 net/dccp/options.c                |  7 ++-----
 net/dccp/proto.c                  |  3 +--
 net/dccp/sysctl.c                 |  7 -------
 9 files changed, 8 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 1403745ab406..7a3bb1abb830 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -133,9 +133,6 @@ retries2
 	importance for retransmitted acknowledgments and feature negotiation,
 	data packets are never retransmitted. Analogue of tcp_retries2.
 
-send_ackvec = 1
-	Whether or not to send Ack Vector options (sec. 11.5).
-
 tx_ccid = 2
 	Default CCID for the sender-receiver half-connection. Depending on the
 	choice of CCID, the Send Ack Vector feature is enabled automatically.
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 60e94438eadd..61734e27abb7 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 #define DCCPF_INITIAL_SEQUENCE_WINDOW		100
 #define DCCPF_INITIAL_ACK_RATIO			2
 #define DCCPF_INITIAL_CCID			DCCPC_CCID2
-#define DCCPF_INITIAL_SEND_ACK_VECTOR		1
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
 
@@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
   * Will be used to pass the state from dccp_request_sock to dccp_sock.
   *
   * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2)
-  * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5)
   * @dccpms_pending - List of features being negotiated
   * @dccpms_conf -
   */
 struct dccp_minisock {
 	__u64			dccpms_sequence_window;
-	__u8			dccpms_send_ack_vector;
 	struct list_head	dccpms_pending;
 	struct list_head	dccpms_conf;
 };
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e0759d098b9a..0bc4c9a02e19 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -98,7 +98,6 @@ extern int  sysctl_dccp_retries2;
 extern int  sysctl_dccp_feat_sequence_window;
 extern int  sysctl_dccp_feat_rx_ccid;
 extern int  sysctl_dccp_feat_tx_ccid;
-extern int  sysctl_dccp_feat_send_ack_vector;
 extern int  sysctl_dccp_tx_qlen;
 extern int  sysctl_dccp_sync_ratelimit;
 
@@ -434,7 +433,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 	const struct dccp_sock *dp = dccp_sk(sk);
 	return dp->dccps_timestamp_echo != 0 ||
 #ifdef CONFIG_IP_DCCP_ACKVEC
-	       (dccp_msk(sk)->dccpms_send_ack_vector &&
+	       (dp->dccps_hc_rx_ackvec != NULL &&
 		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
 #endif
 	       inet_csk_ack_scheduled(sk);
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index d1e100395efb..652a1b67f727 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -29,7 +29,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_backoff	= icsk->icsk_backoff;
 	info->tcpi_pmtu		= icsk->icsk_pmtu_cookie;
 
-	if (dccp_msk(sk)->dccpms_send_ack_vector)
+	if (dp->dccps_hc_rx_ackvec != NULL)
 		info->tcpi_options |= TCPI_OPT_SACK;
 
 	ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 0672b7e1763e..5eb443f656c1 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -163,7 +163,7 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	if (dccp_msk(sk)->dccpms_send_ack_vector)
+	if (dp->dccps_hc_rx_ackvec != NULL)
 		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
 					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
@@ -375,7 +375,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 		dccp_event_ack_recv(sk, skb);
 
-	if (dccp_msk(sk)->dccpms_send_ack_vector &&
+	if (dp->dccps_hc_rx_ackvec != NULL &&
 	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
 			    DCCP_SKB_CB(skb)->dccpd_seq,
 			    DCCP_ACKVEC_STATE_RECEIVED))
@@ -434,12 +434,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
 			    dp->dccps_options_received.dccpor_timestamp_echo));
 
-		if (dccp_msk(sk)->dccpms_send_ack_vector &&
-		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq,
-				    DCCP_ACKVEC_STATE_RECEIVED))
-			goto out_invalid_packet; /* FIXME: change error code */
-
 		/* Stop the REQUEST timer */
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 		WARN_ON(sk->sk_send_head == NULL);
@@ -637,7 +631,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
 
-		if (dccp_msk(sk)->dccpms_send_ack_vector &&
+		if (dp->dccps_hc_rx_ackvec != NULL &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
 				    DCCP_SKB_CB(skb)->dccpd_seq,
 				    DCCP_ACKVEC_STATE_RECEIVED))
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 02b14812464a..6821ae33dd37 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -45,7 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row);
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
-	dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector;
 }
 
 void dccp_time_wait(struct sock *sk, int state, int timeo)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index e9d674874b4e..7b1165c21f51 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -26,7 +26,6 @@
 int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
 int sysctl_dccp_feat_rx_ccid	      = DCCPF_INITIAL_CCID;
 int sysctl_dccp_feat_tx_ccid	      = DCCPF_INITIAL_CCID;
-int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 
 u64 dccp_decode_value_var(const u8 *bf, const u8 len)
 {
@@ -145,8 +144,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_ACK_VECTOR_1:
 			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
 				break;
-
-			if (dccp_msk(sk)->dccpms_send_ack_vector &&
+			if (dp->dccps_hc_rx_ackvec != NULL &&
 			    dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
 				goto out_invalid_option;
 			break;
@@ -526,7 +524,6 @@ static void dccp_insert_option_padding(struct sk_buff *skb)
 int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
 
@@ -547,7 +544,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 			if (dccp_insert_option_timestamp(sk, skb))
 				return -1;
 
-		} else if (dmsk->dccpms_send_ack_vector	&&
+		} else if (dp->dccps_hc_rx_ackvec != NULL &&
 			   dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
 			   dccp_insert_option_ackvec(sk, skb)) {
 				return -1;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0941f8fe1675..d5c2bacb713c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -201,7 +201,6 @@ EXPORT_SYMBOL_GPL(dccp_init_sock);
 void dccp_destroy_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_minisock *dmsk = dccp_msk(sk);
 
 	/*
 	 * DCCP doesn't use sk_write_queue, just sk_send_head
@@ -219,7 +218,7 @@ void dccp_destroy_sock(struct sock *sk)
 	kfree(dp->dccps_service_list);
 	dp->dccps_service_list = NULL;
 
-	if (dmsk->dccpms_send_ack_vector) {
+	if (dp->dccps_hc_rx_ackvec != NULL) {
 		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 		dp->dccps_hc_rx_ackvec = NULL;
 	}
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 587c12f915c1..018e210875e1 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -40,13 +40,6 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "send_ackvec",
-		.data		= &sysctl_dccp_feat_send_ack_vector,
-		.maxlen		= sizeof(sysctl_dccp_feat_send_ack_vector),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "request_retries",
 		.data		= &sysctl_dccp_request_retries,
-- 
cgit v1.2.3


From 361b73d5c34f59c3fd107bb9dbe7a1fbff2c2517 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 8 Dec 2008 10:58:08 +0800
Subject: ring_buffer: fix comments

Impact: comments cleanup

fix incorrect comments for enum ring_buffer_type

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1a350a847edd..d363467c8f13 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -28,17 +28,19 @@ struct ring_buffer_event {
  *				 size = 8 bytes
  *
  * @RINGBUF_TYPE_TIME_STAMP:	Sync time stamp with external clock
- *				 array[0] = tv_nsec
- *				 array[1] = tv_sec
+ *				 array[0]    = tv_nsec
+ *				 array[1..2] = tv_sec
  *				 size = 16 bytes
  *
  * @RINGBUF_TYPE_DATA:		Data record
  *				 If len is zero:
  *				  array[0] holds the actual length
- *				  array[1..(length+3)/4-1] holds data
+ *				  array[1..(length+3)/4] holds data
+ *				  size = 4 + 4 + length (bytes)
  *				 else
  *				  length = len << 2
- *				  array[0..(length+3)/4] holds data
+ *				  array[0..(length+3)/4-1] holds data
+ *				  size = 4 + length (bytes)
  */
 enum ring_buffer_type {
 	RINGBUF_TYPE_PADDING,
-- 
cgit v1.2.3


From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:31 -0800
Subject: sparse irq_desc[] array: core kernel and x86 changes

Impact: new feature

Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with
NR_CPUS set to large values. The goal is to be able to scale up to much
larger NR_IRQS value without impacting the (important) common case.

To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of
irq_desc pointers.

When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc,
this also makes the IRQ descriptors NUMA-local (to the site that calls
request_irq()).

This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now
uses desc->chip_data for x86 to store irq_cfg.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                   |  10 ++
 arch/x86/include/asm/irq_vectors.h |   9 ++
 arch/x86/kernel/io_apic.c          | 301 +++++++++++++++++++++++--------------
 arch/x86/kernel/irq.c              |   3 +
 arch/x86/kernel/irq_32.c           |   2 +
 arch/x86/kernel/irq_64.c           |   2 +
 arch/x86/kernel/irqinit_32.c       |   1 -
 arch/x86/kernel/irqinit_64.c       |   1 -
 drivers/char/random.c              |  22 +--
 drivers/pci/intr_remapping.c       |  76 +++++++++-
 drivers/xen/events.c               |  12 +-
 fs/proc/stat.c                     |  17 ++-
 include/linux/interrupt.h          |   2 +
 include/linux/irq.h                |  54 ++++++-
 include/linux/irqnr.h              |  14 +-
 include/linux/kernel_stat.h        |  14 +-
 include/linux/random.h             |  51 +++++++
 init/main.c                        |  11 ++
 kernel/irq/autoprobe.c             |  15 ++
 kernel/irq/chip.c                  |   3 +-
 kernel/irq/handle.c                | 181 +++++++++++++++++++++-
 kernel/irq/proc.c                  |   6 +-
 kernel/irq/spurious.c              |   5 +
 23 files changed, 649 insertions(+), 163 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..48ac688de3cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -238,6 +238,16 @@ config X86_HAS_BOOT_CPU_ID
 	def_bool y
 	depends on X86_VOYAGER
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on PCI_MSI || HT_IRQ
+	default y
+	help
+	  This enables support for sparse irq, esp for msi/msi-x. You may need
+	  if you have lots of cards supports msi-x installed.
+
+	  If you don't know what to do here, say Y.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..bb6b69a6b125 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,11 +102,20 @@
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+#  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
 
 #elif defined(CONFIG_X86_VOYAGER)
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..9de17f5c1125 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,8 +108,33 @@ static int __init parse_noapic(char *str)
 early_param("noapic", parse_noapic);
 
 struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+	struct irq_pin_list *pin;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+	return pin;
+}
+
 struct irq_cfg {
-	unsigned int irq;
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -119,83 +144,93 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-#define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
+void __init arch_early_irq_init(void)
 {
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	int count;
+	int i;
 
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	return irq_cfg(irq);
-}
+	cfg = irq_cfgx;
+	count = ARRAY_SIZE(irq_cfgx);
 
-/*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+	for (i = 0; i < count; i++) {
+		desc = irq_to_desc(i);
+		desc->chip_data = &cfg[i];
+	}
+}
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	struct irq_cfg *cfg = NULL;
+	struct irq_desc *desc;
 
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
+	desc = irq_to_desc(irq);
+	if (desc)
+		cfg = desc->chip_data;
 
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
-static struct irq_pin_list *irq_2_pin_ptr;
+	return cfg;
+}
 
-static void __init irq_2_pin_init(void)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_head;
-	int i;
+	struct irq_cfg *cfg;
+	int node;
+
+	node = cpu_to_node(cpu);
 
-	for (i = 1; i < PIN_MAP_SIZE; i++)
-		pin[i-1].next = &pin[i];
+	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
-	irq_2_pin_ptr = &pin[0];
+	return cfg;
 }
 
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_ptr;
+	struct irq_cfg *cfg;
 
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
+	cfg = desc->chip_data;
+	if (!cfg) {
+		desc->chip_data = get_one_free_irq_cfg(cpu);
+		if (!desc->chip_data) {
+			printk(KERN_ERR "can not alloc irq_cfg\n");
+			BUG_ON(1);
+		}
+	}
+}
 
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-	return pin;
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
 
+#endif
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -397,16 +432,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
 {
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	struct irq_cfg *cfg = irq_cfg(irq);
 
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin();
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+					apic, pin);
+			return;
+		}
 		cfg->irq_2_pin = entry;
 		entry->apic = apic;
 		entry->pin = pin;
@@ -421,7 +459,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin();
+	entry->next = get_one_free_irq_2_pin(cpu);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -430,7 +468,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq,
+static void __init replace_pin_at_irq(unsigned int irq, int cpu,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -451,7 +489,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
+		add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(unsigned int irq,
@@ -1162,9 +1200,13 @@ void __setup_vector_irq(int cpu)
 	/* This function must be called with vector_lock held */
 	int irq, vector;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
 	/* Mark the inuse vectors */
-	for_each_irq_cfg(irq, cfg) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
@@ -1356,6 +1398,8 @@ static void __init setup_IO_APIC_irqs(void)
 {
 	int apic, pin, idx, irq;
 	int notcon = 0;
+	struct irq_desc *desc;
+	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1387,7 +1431,12 @@ static void __init setup_IO_APIC_irqs(void)
 			if (multi_timer_check(apic, irq))
 				continue;
 #endif
-			add_pin_to_irq(irq, apic, pin);
+			desc = irq_to_desc_alloc_cpu(irq, cpu);
+			if (!desc) {
+				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+				continue;
+			}
+			add_pin_to_irq_cpu(irq, cpu, apic, pin);
 
 			setup_IO_APIC_irq(apic, pin, irq,
 					irq_trigger(idx), irq_polarity(idx));
@@ -1448,6 +1497,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1587,13 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(irq, cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
+	for_each_irq_desc(irq, desc) {
+		struct irq_pin_list *entry;
+
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
+		entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,6 +2077,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 	int was_pending = 0;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	if (irq < 16) {
@@ -2029,6 +2085,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
 	}
+	cfg = irq_cfg(irq);
 	__unmask_IO_APIC_irq(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -2178,6 +2235,9 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2229,6 +2289,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
 
+		if (irq == -1)
+			continue;
+
 		desc = irq_to_desc(irq);
 		if (!desc)
 			continue;
@@ -2430,8 +2493,12 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_cfg(irq, cfg) {
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
+		cfg = desc->chip_data;
+		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2439,11 +2506,9 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
+			else
 				/* Strange. Oh, well.. */
 				desc->chip = &no_irq_chip;
-			}
 		}
 	}
 }
@@ -2654,7 +2719,7 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
+			add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
@@ -2683,7 +2748,7 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		unmask_IO_APIC_irq(0);
 		enable_8259A_irq(0);
@@ -2902,21 +2967,25 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
-	struct irq_cfg *cfg_new;
-
-	irq_want = nr_irqs - 1;
+	struct irq_cfg *cfg_new = NULL;
+	int cpu = boot_cpu_id;
+	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		cfg_new = irq_cfg(new);
-		if (cfg_new && cfg_new->vector != 0)
+
+		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		if (!desc_new) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", new);
+			continue;
+		}
+		cfg_new = desc_new->chip_data;
+
+		if (cfg_new->vector != 0)
 			continue;
-		/* check if need to create one */
-		if (!cfg_new)
-			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
@@ -2925,6 +2994,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 	if (irq > 0) {
 		dynamic_irq_init(irq);
+		/* restore it, in case dynamic_irq_init clear it */
+		if (desc_new)
+			desc_new->chip_data = cfg_new;
 	}
 	return irq;
 }
@@ -2944,8 +3016,16 @@ int create_irq(void)
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
+	/* store it, in case dynamic_irq_cleanup clear it */
+	desc = irq_to_desc(irq);
+	cfg = desc->chip_data;
 	dynamic_irq_cleanup(irq);
+	/* connect back irq_cfg */
+	if (desc)
+		desc->chip_data = cfg;
 
 #ifdef CONFIG_INTR_REMAP
 	free_irte(irq);
@@ -3195,26 +3275,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	return 0;
 }
 
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
 {
 	unsigned int irq;
 	int ret;
 	unsigned int irq_want;
 
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+	irq_want = nr_irqs - 1;
 	irq = create_irq_nr(irq_want);
 	if (irq == 0)
 		return -1;
@@ -3228,7 +3295,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 		goto error;
 no_ir:
 #endif
-	ret = setup_msi_irq(dev, desc, irq);
+	ret = setup_msi_irq(dev, msidesc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
@@ -3246,7 +3313,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	unsigned int irq;
 	int ret, sub_handle;
-	struct msi_desc *desc;
+	struct msi_desc *msidesc;
 	unsigned int irq_want;
 
 #ifdef CONFIG_INTR_REMAP
@@ -3254,10 +3321,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	int index = 0;
 #endif
 
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+	irq_want = nr_irqs - 1;
 	sub_handle = 0;
-	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want--);
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = create_irq_nr(irq_want);
+		irq_want--;
 		if (irq == 0)
 			return -1;
 #ifdef CONFIG_INTR_REMAP
@@ -3289,7 +3357,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		}
 no_ir:
 #endif
-		ret = setup_msi_irq(dev, desc, irq);
+		ret = setup_msi_irq(dev, msidesc, irq);
 		if (ret < 0)
 			goto error;
 		sub_handle++;
@@ -3707,17 +3775,29 @@ int __init io_apic_get_version(int ioapic)
 
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int cpu = boot_cpu_id;
+
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
 
+	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
+	if (irq >= 16) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
+	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
 
@@ -3773,7 +3853,8 @@ void __init setup_ioapic_dest(void)
 			 * when you have too many devices, because at that time only boot
 			 * cpu is online.
 			 */
-			cfg = irq_cfg(irq);
+			desc = irq_to_desc(irq);
+			cfg = desc->chip_data;
 			if (!cfg->vector) {
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
@@ -3785,7 +3866,6 @@ void __init setup_ioapic_dest(void)
 			/*
 			 * Honour affinities which have been set in early boot
 			 */
-			desc = irq_to_desc(irq);
 			if (desc->status &
 			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
 				mask = desc->affinity;
@@ -3846,7 +3926,6 @@ void __init ioapic_init_mappings(void)
 	struct resource *ioapic_res;
 	int i;
 
-	irq_2_pin_init();
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
 	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 	any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..119fc9c8ff7f 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
 	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
 
+		if (!desc)
+			continue;
 		if (irq == 2)
 			continue;
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..900009c70591 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
 		int break_affinity = 0;
 		int set_affinity = 1;
 
+		if (!desc)
+			continue;
 		if (irq == 2)
 			continue;
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..5a5651b7f9e6 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,7 +69,6 @@ void __init init_ISA_irqs (void)
 	 * 16 old-style INTA-cycle interrupts:
 	 */
 	for (i = 0; i < 16; i++) {
-		/* first time call this irq_desc */
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..cd9f42d028d9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,7 +143,6 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < 16; i++) {
-		/* first time call this irq_desc */
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 675076f5fca8..d26891bfcd41 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,23 +558,9 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-static struct timer_rand_state *irq_timer_state[NR_IRQS];
-
-static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	if (irq >= nr_irqs)
-		return NULL;
-
-	return irq_timer_state[irq];
-}
-
-static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	if (irq >= nr_irqs)
-		return;
-
-	irq_timer_state[irq] = state;
-}
+#ifndef CONFIG_SPARSE_IRQ
+struct timer_rand_state *irq_timer_state[NR_IRQS];
+#endif
 
 static struct timer_rand_state input_timer_state;
 
@@ -933,8 +919,10 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
+#ifndef CONFIG_SPARSE_IRQ
 	if (irq >= nr_irqs)
 		return;
+#endif
 
 	state = get_timer_rand_state(irq);
 
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 2de5a3238c94..c9958ec5e25e 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,17 +19,75 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+{
+	struct irq_2_iommu *iommu;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+
+	return iommu;
+}
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (WARN_ON_ONCE(!desc))
+		return NULL;
+
+	return desc->irq_2_iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc;
+	struct irq_2_iommu *irq_iommu;
+
+	/*
+	 * alloc irq desc if not allocated already.
+	 */
+	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+		return NULL;
+	}
+
+	irq_iommu = desc->irq_2_iommu;
+
+	if (!irq_iommu)
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+
+	return desc->irq_2_iommu;
 }
 
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+}
+
+#else /* !CONFIG_SPARSE_IRQ */
+
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return &irq_2_iommuX[irq];
+
+	return NULL;
+}
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
 	return irq_2_iommu(irq);
 }
+#endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
@@ -86,9 +144,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
+#ifndef CONFIG_SPARSE_IRQ
 	/* protect irq_2_iommu_alloc later */
 	if (irq >= nr_irqs)
 		return -1;
+#endif
 
 	/*
 	 * start the IRTE search from index 0.
@@ -130,6 +190,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 		table->base[i].present = 1;
 
 	irq_iommu = irq_2_iommu_alloc(irq);
+	if (!irq_iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		printk(KERN_ERR "can't allocate irq_2_iommu\n");
+		return -1;
+	}
+
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index =  index;
 	irq_iommu->sub_handle = 0;
@@ -177,6 +243,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 
 	irq_iommu = irq_2_iommu_alloc(irq);
 
+	if (!irq_iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		printk(KERN_ERR "can't allocate irq_2_iommu\n");
+		return -1;
+	}
+
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index = index;
 	irq_iommu->sub_handle = subhandle;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 1e3b934a4cf7..2924faa7f6c4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -141,8 +141,12 @@ static void init_evtchn_cpu_bindings(void)
 	int i;
 
 	/* By default all event channels notify CPU#0. */
-	for_each_irq_desc(i, desc)
+	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		desc->affinity = cpumask_of_cpu(0);
+	}
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -231,7 +235,7 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for_each_irq_nr(irq)
+	for (irq = 0; irq < nr_irqs; irq++)
 		if (irq_bindcount[irq] == 0)
 			break;
 
@@ -792,7 +796,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for_each_irq_nr(irq)
+	for (irq = 0; irq < nr_irqs; irq++)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -824,7 +828,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for_each_irq_nr(i)
+	for (i = 0; i < nr_irqs; i++)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81904f07679d..a13431ab7c65 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,6 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 sum = 0;
 	struct timespec boottime;
 	unsigned int per_irq_sum;
+	struct irq_desc *desc;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -44,10 +45,11 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-
-		for_each_irq_nr(j)
+		for_each_irq_desc(j, desc) {
+			if (!desc)
+				continue;
 			sum += kstat_irqs_cpu(j, i);
-
+		}
 		sum += arch_irq_stat_cpu(i);
 	}
 	sum += arch_irq_stat();
@@ -90,11 +92,14 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_nr(j) {
+	for (j = 0; j < NR_IRQS; j++) {
+		desc = irq_to_desc(j);
 		per_irq_sum = 0;
 
-		for_each_possible_cpu(i)
-			per_irq_sum += kstat_irqs_cpu(j, i);
+		if (desc) {
+			for_each_possible_cpu(i)
+				per_irq_sum += kstat_irqs_cpu(j, i);
+		}
 
 		seq_printf(p, " %u", per_irq_sum);
 	}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f58a0cf8929a..79e915e7e8a5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,8 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
+extern int nr_irqs;
+
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3dddfa703ebd..63b00439d4d2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -129,6 +129,8 @@ struct irq_chip {
 	const char	*typename;
 };
 
+struct timer_rand_state;
+struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  * @irq:		interrupt number for this descriptor
@@ -154,6 +156,13 @@ struct irq_chip {
  */
 struct irq_desc {
 	unsigned int		irq;
+#ifdef CONFIG_SPARSE_IRQ
+	struct timer_rand_state *timer_rand_state;
+	unsigned int            *kstat_irqs;
+# ifdef CONFIG_INTR_REMAP
+	struct irq_2_iommu      *irq_2_iommu;
+# endif
+#endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -181,14 +190,52 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
+extern void early_irq_init(void);
+extern void arch_early_irq_init(void);
+extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
+					struct irq_desc *desc, int cpu);
+extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
+
+#ifndef CONFIG_SPARSE_IRQ
 
 extern struct irq_desc irq_desc[NR_IRQS];
 
 static inline struct irq_desc *irq_to_desc(unsigned int irq)
 {
-	return (irq < nr_irqs) ? irq_desc + irq : NULL;
+	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+}
+static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	return irq_to_desc(irq);
 }
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
+	    irq >= 0; irq--, desc--)
+#endif
+
+#else
+
+extern struct irq_desc *irq_to_desc(unsigned int irq);
+extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
+
+#define kstat_irqs_this_cpu(DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()])
+#define kstat_incr_irqs_this_cpu(irqno, DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()]++)
+
+#endif
+
 /*
  * Migration helpers for obsolete names, they will go away:
  */
@@ -380,6 +427,11 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 #define get_irq_data(irq)	(irq_to_desc(irq)->handler_data)
 #define get_irq_msi(irq)	(irq_to_desc(irq)->msi_desc)
 
+#define get_irq_desc_chip(desc)		((desc)->chip)
+#define get_irq_desc_chip_data(desc)	((desc)->chip_data)
+#define get_irq_desc_data(desc)		((desc)->handler_data)
+#define get_irq_desc_msi(desc)		((desc)->msi_desc)
+
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
 #endif /* !CONFIG_S390 */
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 452c280c8115..7a299e989f8b 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -7,18 +7,10 @@
 
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
-#else
-extern int nr_irqs;
 
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-
-# define for_each_irq_desc_reverse(irq, desc)				\
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);	\
-	     irq >= 0; irq--, desc--)
+static inline early_sparse_irq_init(void)
+{
+}
 #endif
 
-#define for_each_irq_nr(irq)			\
-	for (irq = 0; irq < nr_irqs; irq++)
-
 #endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4a145caeee07..4ee4b3d2316f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,9 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-	unsigned int irqs[NR_IRQS];
+#ifndef CONFIG_SPARSE_IRQ
+       unsigned int irqs[NR_IRQS];
+#endif
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -39,6 +41,10 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
+#ifndef CONFIG_SPARSE_IRQ
+#define kstat_irqs_this_cpu(irq) \
+	(kstat_this_cpu.irqs[irq])
+
 struct irq_desc;
 
 static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
@@ -46,11 +52,17 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
 {
 	kstat_this_cpu.irqs[irq]++;
 }
+#endif
+
 
+#ifndef CONFIG_SPARSE_IRQ
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
 }
+#else
+extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
+#endif
 
 /*
  * Number of interrupts per specific IRQ source, since bootup
diff --git a/include/linux/random.h b/include/linux/random.h
index 36f125c0c603..ad9daa2374d5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -44,6 +44,57 @@ struct rand_pool_info {
 
 extern void rand_initialize_irq(int irq);
 
+struct timer_rand_state;
+#ifndef CONFIG_SPARSE_IRQ
+
+extern struct timer_rand_state *irq_timer_state[];
+
+extern int nr_irqs;
+static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	if (irq >= nr_irqs)
+		return NULL;
+
+	return irq_timer_state[irq];
+}
+
+static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	if (irq >= nr_irqs)
+		return;
+
+	irq_timer_state[irq] = state;
+}
+
+#else
+
+#include <linux/irq.h>
+static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return NULL;
+
+	return desc->timer_rand_state;
+}
+
+static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return;
+
+	desc->timer_rand_state = state;
+}
+#endif
+
+
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
 extern void add_interrupt_randomness(int irq);
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..c1f999a3cf31 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,6 +539,15 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
+void __init __weak arch_early_irq_init(void)
+{
+}
+
+void __init __weak early_irq_init(void)
+{
+	arch_early_irq_init();
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
@@ -603,6 +612,8 @@ asmlinkage void __init start_kernel(void)
 	sort_main_extable();
 	trap_init();
 	rcu_init();
+	/* init some links before init_ISA_irqs() */
+	early_irq_init();
 	init_IRQ();
 	pidhash_init();
 	init_timers();
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index cc0f7321b8ce..650ce4102a63 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,6 +40,9 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -68,6 +71,9 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -86,6 +92,9 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -124,6 +133,9 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -166,6 +178,9 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 10b5092e9bfe..8e4fce4a1b1f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -24,9 +24,10 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c815b42d0f5b..96ca203eb51b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -15,9 +15,16 @@
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/rculist.h>
+#include <linux/hash.h>
 
 #include "internals.h"
 
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq:       the interrupt number
@@ -49,6 +56,155 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
+void __init __attribute__((weak)) arch_early_irq_init(void)
+{
+}
+
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_desc irq_desc_init = {
+	.irq	    = -1,
+	.status	    = IRQ_DISABLED,
+	.chip	    = &no_irq_chip,
+	.handle_irq = handle_bad_irq,
+	.depth      = 1,
+	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+	.affinity   = CPU_MASK_ALL
+#endif
+};
+
+static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+{
+	unsigned long bytes;
+	char *ptr;
+	int node;
+
+	/* Compute how many bytes we need per irq and allocate them */
+	bytes = nr * sizeof(unsigned int);
+
+	node = cpu_to_node(cpu);
+	ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n", cpu, node);
+
+	if (ptr)
+		desc->kstat_irqs = (unsigned int *)ptr;
+}
+
+void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+}
+
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+{
+	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+	desc->irq = irq;
+#ifdef CONFIG_SMP
+	desc->cpu = cpu;
+#endif
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	init_kstat_irqs(desc, cpu, nr_cpu_ids);
+	if (!desc->kstat_irqs) {
+		printk(KERN_ERR "can not alloc kstat_irqs\n");
+		BUG_ON(1);
+	}
+	arch_init_chip_data(desc, cpu);
+}
+
+/*
+ * Protect the sparse_irqs:
+ */
+static DEFINE_SPINLOCK(sparse_irq_lock);
+
+struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
+
+static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = {
+	[0 ... 15] = {
+		.irq	    = -1,
+		.status	    = IRQ_DISABLED,
+		.chip	    = &no_irq_chip,
+		.handle_irq = handle_bad_irq,
+		.depth	    = 1,
+		.lock	    = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+		.affinity   = CPU_MASK_ALL
+#endif
+	}
+};
+
+/* FIXME: use bootmem alloc ...*/
+static unsigned int kstat_irqs_legacy[16][NR_CPUS];
+
+void __init early_irq_init(void)
+{
+	struct irq_desc *desc;
+	int legacy_count;
+	int i;
+
+	desc = irq_desc_legacy;
+	legacy_count = ARRAY_SIZE(irq_desc_legacy);
+
+	for (i = 0; i < legacy_count; i++) {
+		desc[i].irq = i;
+		desc[i].kstat_irqs = kstat_irqs_legacy[i];
+
+		irq_desc_ptrs[i] = desc + i;
+	}
+
+	for (i = legacy_count; i < NR_IRQS; i++)
+		irq_desc_ptrs[i] = NULL;
+
+	arch_early_irq_init();
+}
+
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+	int node;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
+				irq, NR_IRQS);
+		WARN_ON(1);
+		return NULL;
+	}
+
+	desc = irq_desc_ptrs[irq];
+	if (desc)
+		return desc;
+
+	spin_lock_irqsave(&sparse_irq_lock, flags);
+
+	/* We have to check it to avoid races with another CPU */
+	desc = irq_desc_ptrs[irq];
+	if (desc)
+		goto out_unlock;
+
+	node = cpu_to_node(cpu);
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
+		 irq, cpu, node);
+	if (!desc) {
+		printk(KERN_ERR "can not alloc irq_desc\n");
+		BUG_ON(1);
+	}
+	init_one_irq_desc(irq, desc, cpu);
+
+	irq_desc_ptrs[irq] = desc;
+
+out_unlock:
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+	return desc;
+}
+
+#else
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -62,6 +218,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
+#endif
+
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
@@ -261,17 +419,28 @@ out:
 
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
-
 void early_init_irq_lock_class(void)
 {
+#ifndef CONFIG_SPARSE_IRQ
 	struct irq_desc *desc;
 	int i;
 
-	for_each_irq_desc(i, desc)
+	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	}
+#endif
+}
+#endif
+
+#ifdef CONFIG_SPARSE_IRQ
+unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	return desc->kstat_irqs[cpu];
 }
 #endif
+EXPORT_SYMBOL(kstat_irqs_cpu);
+
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d257e7d6a8a4..f6b3440f05bc 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -243,7 +243,11 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for_each_irq_desc(irq, desc)
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		register_irq_proc(irq, desc);
+	}
 }
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dd364c11e56e..3738107531fd 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,6 +91,9 @@ static int misrouted_irq(int irq)
 	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		if (!i)
 			 continue;
 
@@ -112,6 +115,8 @@ static void poll_spurious_irqs(unsigned long dummy)
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		if (!i)
 			 continue;
 
-- 
cgit v1.2.3


From 3145e941fcfe2548fa2270afb1a05bab3a6bc418 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:34 -0800
Subject: x86, MSI: pass irq_cfg and irq_desc

Impact: simplify code

Pass irq_desc and cfg around, instead of raw IRQ numbers - this way
we dont have to look it up again and again.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 318 ++++++++++++++++++++++++++--------------------
 drivers/pci/msi.c         |  55 +++++---
 include/linux/msi.h       |   3 +
 3 files changed, 222 insertions(+), 154 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 0dcde74abd1d..a1a2e070f31a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -231,6 +231,10 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
+static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+}
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -272,11 +276,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	entry = cfg->irq_2_pin;
@@ -358,13 +361,12 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
 
-	cfg = irq_cfg(irq);
 	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
@@ -394,24 +396,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	}
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
@@ -419,12 +424,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
+	__target_IO_APIC_irq(irq, dest, cfg);
 	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	set_ioapic_affinity_irq_desc(desc, mask);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -432,10 +445,9 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 {
 	struct irq_pin_list *entry;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
@@ -468,11 +480,10 @@ static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq, int cpu,
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
 	struct irq_pin_list *entry = cfg->irq_2_pin;
 	int replaced = 0;
 
@@ -489,18 +500,16 @@ static void __init replace_pin_at_irq(unsigned int irq, int cpu,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
+		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(unsigned int irq,
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
 				int mask_and, int mask_or,
 				void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	cfg = irq_cfg(irq);
 	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
 		unsigned int reg;
 		pin = entry->pin;
@@ -513,9 +522,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
 	}
 }
 
-static void __unmask_IO_APIC_irq(unsigned int irq)
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
 #ifdef CONFIG_X86_64
@@ -530,47 +539,64 @@ void io_apic_sync(struct irq_pin_list *entry)
 	readl(&io_apic->data);
 }
 
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 #else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
 			IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
 			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 #endif /* CONFIG_X86_32 */
 
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
+	BUG_ON(!cfg);
+
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
+	__mask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	unmask_IO_APIC_irq_desc(desc);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -1072,7 +1098,7 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1088,16 +1114,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
 	int cpu;
-	struct irq_cfg *cfg;
 
-	cfg = irq_cfg(irq);
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
 
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_t tmp;
@@ -1151,24 +1174,22 @@ next:
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	int err;
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
+	err = __assign_irq_vector(irq, cfg, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
 	return err;
 }
 
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	struct irq_cfg *cfg;
 	cpumask_t mask;
 	int cpu, vector;
 
-	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -1257,11 +1278,8 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 #endif
 
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
@@ -1353,7 +1371,7 @@ static int setup_ioapic_entry(int apic, int irq,
 	return 0;
 }
 
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
 			      int trigger, int polarity)
 {
 	struct irq_cfg *cfg;
@@ -1363,10 +1381,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 	if (!IO_APIC_IRQ(irq))
 		return;
 
-	cfg = irq_cfg(irq);
+	cfg = desc->chip_data;
 
 	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
 	cpus_and(mask, cfg->domain, mask);
@@ -1383,11 +1401,11 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
+		__clear_irq_vector(irq, cfg);
 		return;
 	}
 
-	ioapic_register_intr(irq, trigger);
+	ioapic_register_intr(irq, desc, trigger);
 	if (irq < NR_IRQS_LEGACY)
 		disable_8259A_irq(irq);
 
@@ -1399,6 +1417,7 @@ static void __init setup_IO_APIC_irqs(void)
 	int apic, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1436,9 +1455,10 @@ static void __init setup_IO_APIC_irqs(void)
 				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 				continue;
 			}
-			add_pin_to_irq_cpu(irq, cpu, apic, pin);
+			cfg = desc->chip_data;
+			add_pin_to_irq_cpu(cfg, cpu, apic, pin);
 
-			setup_IO_APIC_irq(apic, pin, irq,
+			setup_IO_APIC_irq(apic, pin, irq, desc,
 					irq_trigger(idx), irq_polarity(idx));
 		}
 	}
@@ -2086,7 +2106,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 			was_pending = 1;
 	}
 	cfg = irq_cfg(irq);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -2149,35 +2169,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	irq = desc->irq;
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	desc = irq_to_desc(irq);
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		__target_IO_APIC_irq(irq, dest, cfg);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 
@@ -2199,14 +2221,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	desc->affinity = mask;
 }
 
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 {
 	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
 
-	mask_IO_APIC_irq(irq);
+	mask_IO_APIC_irq_desc(desc);
 
-	if (io_apic_level_ack_pending(irq)) {
+	if (io_apic_level_ack_pending(cfg)) {
 		/*
 		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
@@ -2218,14 +2240,15 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
 	cpus_clear(desc->pending_mask);
 
 unmask:
-	unmask_IO_APIC_irq(irq);
+	unmask_IO_APIC_irq_desc(desc);
+
 	return ret;
 }
 
@@ -2258,18 +2281,22 @@ static void ir_irq_migration(struct work_struct *work)
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
 		desc->pending_mask = mask;
-		migrate_irq_remapped_level(irq);
+		migrate_irq_remapped_level_desc(desc);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, mask);
+	migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #endif
 
@@ -2313,9 +2340,10 @@ unlock:
 	irq_exit();
 }
 
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_desc *desc = *descp;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
 	if (likely(!cfg->move_in_progress))
@@ -2333,8 +2361,9 @@ static void irq_complete_move(unsigned int irq)
 	}
 }
 #else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
+
 #ifdef CONFIG_INTR_REMAP
 static void ack_x2apic_level(unsigned int irq)
 {
@@ -2345,11 +2374,14 @@ static void ack_x2apic_edge(unsigned int irq)
 {
 	ack_x2APIC_irq();
 }
+
 #endif
 
 static void ack_apic_edge(unsigned int irq)
 {
-	irq_complete_move(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	irq_complete_move(&desc);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -2358,18 +2390,21 @@ atomic_t irq_mis_count;
 
 static void ack_apic_level(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
+
 #ifdef CONFIG_X86_32
 	unsigned long v;
 	int i;
 #endif
+	struct irq_cfg *cfg;
 	int do_unmask_irq = 0;
 
-	irq_complete_move(irq);
+	irq_complete_move(&desc);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
+		mask_IO_APIC_irq_desc(desc);
 	}
 #endif
 
@@ -2393,7 +2428,8 @@ static void ack_apic_level(unsigned int irq)
 	* operation to prevent an edge-triggered interrupt escaping meanwhile.
 	* The idea is from Manfred Spraul.  --macro
 	*/
-	i = irq_cfg(irq)->vector;
+	cfg = desc->chip_data;
+	i = cfg->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 #endif
@@ -2432,17 +2468,18 @@ static void ack_apic_level(unsigned int irq)
 		 * accurate and is causing problems then it is a hardware bug
 		 * and you can go talk to the chipset vendor about it.
 		 */
-		if (!io_apic_level_ack_pending(irq))
+		cfg = desc->chip_data;
+		if (!io_apic_level_ack_pending(cfg))
 			move_masked_irq(irq);
-		unmask_IO_APIC_irq(irq);
+		unmask_IO_APIC_irq_desc(desc);
 	}
 
 #ifdef CONFIG_X86_32
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
+		__mask_and_edge_IO_APIC_irq(cfg);
+		__unmask_and_level_IO_APIC_irq(cfg);
 		spin_unlock(&ioapic_lock);
 	}
 #endif
@@ -2533,7 +2570,7 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2545,11 +2582,8 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.ack		= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
@@ -2653,7 +2687,9 @@ int timer_through_8259 __initdata;
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg(0);
+	struct irq_desc *desc = irq_to_desc(0);
+	struct irq_cfg *cfg = desc->chip_data;
+	int cpu = boot_cpu_id;
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	unsigned int ver;
@@ -2668,7 +2704,7 @@ static inline void __init check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
+	assign_irq_vector(0, cfg, TARGET_CPUS);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2719,10 +2755,10 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
+			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
@@ -2748,9 +2784,9 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2782,7 +2818,7 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
-	lapic_register_intr(0);
+	lapic_register_intr(0, desc);
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
@@ -2986,7 +3022,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 		if (cfg_new->vector != 0)
 			continue;
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+		if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
 	}
@@ -3034,7 +3070,7 @@ void destroy_irq(unsigned int irq)
 	free_irte(irq);
 #endif
 	spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq);
+	__clear_irq_vector(irq, cfg);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -3049,12 +3085,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	unsigned dest;
 	cpumask_t tmp;
 
+	cfg = irq_cfg(irq);
 	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	err = assign_irq_vector(irq, cfg, tmp);
 	if (err)
 		return err;
 
-	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3112,35 +3148,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	read_msi_msg(irq, &msg);
+	read_msi_msg_desc(desc, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
+	write_msi_msg_desc(desc, &msg);
 	desc->affinity = mask;
 }
-
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
@@ -3148,11 +3184,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -3161,10 +3197,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3188,9 +3226,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif
 #endif /* CONFIG_SMP */
 
@@ -3249,7 +3287,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 }
 #endif
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
 	int ret;
 	struct msi_msg msg;
@@ -3258,7 +3296,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	if (ret < 0)
 		return ret;
 
-	set_irq_msi(irq, desc);
+	set_irq_msi(irq, msidesc);
 	write_msi_msg(irq, &msg);
 
 #ifdef CONFIG_INTR_REMAP
@@ -3381,20 +3419,22 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3406,9 +3446,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif /* CONFIG_SMP */
 
 struct irq_chip dmar_msi_type = {
@@ -3442,8 +3482,8 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_SMP
 static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -3452,10 +3492,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3467,9 +3509,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif /* CONFIG_SMP */
 
 struct irq_chip hpet_msi_type = {
@@ -3524,26 +3566,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif
 
 static struct irq_chip ht_irq_chip = {
@@ -3563,13 +3607,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 	int err;
 	cpumask_t tmp;
 
+	cfg = irq_cfg(irq);
 	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	err = assign_irq_vector(irq, cfg, tmp);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		cfg = irq_cfg(irq);
 		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
@@ -3615,7 +3659,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long flags;
 	int err;
 
-	err = assign_irq_vector(irq, *eligible_cpu);
+	cfg = irq_cfg(irq);
+
+	err = assign_irq_vector(irq, cfg, *eligible_cpu);
 	if (err != 0)
 		return err;
 
@@ -3624,8 +3670,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 				      irq_name);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	cfg = irq_cfg(irq);
-
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
 	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3806,10 +3850,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 	 */
 	if (irq >= NR_IRQS_LEGACY) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
+		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
 	}
 
-	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
 
 	return 0;
 }
@@ -3866,7 +3910,7 @@ void __init setup_ioapic_dest(void)
 			desc = irq_to_desc(irq);
 			cfg = desc->chip_data;
 			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq,
+				setup_IO_APIC_irq(ioapic, pin, irq, desc,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
 				continue;
@@ -3884,10 +3928,10 @@ void __init setup_ioapic_dest(void)
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, mask);
+				set_ir_ioapic_affinity_irq_desc(desc, mask);
 			else
 #endif
-				set_ioapic_affinity_irq(irq, mask);
+				set_ioapic_affinity_irq_desc(desc, mask);
 		}
 
 	}
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 74801f7df9c9..11a51f8ed3b3 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -103,11 +103,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable)
 	}
 }
 
-static void msix_flush_writes(unsigned int irq)
+static void msix_flush_writes(struct irq_desc *desc)
 {
 	struct msi_desc *entry;
 
-	entry = get_irq_msi(irq);
+	entry = get_irq_desc_msi(desc);
 	BUG_ON(!entry || !entry->dev);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
@@ -135,11 +135,11 @@ static void msix_flush_writes(unsigned int irq)
  * Returns 1 if it succeeded in masking the interrupt and 0 if the device
  * doesn't support MSI masking.
  */
-static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
+static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
 {
 	struct msi_desc *entry;
 
-	entry = get_irq_msi(irq);
+	entry = get_irq_desc_msi(desc);
 	BUG_ON(!entry || !entry->dev);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
@@ -172,9 +172,9 @@ static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
 	return 1;
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_msi(irq);
+	struct msi_desc *entry = get_irq_desc_msi(desc);
 	switch(entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
 	{
@@ -211,9 +211,16 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 	}
 }
 
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_msi(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	read_msi_msg_desc(desc, msg);
+}
+
+void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+{
+	struct msi_desc *entry = get_irq_desc_msi(desc);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
 	{
@@ -252,21 +259,31 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 	entry->msg = *msg;
 }
 
+void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	write_msi_msg_desc(desc, msg);
+}
+
 void mask_msi_irq(unsigned int irq)
 {
-	msi_set_mask_bits(irq, 1, 1);
-	msix_flush_writes(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	msi_set_mask_bits(desc, 1, 1);
+	msix_flush_writes(desc);
 }
 
 void unmask_msi_irq(unsigned int irq)
 {
-	msi_set_mask_bits(irq, 1, 0);
-	msix_flush_writes(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	msi_set_mask_bits(desc, 1, 0);
+	msix_flush_writes(desc);
 }
 
 static int msi_free_irqs(struct pci_dev* dev);
 
-
 static struct msi_desc* alloc_msi_entry(void)
 {
 	struct msi_desc *entry;
@@ -303,9 +320,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, 0);
 	write_msi_msg(dev->irq, &entry->msg);
-	if (entry->msi_attrib.maskbit)
-		msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask,
+	if (entry->msi_attrib.maskbit) {
+		struct irq_desc *desc = irq_to_desc(dev->irq);
+		msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
 				  entry->msi_attrib.masked);
+	}
 
 	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
 	control &= ~PCI_MSI_FLAGS_QSIZE;
@@ -327,8 +346,9 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
 	msix_set_enable(dev, 0);
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
+		struct irq_desc *desc = irq_to_desc(entry->irq);
 		write_msi_msg(entry->irq, &entry->msg);
-		msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked);
+		msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
 	}
 
 	BUG_ON(list_empty(&dev->msi_list));
@@ -596,7 +616,8 @@ void pci_msi_shutdown(struct pci_dev* dev)
 	/* Return the the pci reset with msi irqs unmasked */
 	if (entry->msi_attrib.maskbit) {
 		u32 mask = entry->msi_attrib.maskbits_mask;
-		msi_set_mask_bits(dev->irq, mask, ~mask);
+		struct irq_desc *desc = irq_to_desc(dev->irq);
+		msi_set_mask_bits(desc, mask, ~mask);
 	}
 	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
 		return;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 8f2939227207..d2b8a1e8ca11 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -10,8 +10,11 @@ struct msi_msg {
 };
 
 /* Helper functions */
+struct irq_desc;
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
+extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
 extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 
-- 
cgit v1.2.3


From 8b96f0119818964e4944fd1c423bf6770027d3ac Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 6 Dec 2008 03:40:00 +0100
Subject: tracing/function-graph-tracer: introduce __notrace_funcgraph to
 filter special functions

Impact: trace more functions

When the function graph tracer is configured, three more files are not
traced to prevent only four functions to be traced. And this impacts the
normal function tracer too.

arch/x86/kernel/process_64/32.c:

I had crashes when I let this file traced. After some debugging, I saw
that the "current" task point was changed inside__swtich_to(), ie:
"write_pda(pcurrent, next_p);" inside process_64.c Since the tracer store
the original return address of the function inside current, we had
crashes. Only __switch_to() has to be excluded from tracing.

kernel/module.c and kernel/extable.c:

Because of a function used internally by the function graph tracer:
__kernel_text_address()

To let the other functions inside these files to be traced, this patch
introduces the __notrace_funcgraph function prefix which is __notrace if
function graph tracer is configured and nothing if not.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile     |  6 ------
 arch/x86/kernel/process_32.c |  4 +++-
 arch/x86/kernel/process_64.c |  4 +++-
 include/linux/ftrace.h       | 11 +++++++++++
 kernel/Makefile              |  4 ----
 kernel/extable.c             |  5 +++--
 kernel/module.c              |  2 +-
 7 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a3049da61985..1cad9318d217 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,12 +14,6 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
-ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# Don't trace __switch_to() but let it for function tracer
-CFLAGS_REMOVE_process_32.o = -pg
-CFLAGS_REMOVE_process_64.o = -pg
-endif
-
 #
 # vsyscalls (which work on the user stack) should have
 # no stack-protector checks:
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..24c2276aa453 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
 #include <linux/percpu.h>
 #include <linux/prctl.h>
 #include <linux/dmi.h>
+#include <linux/ftrace.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
  * the task-switch, and shows up in ret_from_fork in entry.S,
  * for example.
  */
-struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+__notrace_funcgraph struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..fbb321d53d34 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
 #include <linux/prctl.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/ftrace.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
  * - could test fs/gs bitsliced
  *
  * Kprobes not supported here. Set the probe on schedule instead.
+ * Function graph tracer not supported too.
  */
-struct task_struct *
+__notrace_funcgraph struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b9b4d0a22d10..449fa8e9e34f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -369,6 +369,14 @@ struct ftrace_graph_ret {
 };
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/*
+ * Sometimes we don't want to trace a function with the function
+ * graph tracer but we want them to keep traced by the usual function
+ * tracer if the function graph tracer is not configured.
+ */
+#define __notrace_funcgraph		notrace
+
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 /* Type of the callback handlers for tracing function graph*/
@@ -394,6 +402,9 @@ static inline int task_curr_ret_stack(struct task_struct *t)
 	return t->curr_ret_stack;
 }
 #else
+
+#define __notrace_funcgraph
+
 static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 703cf3b7389c..19fad003b19d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,10 +21,6 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -pg
 endif
-ifdef CONFIG_FUNCTION_GRAPH_TRACER
-CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
-CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
-endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..feb0317cf09a 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -17,6 +17,7 @@
 */
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/ftrace.h>
 #include <asm/uaccess.h>
 #include <asm/sections.h>
 
@@ -40,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 	return e;
 }
 
-int core_kernel_text(unsigned long addr)
+__notrace_funcgraph int core_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext &&
 	    addr <= (unsigned long)_etext)
@@ -53,7 +54,7 @@ int core_kernel_text(unsigned long addr)
 	return 0;
 }
 
-int __kernel_text_address(unsigned long addr)
+__notrace_funcgraph int __kernel_text_address(unsigned long addr)
 {
 	if (core_kernel_text(addr))
 		return 1;
diff --git a/kernel/module.c b/kernel/module.c
index 89bcf7c1327d..dd2a54155b54 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2704,7 +2704,7 @@ int is_module_address(unsigned long addr)
 
 
 /* Is this a valid kernel address? */
-struct module *__module_text_address(unsigned long addr)
+__notrace_funcgraph struct module *__module_text_address(unsigned long addr)
 {
 	struct module *mod;
 
-- 
cgit v1.2.3


From 380c4b1411ccd6885f92b2c8ceb08433a720f44e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 6 Dec 2008 03:43:41 +0100
Subject: tracing/function-graph-tracer: append the tracing_graph_flag

Impact: Provide a way to pause the function graph tracer

As suggested by Steven Rostedt, the previous patch that prevented from
spinlock function tracing shouldn't use the raw_spinlock to fix it.
It's much better to follow lockdep with normal spinlock, so this patch
adds a new flag for each task to make the function graph tracer able
to be paused. We also can send an ftrace_printk whithout worrying of
the irrelevant traced spinlock during insertion.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c |  5 ++++-
 include/linux/ftrace.h   | 13 +++++++++++++
 include/linux/sched.h    |  2 ++
 kernel/trace/ftrace.c    |  2 ++
 kernel/trace/trace.c     | 18 +++++-------------
 5 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index f98c4076a170..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -476,7 +476,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 				&return_to_handler;
 
 	/* Nmi's are currently unsupported */
-	if (atomic_read(&in_nmi))
+	if (unlikely(atomic_read(&in_nmi)))
+		return;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
 
 	/*
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 449fa8e9e34f..11cac81eed08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -401,6 +401,16 @@ static inline int task_curr_ret_stack(struct task_struct *t)
 {
 	return t->curr_ret_stack;
 }
+
+static inline void pause_graph_tracing(void)
+{
+	atomic_inc(&current->tracing_graph_pause);
+}
+
+static inline void unpause_graph_tracing(void)
+{
+	atomic_dec(&current->tracing_graph_pause);
+}
 #else
 
 #define __notrace_funcgraph
@@ -412,6 +422,9 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
 {
 	return -1;
 }
+
+static inline void pause_graph_tracing(void) { }
+static inline void unpause_graph_tracing(void) { }
 #endif
 
 #ifdef CONFIG_TRACING
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4c152e0acc9e..4b81fc5f7731 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1379,6 +1379,8 @@ struct task_struct {
 	 * because of depth overrun.
 	 */
 	atomic_t trace_overrun;
+	/* Pause for the tracing */
+	atomic_t tracing_graph_pause;
 #endif
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2971fe48f55e..a12f80efceaa 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1998,6 +1998,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 			/* Make sure IRQs see the -1 first: */
 			barrier();
 			t->ret_stack = ret_stack_list[start++];
+			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
 		}
 	} while_each_thread(g, t);
@@ -2077,6 +2078,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 		if (!t->ret_stack)
 			return;
 		t->curr_ret_stack = -1;
+		atomic_set(&t->tracing_graph_pause, 0);
 		atomic_set(&t->trace_overrun, 0);
 	} else
 		t->ret_stack = NULL;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 33549537f30f..0b8659bd5ad2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3590,14 +3590,7 @@ static __init int tracer_init_debugfs(void)
 
 int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
 {
-	/*
-	 * Raw Spinlock because a normal spinlock would be traced here
-	 * and append an irrelevant couple spin_lock_irqsave/
-	 * spin_unlock_irqrestore traced by ftrace around this
-	 * TRACE_PRINTK trace.
-	 */
-	static raw_spinlock_t trace_buf_lock =
-				(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(trace_buf_lock);
 	static char trace_buf[TRACE_BUF_SIZE];
 
 	struct ring_buffer_event *event;
@@ -3618,8 +3611,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
 	if (unlikely(atomic_read(&data->disabled)))
 		goto out;
 
-	local_irq_save(flags);
-	__raw_spin_lock(&trace_buf_lock);
+	pause_graph_tracing();
+	spin_lock_irqsave(&trace_buf_lock, irq_flags);
 	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
 
 	len = min(len, TRACE_BUF_SIZE-1);
@@ -3640,9 +3633,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
 	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
 
  out_unlock:
-	__raw_spin_unlock(&trace_buf_lock);
-	local_irq_restore(flags);
-
+	spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
+	unpause_graph_tracing();
  out:
 	preempt_enable_notrace();
 
-- 
cgit v1.2.3


From 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 8 Dec 2008 14:06:17 -0800
Subject: sparseirq: fix Alpha build failure

Impact: build fix on Alpha

-tip testing found this build failure on the Alpha defconfig:

/home/mingo/tip/fs/proc/stat.c: In function 'show_stat':
/home/mingo/tip/fs/proc/stat.c:48: error: implicit declaration of function 'for_each_irq_desc'
/home/mingo/tip/fs/proc/stat.c:48: error: expected ';' before '{' token

can not use irq_desc() in stat.c on older architectures.

Signed-off-by: Yinghai Lu <yinghai@kernel.orgg>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/stat.c        | 20 +++++++++++++-------
 include/linux/irq.h   |  9 ---------
 include/linux/irqnr.h | 19 ++++++++++++++++---
 3 files changed, 29 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index a13431ab7c65..3cb9492801c0 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,9 +45,12 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for_each_irq_desc(j, desc) {
+		for_each_irq_nr(j) {
+#ifdef CONFIG_SPARSE_IRQ
+			desc = irq_to_desc(j);
 			if (!desc)
 				continue;
+#endif
 			sum += kstat_irqs_cpu(j, i);
 		}
 		sum += arch_irq_stat_cpu(i);
@@ -92,14 +95,17 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for (j = 0; j < NR_IRQS; j++) {
-		desc = irq_to_desc(j);
+	for_each_irq_nr(j) {
 		per_irq_sum = 0;
-
-		if (desc) {
-			for_each_possible_cpu(i)
-				per_irq_sum += kstat_irqs_cpu(j, i);
+#ifdef CONFIG_SPARSE_IRQ
+		desc = irq_to_desc(j);
+		if (!desc) {
+			seq_printf(p, " %u", per_irq_sum);
+			continue;
 		}
+#endif
+		for_each_possible_cpu(i)
+			per_irq_sum += kstat_irqs_cpu(j, i);
 
 		seq_printf(p, " %u", per_irq_sum);
 	}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 63b00439d4d2..b5749db3e5a1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -198,7 +198,6 @@ extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
 
 #ifndef CONFIG_SPARSE_IRQ
-
 extern struct irq_desc irq_desc[NR_IRQS];
 
 static inline struct irq_desc *irq_to_desc(unsigned int irq)
@@ -210,14 +209,6 @@ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
 	return irq_to_desc(irq);
 }
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
-	    irq >= 0; irq--, desc--)
-#endif
-
 #else
 
 extern struct irq_desc *irq_to_desc(unsigned int irq);
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 7a299e989f8b..13754f813589 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -8,9 +8,22 @@
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
 
-static inline early_sparse_irq_init(void)
-{
-}
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1; irq >= 0; irq--)
+#else
+#ifndef CONFIG_SPARSE_IRQ
+
+struct irq_desc;
+extern int nr_irqs;
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
+	    irq >= 0; irq--, desc--)
 #endif
+#endif
+
+#define for_each_irq_nr(irq)                   \
+       for (irq = 0; irq < nr_irqs; irq++)
 
 #endif
-- 
cgit v1.2.3


From a64e64944f4b8ce3288519555dbaa0232414b8ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 12 Nov 2008 18:37:41 -0500
Subject: [PATCH] return records for fork() both to child and parent

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  2 ++
 kernel/auditsc.c      | 17 +++++++++++++++++
 kernel/fork.c         |  1 +
 3 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6272a395d43c..1b2a6a5c1876 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -391,6 +391,7 @@ extern int audit_classify_arch(int arch);
 #ifdef CONFIG_AUDITSYSCALL
 /* These are defined in auditsc.c */
 				/* Public API */
+extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void audit_free(struct task_struct *task);
 extern void audit_syscall_entry(int arch,
@@ -504,6 +505,7 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 extern int audit_n_rules;
 extern int audit_signals;
 #else
+#define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index cf5bc2f5f9c3..de8468050afa 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1548,6 +1548,23 @@ void audit_syscall_entry(int arch, int major,
 	context->ppid       = 0;
 }
 
+void audit_finish_fork(struct task_struct *child)
+{
+	struct audit_context *ctx = current->audit_context;
+	struct audit_context *p = child->audit_context;
+	if (!p || !ctx || !ctx->auditable)
+		return;
+	p->arch = ctx->arch;
+	p->major = ctx->major;
+	memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
+	p->ctime = ctx->ctime;
+	p->dummy = ctx->dummy;
+	p->auditable = ctx->auditable;
+	p->in_syscall = ctx->in_syscall;
+	p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
+	p->ppid = current->pid;
+}
+
 /**
  * audit_syscall_exit - deallocate audit context after a system call
  * @tsk: task being audited
diff --git a/kernel/fork.c b/kernel/fork.c
index 2a372a0e206f..8d6a7dd9282b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1398,6 +1398,7 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
+		audit_finish_fork(p);
 		tracehook_report_clone(trace, regs, clone_flags, nr, p);
 
 		/*
-- 
cgit v1.2.3


From 0b0c940a91f8e6fd0e1be3e01d5e98997446233b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Tue, 18 Nov 2008 15:03:49 +0800
Subject: [PATCH] asm/generic: fix bug - kernel fails to build when enable some
 common audit code on Blackfin

If you enable some common audit code, the kernel fails to build.

In file included from lib/audit.c:17:
include/asm-generic/audit_write.h:3: error: '__NR_swapon' undeclared here (not in a function)
make[1]: *** [lib/audit.o] Error 1
make: *** [lib] Error 2

So do not use __NR_swapon if it isnt defined for a port.

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/asm-generic/audit_write.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/audit_write.h b/include/asm-generic/audit_write.h
index f10d367fb2a5..c5f1c2c920e2 100644
--- a/include/asm-generic/audit_write.h
+++ b/include/asm-generic/audit_write.h
@@ -1,6 +1,8 @@
 #include <asm-generic/audit_dir_write.h>
 __NR_acct,
+#ifdef __NR_swapon
 __NR_swapon,
+#endif
 __NR_quotactl,
 __NR_truncate,
 #ifdef __NR_truncate64
-- 
cgit v1.2.3


From 48887e63d6e057543067327da6b091297f7fe645 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 6 Dec 2008 01:05:50 -0500
Subject: [PATCH] fix broken timestamps in AVC generated by kernel threads

Timestamp in audit_context is valid only if ->in_syscall is set.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 4 ++--
 kernel/audit.c        | 4 +---
 kernel/auditsc.c      | 5 ++++-
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 1b2a6a5c1876..8f0672d13eb1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -435,7 +435,7 @@ static inline void audit_ptrace(struct task_struct *t)
 
 				/* Private API (for audit.c only) */
 extern unsigned int audit_serial(void);
-extern void auditsc_get_stamp(struct audit_context *ctx,
+extern int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 #define audit_get_loginuid(t) ((t)->loginuid)
@@ -518,7 +518,7 @@ extern int audit_signals;
 #define audit_inode(n,d) do { ; } while (0)
 #define audit_inode_child(d,i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
-#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0)
+#define auditsc_get_stamp(c,t,s) (0)
 #define audit_get_loginuid(t) (-1)
 #define audit_get_sessionid(t) (-1)
 #define audit_log_task_context(b) do { ; } while (0)
diff --git a/kernel/audit.c b/kernel/audit.c
index d8646c23b427..ce6d8ea3131e 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1121,9 +1121,7 @@ unsigned int audit_serial(void)
 static inline void audit_get_stamp(struct audit_context *ctx,
 				   struct timespec *t, unsigned int *serial)
 {
-	if (ctx)
-		auditsc_get_stamp(ctx, t, serial);
-	else {
+	if (!ctx || !auditsc_get_stamp(ctx, t, serial)) {
 		*t = CURRENT_TIME;
 		*serial = audit_serial();
 	}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 0a13d6895494..2a3f0afc4d2a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1957,15 +1957,18 @@ EXPORT_SYMBOL_GPL(__audit_inode_child);
  *
  * Also sets the context as auditable.
  */
-void auditsc_get_stamp(struct audit_context *ctx,
+int auditsc_get_stamp(struct audit_context *ctx,
 		       struct timespec *t, unsigned int *serial)
 {
+	if (!ctx->in_syscall)
+		return 0;
 	if (!ctx->serial)
 		ctx->serial = audit_serial();
 	t->tv_sec  = ctx->ctime.tv_sec;
 	t->tv_nsec = ctx->ctime.tv_nsec;
 	*serial    = ctx->serial;
 	ctx->auditable = 1;
+	return 1;
 }
 
 /* global counter which is incremented every time something logs in */
-- 
cgit v1.2.3


From 1e641743f055f075ed9a4edd75f1fb1e05669ddc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Tue, 9 Dec 2008 09:23:33 +0000
Subject: Audit: Log TIOCSTI

AUDIT_TTY records currently log all data read by processes marked for
TTY input auditing, even if the data was "pushed back" using the TIOCSTI
ioctl, not typed by the user.

This patch records all TIOCSTI calls to disambiguate the input.  It
generates one audit message per character pushed back; considering
TIOCSTI is used very rarely, this simple solution is probably good
enough.  (The only program I could find that uses TIOCSTI is mailx/nail
in "header editing" mode, e.g. using the ~h escape.  mailx is used very
rarely, and the escapes are used even rarer.)

Signed-Off-By: Miloslav Trmac <mitr@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/char/tty_audit.c | 78 +++++++++++++++++++++++++++++++++++++-----------
 drivers/char/tty_io.c    |  1 +
 include/linux/tty.h      |  4 +++
 3 files changed, 66 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index d961fa9612c4..34ab6d798f81 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -67,37 +67,45 @@ static void tty_audit_buf_put(struct tty_audit_buf *buf)
 		tty_audit_buf_free(buf);
 }
 
-/**
- *	tty_audit_buf_push	-	Push buffered data out
- *
- *	Generate an audit message from the contents of @buf, which is owned by
- *	@tsk with @loginuid.  @buf->mutex must be locked.
- */
-static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid,
-			       unsigned int sessionid,
-			       struct tty_audit_buf *buf)
+static void tty_audit_log(const char *description, struct task_struct *tsk,
+			  uid_t loginuid, unsigned sessionid, int major,
+			  int minor, unsigned char *data, size_t size)
 {
 	struct audit_buffer *ab;
 
-	if (buf->valid == 0)
-		return;
-	if (audit_enabled == 0)
-		return;
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY);
 	if (ab) {
 		char name[sizeof(tsk->comm)];
 		uid_t uid = task_uid(tsk);
 
-		audit_log_format(ab, "tty pid=%u uid=%u auid=%u ses=%u "
-				 "major=%d minor=%d comm=",
+		audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u "
+				 "major=%d minor=%d comm=", description,
 				 tsk->pid, uid, loginuid, sessionid,
-				 buf->major, buf->minor);
+				 major, minor);
 		get_task_comm(name, tsk);
 		audit_log_untrustedstring(ab, name);
 		audit_log_format(ab, " data=");
-		audit_log_n_hex(ab, buf->data, buf->valid);
+		audit_log_n_hex(ab, data, size);
 		audit_log_end(ab);
 	}
+}
+
+/**
+ *	tty_audit_buf_push	-	Push buffered data out
+ *
+ *	Generate an audit message from the contents of @buf, which is owned by
+ *	@tsk with @loginuid.  @buf->mutex must be locked.
+ */
+static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid,
+			       unsigned int sessionid,
+			       struct tty_audit_buf *buf)
+{
+	if (buf->valid == 0)
+		return;
+	if (audit_enabled == 0)
+		return;
+	tty_audit_log("tty", tsk, loginuid, sessionid, buf->major, buf->minor,
+		      buf->data, buf->valid);
 	buf->valid = 0;
 }
 
@@ -151,6 +159,42 @@ void tty_audit_fork(struct signal_struct *sig)
 	sig->tty_audit_buf = NULL;
 }
 
+/**
+ *	tty_audit_tiocsti	-	Log TIOCSTI
+ */
+void tty_audit_tiocsti(struct tty_struct *tty, char ch)
+{
+	struct tty_audit_buf *buf;
+	int major, minor, should_audit;
+
+	spin_lock_irq(&current->sighand->siglock);
+	should_audit = current->signal->audit_tty;
+	buf = current->signal->tty_audit_buf;
+	if (buf)
+		atomic_inc(&buf->count);
+	spin_unlock_irq(&current->sighand->siglock);
+
+	major = tty->driver->major;
+	minor = tty->driver->minor_start + tty->index;
+	if (buf) {
+		mutex_lock(&buf->mutex);
+		if (buf->major == major && buf->minor == minor)
+			tty_audit_buf_push_current(buf);
+		mutex_unlock(&buf->mutex);
+		tty_audit_buf_put(buf);
+	}
+
+	if (should_audit && audit_enabled) {
+		uid_t auid;
+		unsigned int sessionid;
+
+		auid = audit_get_loginuid(current);
+		sessionid = audit_get_sessionid(current);
+		tty_audit_log("ioctl=TIOCSTI", current, auid, sessionid, major,
+			      minor, &ch, 1);
+	}
+}
+
 /**
  *	tty_audit_push_task	-	Flush task's pending audit data
  */
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 1412a8d1e58d..db15f9ba7c0b 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2018,6 +2018,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
 		return -EPERM;
 	if (get_user(ch, p))
 		return -EFAULT;
+	tty_audit_tiocsti(tty, ch);
 	ld = tty_ldisc_ref_wait(tty);
 	ld->ops->receive_buf(tty, &ch, &mbz, 1);
 	tty_ldisc_deref(ld);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3b8121d4e36f..580700f20a1c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -442,6 +442,7 @@ extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
 			       size_t size);
 extern void tty_audit_exit(void);
 extern void tty_audit_fork(struct signal_struct *sig);
+extern void tty_audit_tiocsti(struct tty_struct *tty, char ch);
 extern void tty_audit_push(struct tty_struct *tty);
 extern void tty_audit_push_task(struct task_struct *tsk,
 					uid_t loginuid, u32 sessionid);
@@ -450,6 +451,9 @@ static inline void tty_audit_add_data(struct tty_struct *tty,
 				      unsigned char *data, size_t size)
 {
 }
+static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
+{
+}
 static inline void tty_audit_exit(void)
 {
 }
-- 
cgit v1.2.3


From c5af3a2e192d333997d1e191f3eba7fd2f869681 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 28 Nov 2008 13:29:45 +0000
Subject: ASoC: Add card registration API

ASoC v2 allows cards, codecs and platforms to instantiate separately,
with the overall ASoC device only being instantiated once all the
required components have registered. As part of backporting Liam's work
introduce an initial version of the card registration functions. At
present these do nothing active and are internal only, they will be
exposed to machine drivers after further backporting.  Adding this now
allows the datastructures used for dynamic card instantiation to be
built up gradually.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  |  5 +++++
 sound/soc/soc-core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 79d855d2bddd..4a578b5d855c 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -333,6 +333,11 @@ struct snd_soc_dai_link  {
 /* SoC card */
 struct snd_soc_card {
 	char *name;
+	struct device *dev;
+
+	struct list_head list;
+
+	int instantiated;
 
 	int (*probe)(struct platform_device *pdev);
 	int (*remove)(struct platform_device *pdev);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 2f2a8d93bbf0..44fbd71ce80f 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -43,6 +43,12 @@ static DECLARE_WAIT_QUEUE_HEAD(soc_pm_waitq);
 static struct dentry *debugfs_root;
 #endif
 
+static DEFINE_MUTEX(client_mutex);
+static LIST_HEAD(card_list);
+
+static int snd_soc_register_card(struct snd_soc_card *card);
+static int snd_soc_unregister_card(struct snd_soc_card *card);
+
 /*
  * This is a timeout to do a DAPM powerdown after a stream is closed().
  * It can be used to eliminate pops between different playback streams, e.g.
@@ -784,6 +790,14 @@ static int soc_probe(struct platform_device *pdev)
 	/* Bodge while we push things out of socdev */
 	card->socdev = socdev;
 
+	/* Bodge while we unpick instantiation */
+	card->dev = &pdev->dev;
+	ret = snd_soc_register_card(card);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to register card\n");
+		return ret;
+	}
+
 	if (card->probe) {
 		ret = card->probe(pdev);
 		if (ret < 0)
@@ -863,6 +877,8 @@ static int soc_remove(struct platform_device *pdev)
 	if (card->remove)
 		card->remove(pdev);
 
+	snd_soc_unregister_card(card);
+
 	return 0;
 }
 
@@ -1957,6 +1973,52 @@ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute)
 }
 EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute);
 
+/**
+ * snd_soc_register_card - Register a card with the ASoC core
+ *
+ * @param card Card to register
+ *
+ * Note that currently this is an internal only function: it will be
+ * exposed to machine drivers after further backporting of ASoC v2
+ * registration APIs.
+ */
+static int snd_soc_register_card(struct snd_soc_card *card)
+{
+	if (!card->name || !card->dev)
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&card->list);
+	card->instantiated = 0;
+
+	mutex_lock(&client_mutex);
+	list_add(&card->list, &card_list);
+	mutex_unlock(&client_mutex);
+
+	dev_dbg(card->dev, "Registered card '%s'\n", card->name);
+
+	return 0;
+}
+
+/**
+ * snd_soc_unregister_card - Unregister a card with the ASoC core
+ *
+ * @param card Card to unregister
+ *
+ * Note that currently this is an internal only function: it will be
+ * exposed to machine drivers after further backporting of ASoC v2
+ * registration APIs.
+ */
+static int snd_soc_unregister_card(struct snd_soc_card *card)
+{
+	mutex_lock(&client_mutex);
+	list_del(&card->list);
+	mutex_unlock(&client_mutex);
+
+	dev_dbg(card->dev, "Unregistered card '%s'\n", card->name);
+
+	return 0;
+}
+
 static int __devinit snd_soc_init(void)
 {
 #ifdef CONFIG_DEBUG_FS
-- 
cgit v1.2.3


From 9115171a6b79b6b4d5c6697f123556b6efc37f1f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 30 Nov 2008 23:31:24 +0000
Subject: ASoC: Add DAI registration API

Add API calls to register and unregister DAIs with the core.  Currently
these APIs are ineffective.  Since multiple DAIs for a given device are
a common case bulk variants are provided.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h |  8 +++++
 sound/soc/soc-core.c    | 83 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index e2d5f76838c6..24247f763608 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -100,6 +100,12 @@ struct snd_soc_dai_ops;
 struct snd_soc_dai;
 struct snd_ac97_bus_ops;
 
+/* Digital Audio Interface registration */
+int snd_soc_register_dai(struct snd_soc_dai *dai);
+void snd_soc_unregister_dai(struct snd_soc_dai *dai);
+int snd_soc_register_dais(struct snd_soc_dai *dai, size_t count);
+void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count);
+
 /* Digital Audio Interface clocking API.*/
 int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
 	unsigned int freq, int dir);
@@ -186,6 +192,8 @@ struct snd_soc_dai {
 	unsigned int id;
 	int ac97_control;
 
+	struct device *dev;
+
 	/* DAI callbacks */
 	int (*probe)(struct platform_device *pdev,
 		     struct snd_soc_dai *dai);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 44fbd71ce80f..03460b068f1e 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -45,6 +45,7 @@ static struct dentry *debugfs_root;
 
 static DEFINE_MUTEX(client_mutex);
 static LIST_HEAD(card_list);
+static LIST_HEAD(dai_list);
 
 static int snd_soc_register_card(struct snd_soc_card *card);
 static int snd_soc_unregister_card(struct snd_soc_card *card);
@@ -2019,6 +2020,88 @@ static int snd_soc_unregister_card(struct snd_soc_card *card)
 	return 0;
 }
 
+/**
+ * snd_soc_register_dai - Register a DAI with the ASoC core
+ *
+ * @param dai DAI to register
+ */
+int snd_soc_register_dai(struct snd_soc_dai *dai)
+{
+	if (!dai->name)
+		return -EINVAL;
+
+	/* The device should become mandatory over time */
+	if (!dai->dev)
+		printk(KERN_WARNING "No device for DAI %s\n", dai->name);
+
+	INIT_LIST_HEAD(&dai->list);
+
+	mutex_lock(&client_mutex);
+	list_add(&dai->list, &dai_list);
+	mutex_unlock(&client_mutex);
+
+	pr_debug("Registered DAI '%s'\n", dai->name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_soc_register_dai);
+
+/**
+ * snd_soc_unregister_dai - Unregister a DAI from the ASoC core
+ *
+ * @param dai DAI to unregister
+ */
+void snd_soc_unregister_dai(struct snd_soc_dai *dai)
+{
+	mutex_lock(&client_mutex);
+	list_del(&dai->list);
+	mutex_unlock(&client_mutex);
+
+	pr_debug("Unregistered DAI '%s'\n", dai->name);
+}
+EXPORT_SYMBOL_GPL(snd_soc_unregister_dai);
+
+/**
+ * snd_soc_register_dais - Register multiple DAIs with the ASoC core
+ *
+ * @param dai Array of DAIs to register
+ * @param count Number of DAIs
+ */
+int snd_soc_register_dais(struct snd_soc_dai *dai, size_t count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = snd_soc_register_dai(&dai[i]);
+		if (ret != 0)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		snd_soc_unregister_dai(&dai[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_register_dais);
+
+/**
+ * snd_soc_unregister_dais - Unregister multiple DAIs from the ASoC core
+ *
+ * @param dai Array of DAIs to unregister
+ * @param count Number of DAIs
+ */
+void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		snd_soc_unregister_dai(&dai[i]);
+}
+EXPORT_SYMBOL_GPL(snd_soc_unregister_dais);
+
 static int __devinit snd_soc_init(void)
 {
 #ifdef CONFIG_DEBUG_FS
-- 
cgit v1.2.3


From 12a48a8c0087ba39d926cf1d63938ccbdb9752c3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 3 Dec 2008 19:40:30 +0000
Subject: ASoC: Add platform registration API

ASoC v2 allows platform drivers to instantiate independantly of the
overall ASoC card. This API allows drivers to notify the core when
they are registered.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  |  5 +++++
 sound/soc/soc-core.c | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 4a578b5d855c..ce3661d07c24 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -149,6 +149,7 @@ struct snd_soc_ops;
 struct snd_soc_dai_mode;
 struct snd_soc_pcm_runtime;
 struct snd_soc_dai;
+struct snd_soc_platform;
 struct snd_soc_codec;
 struct soc_enum;
 struct snd_soc_ac97_ops;
@@ -158,6 +159,9 @@ typedef int (*hw_read_t)(void *,char* ,int);
 
 extern struct snd_ac97_bus_ops soc_ac97_ops;
 
+int snd_soc_register_platform(struct snd_soc_platform *platform);
+void snd_soc_unregister_platform(struct snd_soc_platform *platform);
+
 /* pcm <-> DAI connect */
 void snd_soc_free_pcms(struct snd_soc_device *socdev);
 int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid);
@@ -296,6 +300,7 @@ struct snd_soc_codec_device {
 /* SoC platform interface */
 struct snd_soc_platform {
 	char *name;
+	struct list_head list;
 
 	int (*probe)(struct platform_device *pdev);
 	int (*remove)(struct platform_device *pdev);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 03460b068f1e..ffae370b45df 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -46,6 +46,7 @@ static struct dentry *debugfs_root;
 static DEFINE_MUTEX(client_mutex);
 static LIST_HEAD(card_list);
 static LIST_HEAD(dai_list);
+static LIST_HEAD(platform_list);
 
 static int snd_soc_register_card(struct snd_soc_card *card);
 static int snd_soc_unregister_card(struct snd_soc_card *card);
@@ -2102,6 +2103,43 @@ void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count)
 }
 EXPORT_SYMBOL_GPL(snd_soc_unregister_dais);
 
+/**
+ * snd_soc_register_platform - Register a platform with the ASoC core
+ *
+ * @param platform platform to register
+ */
+int snd_soc_register_platform(struct snd_soc_platform *platform)
+{
+	if (!platform->name)
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&platform->list);
+
+	mutex_lock(&client_mutex);
+	list_add(&platform->list, &platform_list);
+	mutex_unlock(&client_mutex);
+
+	pr_debug("Registered platform '%s'\n", platform->name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_soc_register_platform);
+
+/**
+ * snd_soc_unregister_platform - Unregister a platform from the ASoC core
+ *
+ * @param platform platform to unregister
+ */
+void snd_soc_unregister_platform(struct snd_soc_platform *platform)
+{
+	mutex_lock(&client_mutex);
+	list_del(&platform->list);
+	mutex_unlock(&client_mutex);
+
+	pr_debug("Unregistered platform '%s'\n", platform->name);
+}
+EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
+
 static int __devinit snd_soc_init(void)
 {
 #ifdef CONFIG_DEBUG_FS
-- 
cgit v1.2.3


From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 9 Dec 2008 23:22:26 -0800
Subject: netpoll: fix race on poll_list resulting in garbage entry

	A few months back a race was discused between the netpoll napi service
path, and the fast path through net_rx_action:
http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470

A patch was submitted for that bug, but I think we missed a case.

Consider the following scenario:

INITIAL STATE
CPU0 has one napi_struct A on its poll_list
CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same
napi_struct A that CPU0 has on its list


CPU0						CPU1
net_rx_action					poll_napi
!list_empty (returns true)			locks poll_lock for A
						 poll_one_napi
						  napi->poll
						   netif_rx_complete
						    __napi_complete
						    (removes A from poll_list)
list_entry(list->next)


In the above scenario, net_rx_action assumes that the per-cpu poll_list is
exclusive to that cpu.  netpoll of course violates that, and because the netpoll
path can dequeue from the poll list, its possible for CPU0 to detect a non-empty
list at the top of the while loop in net_rx_action, but have it become empty by
the time it calls list_entry.  Since the poll_list isn't surrounded by any other
structure, the returned data from that list_entry call in this situation is
garbage, and any number of crashes can result based on what exactly that garbage
is.

Given that its not fasible for performance reasons to place exclusive locks
arround each cpus poll list to provide that mutal exclusion, I think the best
solution is modify the netpoll path in such a way that we continue to guarantee
that the poll_list for a cpu is in fact exclusive to that cpu.  To do this I've
implemented the patch below.  It adds an additional bit to the state field in
the napi_struct.  When executing napi->poll from the netpoll_path, this bit will
be set. When a driver calls netif_rx_complete, if that bit is set, it will not
remove the napi_struct from the poll_list.  That work will be saved for the next
iteration of net_rx_action.

I've tested this and it seems to work well.  About the biggest drawback I can
see to it is the fact that it might result in an extra loop through
net_rx_action in the event that the device is actually contended for (i.e. the
netpoll path actually preforms all the needed work no the device, and the call
to net_rx_action winds up doing nothing, except removing the napi_struct from
the poll_list.  However I think this is probably a small price to pay, given
that the alternative is a crash.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++++
 net/core/netpoll.c        | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9d77b1d7dca8..e26f54952892 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -319,6 +319,7 @@ enum
 {
 	NAPI_STATE_SCHED,	/* Poll is scheduled */
 	NAPI_STATE_DISABLE,	/* Disable pending */
+	NAPI_STATE_NPSVC,	/* Netpoll - don't dequeue from poll_list */
 };
 
 extern void __napi_schedule(struct napi_struct *n);
@@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev,
 {
 	unsigned long flags;
 
+	/*
+	 * don't let napi dequeue from the cpu poll list
+	 * just in case its running on a different cpu
+	 */
+	if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
+		return;
 	local_irq_save(flags);
 	__netif_rx_complete(dev, napi);
 	local_irq_restore(flags);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 6c7af390be0a..dadac6281f20 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -133,9 +133,11 @@ static int poll_one_napi(struct netpoll_info *npinfo,
 
 	npinfo->rx_flags |= NETPOLL_RX_DROP;
 	atomic_inc(&trapped);
+	set_bit(NAPI_STATE_NPSVC, &napi->state);
 
 	work = napi->poll(napi, budget);
 
+	clear_bit(NAPI_STATE_NPSVC, &napi->state);
 	atomic_dec(&trapped);
 	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
 
-- 
cgit v1.2.3


From 58494487581cb143a0d763e3056a894d5009d60a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 26 Nov 2008 12:02:53 +0100
Subject: oprofile: update comment for oprofile_add_sample()

The cpu argument is no longer part of the parameter list.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 include/linux/oprofile.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 5231861f357d..1ce9fe572e51 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -86,8 +86,7 @@ int oprofile_arch_init(struct oprofile_operations * ops);
 void oprofile_arch_exit(void);
 
 /**
- * Add a sample. This may be called from any context. Pass
- * smp_processor_id() as cpu.
+ * Add a sample. This may be called from any context.
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
 
-- 
cgit v1.2.3


From e09373f22e76cc048ca5fe10a9ff9012f5d64309 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 26 Nov 2008 14:04:19 +0100
Subject: ring_buffer: add remaining cpu functions to ring_buffer.h

These functions are not yet in ring_buffer.h though they seems to be
part of the API.

Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 include/linux/ring_buffer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..de9d8c12e5ec 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -116,6 +116,8 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
 unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(int cpu);
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
-- 
cgit v1.2.3


From cdc693643271b2e6a693cf8f6afb258cce01f058 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 10 Dec 2008 13:55:49 +0000
Subject: ALSA: Add support for mechanical jack insertion

Some systems support both mechanical and electrical jack detection,
allowing them to report that a jack is physically present but does
not have any functioning connections. Add a new jack type for these,
allowing user space to report faulty connections.

Thanks to Guillem Jover for the suggestion.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/input.h | 1 +
 include/sound/jack.h  | 1 +
 sound/core/jack.c     | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/input.h b/include/linux/input.h
index 7323d2ff5151..abd223b0f586 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -645,6 +645,7 @@ struct input_absinfo {
 #define SW_MICROPHONE_INSERT	0x04  /* set = inserted */
 #define SW_DOCK			0x05  /* set = plugged into dock */
 #define SW_LINEOUT_INSERT	0x06  /* set = inserted */
+#define SW_JACK_PHYSICAL_INSERT 0x07  /* set = mechanical switch set */
 #define SW_MAX			0x0f
 #define SW_CNT			(SW_MAX+1)
 
diff --git a/include/sound/jack.h b/include/sound/jack.h
index 7cb25f4b50bb..2e0315cdd0d6 100644
--- a/include/sound/jack.h
+++ b/include/sound/jack.h
@@ -36,6 +36,7 @@ enum snd_jack_types {
 	SND_JACK_MICROPHONE	= 0x0002,
 	SND_JACK_HEADSET	= SND_JACK_HEADPHONE | SND_JACK_MICROPHONE,
 	SND_JACK_LINEOUT	= 0x0004,
+	SND_JACK_MECHANICAL	= 0x0008, /* If detected separately */
 };
 
 struct snd_jack {
diff --git a/sound/core/jack.c b/sound/core/jack.c
index c4bb9bad3133..6ebd5f12bc50 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -108,6 +108,9 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
 	if (type & SND_JACK_MICROPHONE)
 		input_set_capability(jack->input_dev, EV_SW,
 				     SW_MICROPHONE_INSERT);
+	if (type & SND_JACK_MECHANICAL)
+		input_set_capability(jack->input_dev, EV_SW,
+				     SW_JACK_PHYSICAL_INSERT);
 
 	err = snd_device_new(card, SNDRV_DEV_JACK, jack, &ops);
 	if (err < 0)
@@ -159,6 +162,9 @@ void snd_jack_report(struct snd_jack *jack, int status)
 	if (jack->type & SND_JACK_MICROPHONE)
 		input_report_switch(jack->input_dev, SW_MICROPHONE_INSERT,
 				    status & SND_JACK_MICROPHONE);
+	if (jack->type & SND_JACK_MECHANICAL)
+		input_report_switch(jack->input_dev, SW_JACK_PHYSICAL_INSERT,
+				    status & SND_JACK_MECHANICAL);
 
 	input_sync(jack->input_dev);
 }
-- 
cgit v1.2.3


From 0d0cf00a7fc63cee9a4c4a3b8612879b4f7f42ba Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 10 Dec 2008 14:32:45 +0000
Subject: ASoC: Add codec registration API

Another part of the backporting of Liam's ASoC v2 work. Using this is
more complicated than the other registration types since currently the
codec is instantiated during the probe of the ASoC device so we can't
currently readily wait for the codec to register.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h  |  5 +++++
 sound/soc/soc-core.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index ce3661d07c24..f86e455d3828 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -161,6 +161,8 @@ extern struct snd_ac97_bus_ops soc_ac97_ops;
 
 int snd_soc_register_platform(struct snd_soc_platform *platform);
 void snd_soc_unregister_platform(struct snd_soc_platform *platform);
+int snd_soc_register_codec(struct snd_soc_codec *codec);
+void snd_soc_unregister_codec(struct snd_soc_codec *codec);
 
 /* pcm <-> DAI connect */
 void snd_soc_free_pcms(struct snd_soc_device *socdev);
@@ -247,6 +249,9 @@ struct snd_soc_codec {
 	char *name;
 	struct module *owner;
 	struct mutex mutex;
+	struct device *dev;
+
+	struct list_head list;
 
 	/* callbacks */
 	int (*set_bias_level)(struct snd_soc_codec *,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 4d2db7cfaf4c..b098c0b4c584 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -47,6 +47,7 @@ static DEFINE_MUTEX(client_mutex);
 static LIST_HEAD(card_list);
 static LIST_HEAD(dai_list);
 static LIST_HEAD(platform_list);
+static LIST_HEAD(codec_list);
 
 static int snd_soc_register_card(struct snd_soc_card *card);
 static int snd_soc_unregister_card(struct snd_soc_card *card);
@@ -2224,6 +2225,48 @@ void snd_soc_unregister_platform(struct snd_soc_platform *platform)
 }
 EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
 
+/**
+ * snd_soc_register_codec - Register a codec with the ASoC core
+ *
+ * @param codec codec to register
+ */
+int snd_soc_register_codec(struct snd_soc_codec *codec)
+{
+	if (!codec->name)
+		return -EINVAL;
+
+	/* The device should become mandatory over time */
+	if (!codec->dev)
+		printk(KERN_WARNING "No device for codec %s\n", codec->name);
+
+	INIT_LIST_HEAD(&codec->list);
+
+	mutex_lock(&client_mutex);
+	list_add(&codec->list, &codec_list);
+	snd_soc_instantiate_cards();
+	mutex_unlock(&client_mutex);
+
+	pr_debug("Registered codec '%s'\n", codec->name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_soc_register_codec);
+
+/**
+ * snd_soc_unregister_codec - Unregister a codec from the ASoC core
+ *
+ * @param codec codec to unregister
+ */
+void snd_soc_unregister_codec(struct snd_soc_codec *codec)
+{
+	mutex_lock(&client_mutex);
+	list_del(&codec->list);
+	mutex_unlock(&client_mutex);
+
+	pr_debug("Unregistered codec '%s'\n", codec->name);
+}
+EXPORT_SYMBOL_GPL(snd_soc_unregister_codec);
+
 static int __init snd_soc_init(void)
 {
 #ifdef CONFIG_DEBUG_FS
-- 
cgit v1.2.3


From 71c5576fbd809f2015f4eddf72e501e298720cf3 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 9 Dec 2008 13:14:13 -0800
Subject: revert "percpu counter: clean up percpu_counter_sum_and_set()"

Revert

    commit 1f7c14c62ce63805f9574664a6c6de3633d4a354
    Author: Mingming Cao <cmm@us.ibm.com>
    Date:   Thu Oct 9 12:50:59 2008 -0400

        percpu counter: clean up percpu_counter_sum_and_set()

Before this patch we had the following:

percpu_counter_sum(): return the percpu_counter's value

percpu_counter_sum_and_set(): return the percpu_counter's value, copying
that value into the central value and zeroing the per-cpu counters before
returning.

After this patch, percpu_counter_sum_and_set() has gone, and
percpu_counter_sum() gets the old percpu_counter_sum_and_set()
functionality.

Problem is, as Eric points out, the old percpu_counter_sum_and_set()
functionality was racy and wrong.  It zeroes out counters on "other" cpus,
without holding any locks which will prevent races agaist updates from
those other CPUS.

This patch reverts 1f7c14c62ce63805f9574664a6c6de3633d4a354.  This means
that percpu_counter_sum_and_set() still has the race, but
percpu_counter_sum() does not.

Note that this is not a simple revert - ext4 has since started using
percpu_counter_sum() for its dirty_blocks counter as well.

Note that this revert patch changes percpu_counter_sum() semantics.

Before the patch, a call to percpu_counter_sum() will bring the counter's
central counter mostly up-to-date, so a following percpu_counter_read()
will return a close value.

After this patch, a call to percpu_counter_sum() will leave the counter's
central accumulator unaltered, so a subsequent call to
percpu_counter_read() can now return a significantly inaccurate result.

If there is any code in the tree which was introduced after
e8ced39d5e8911c662d4d69a342b9d053eaaac4e was merged, and which depends
upon the new percpu_counter_sum() semantics, that code will break.

Reported-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mingming Cao <cmm@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/balloc.c               |  4 ++--
 include/linux/percpu_counter.h | 12 +++++++++---
 lib/percpu_counter.c           |  8 +++++---
 3 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2003cdc36aa..c17f69bcd7dd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 
 	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
 						EXT4_FREEBLOCKS_WATERMARK) {
-		free_blocks  = percpu_counter_sum(fbc);
-		dirty_blocks = percpu_counter_sum(dbc);
+		free_blocks  = percpu_counter_sum_and_set(fbc);
+		dirty_blocks = percpu_counter_sum_and_set(dbc);
 		if (dirty_blocks < 0) {
 			printk(KERN_CRIT "Dirty block accounting "
 					"went wrong %lld\n",
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccdfc112..208388835357 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc);
+s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
-	s64 ret = __percpu_counter_sum(fbc);
+	s64 ret = __percpu_counter_sum(fbc, 0);
 	return ret < 0 ? 0 : ret;
 }
 
+static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
+{
+	return __percpu_counter_sum(fbc, 1);
+}
+
+
 static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 {
-	return __percpu_counter_sum(fbc);
+	return __percpu_counter_sum(fbc, 0);
 }
 
 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 71b265c330ce..dba1530a5b29 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive()
  */
-s64 __percpu_counter_sum(struct percpu_counter *fbc)
+s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
 {
 	s64 ret;
 	int cpu;
@@ -62,9 +62,11 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 	for_each_online_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
-		*pcount = 0;
+		if (set)
+			*pcount = 0;
 	}
-	fbc->count = ret;
+	if (set)
+		fbc->count = ret;
 
 	spin_unlock(&fbc->lock);
 	return ret;
-- 
cgit v1.2.3


From 02d211688727ad02bb4555b1aa8ae2de16b21b39 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 9 Dec 2008 13:14:14 -0800
Subject: revert "percpu_counter: new function percpu_counter_sum_and_set"

Revert

    commit e8ced39d5e8911c662d4d69a342b9d053eaaac4e
    Author: Mingming Cao <cmm@us.ibm.com>
    Date:   Fri Jul 11 19:27:31 2008 -0400

        percpu_counter: new function percpu_counter_sum_and_set

As described in

	revert "percpu counter: clean up percpu_counter_sum_and_set()"

the new percpu_counter_sum_and_set() is racy against updates to the
cpu-local accumulators on other CPUs.  Revert that change.

This means that ext4 will be slow again.  But correct.

Reported-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mingming Cao <cmm@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: <stable@kernel.org>		[2.6.27.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/balloc.c               |  4 ++--
 include/linux/percpu_counter.h | 12 +++---------
 lib/percpu_counter.c           |  7 +------
 3 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index c17f69bcd7dd..db35cfdb3c8b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 
 	if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
 						EXT4_FREEBLOCKS_WATERMARK) {
-		free_blocks  = percpu_counter_sum_and_set(fbc);
-		dirty_blocks = percpu_counter_sum_and_set(dbc);
+		free_blocks  = percpu_counter_sum_positive(fbc);
+		dirty_blocks = percpu_counter_sum_positive(dbc);
 		if (dirty_blocks < 0) {
 			printk(KERN_CRIT "Dirty block accounting "
 					"went wrong %lld\n",
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 208388835357..9007ccdfc112 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
+s64 __percpu_counter_sum(struct percpu_counter *fbc);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
-	s64 ret = __percpu_counter_sum(fbc, 0);
+	s64 ret = __percpu_counter_sum(fbc);
 	return ret < 0 ? 0 : ret;
 }
 
-static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
-{
-	return __percpu_counter_sum(fbc, 1);
-}
-
-
 static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 {
-	return __percpu_counter_sum(fbc, 0);
+	return __percpu_counter_sum(fbc);
 }
 
 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index dba1530a5b29..b255b939bc1b 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive()
  */
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
 {
 	s64 ret;
 	int cpu;
@@ -62,12 +62,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
 	for_each_online_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
-		if (set)
-			*pcount = 0;
 	}
-	if (set)
-		fbc->count = ret;
-
 	spin_unlock(&fbc->lock);
 	return ret;
 }
-- 
cgit v1.2.3


From aa6f14796630c8b03c11e782484aec2aee05e671 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 9 Dec 2008 13:14:25 -0800
Subject: atomic: fix a typo in atomic_long_xchg()

atomic_long_xchg() is not correctly defined for 32bit arches.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 4ec0a296bdec..7abdaa91ccd3 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -251,7 +251,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 #define atomic_long_cmpxchg(l, old, new) \
 	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
 #define atomic_long_xchg(v, new) \
-	(atomic_xchg((atomic_t *)(l), (new)))
+	(atomic_xchg((atomic_t *)(v), (new)))
 
 #endif  /*  BITS_PER_LONG == 64  */
 
-- 
cgit v1.2.3


From 9c24624727f6d6c460e45762a408ca5f5b9b8ef2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 9 Dec 2008 13:14:27 -0800
Subject: KSYM_SYMBOL_LEN fixes

Miles Lane tailing /sys files hit a BUG which Pekka Enberg has tracked
to my 966c8c12dc9e77f931e2281ba25d2f0244b06949 sprint_symbol(): use
less stack exposing a bug in slub's list_locations() -
kallsyms_lookup() writes a 0 to namebuf[KSYM_NAME_LEN-1], but that was
beyond the end of page provided.

The 100 slop which list_locations() allows at end of page looks roughly
enough for all the other stuff it might print after the symbol before
it checks again: break out KSYM_SYMBOL_LEN earlier than before.

Latencytop and ftrace and are using KSYM_NAME_LEN buffers where they
need KSYM_SYMBOL_LEN buffers, and vmallocinfo a 2*KSYM_NAME_LEN buffer
where it wants a KSYM_SYMBOL_LEN buffer: fix those before anyone copies
them.

[akpm@linux-foundation.org: ftrace.h needs module.h]
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc Miles Lane <miles.lane@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c         | 2 +-
 include/linux/ftrace.h | 3 ++-
 kernel/latencytop.c    | 2 +-
 mm/slub.c              | 2 +-
 mm/vmalloc.c           | 2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 486cf3fe7139..d4677603c889 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v)
 				task->latency_record[i].time,
 				task->latency_record[i].max);
 			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
-				char sym[KSYM_NAME_LEN];
+				char sym[KSYM_SYMBOL_LEN];
 				char *c;
 				if (!task->latency_record[i].backtrace[q])
 					break;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 703eb53cfa2b..9c5bc6be2b09 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -6,6 +6,7 @@
 #include <linux/ktime.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/module.h>
 #include <linux/kallsyms.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
 
 struct boot_trace {
 	pid_t			caller;
-	char			func[KSYM_NAME_LEN];
+	char			func[KSYM_SYMBOL_LEN];
 	int			result;
 	unsigned long long	duration;		/* usecs */
 	ktime_t			calltime;
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 5e7b45c56923..449db466bdbc 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -191,7 +191,7 @@ static int lstats_show(struct seq_file *m, void *v)
 				latency_record[i].time,
 				latency_record[i].max);
 			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
-				char sym[KSYM_NAME_LEN];
+				char sym[KSYM_SYMBOL_LEN];
 				char *c;
 				if (!latency_record[i].backtrace[q])
 					break;
diff --git a/mm/slub.c b/mm/slub.c
index 749588a50a5a..a2cd47d89e0a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3597,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 	for (i = 0; i < t.count; i++) {
 		struct location *l = &t.loc[i];
 
-		if (len > PAGE_SIZE - 100)
+		if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
 			break;
 		len += sprintf(buf + len, "%7ld ", l->count);
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f3f6e0758562..1ddb77ba3995 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1717,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p)
 		v->addr, v->addr + v->size, v->size);
 
 	if (v->caller) {
-		char buff[2 * KSYM_NAME_LEN];
+		char buff[KSYM_SYMBOL_LEN];
 
 		seq_putc(m, ' ');
 		sprint_symbol(buff, (unsigned long)v->caller);
-- 
cgit v1.2.3


From 0bed7b292d68f82316bfb8cd521e16c867689efe Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@queued.net>
Date: Wed, 5 Nov 2008 17:29:53 -0500
Subject: ALSA: cs5535audio: stick AD1888 bitshift values into a header file

We'd like to use the High Pass Filter and V_REFOUT bitshift values elsewhere,
so stick them into a ac97_codec.h.

Signed-off-by: Andres Salomon <dilinger@debian.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/ac97_codec.h  | 2 ++
 sound/pci/ac97/ac97_patch.c | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 9c309daf492b..251fc1cd5002 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -281,10 +281,12 @@
 /* specific - Analog Devices */
 #define AC97_AD_TEST		0x5a	/* test register */
 #define AC97_AD_TEST2		0x5c	/* undocumented test register 2 */
+#define AC97_AD_HPFD_SHIFT	12	/* High Pass Filter Disable */
 #define AC97_AD_CODEC_CFG	0x70	/* codec configuration */
 #define AC97_AD_JACK_SPDIF	0x72	/* Jack Sense & S/PDIF */
 #define AC97_AD_SERIAL_CFG	0x74	/* Serial Configuration */
 #define AC97_AD_MISC		0x76	/* Misc Control Bits */
+#define AC97_AD_VREFD_SHIFT	2	/* V_REFOUT Disable (AD1888) */
 
 /* specific - Cirrus Logic */
 #define AC97_CSR_ACMODE		0x5e	/* AC Mode Register */
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 6e831aff1bd0..7ad25f439b50 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -2054,8 +2054,9 @@ static const struct snd_kcontrol_new snd_ac97_ad1888_controls[] = {
 		.get = snd_ac97_ad1888_lohpsel_get,
 		.put = snd_ac97_ad1888_lohpsel_put
 	},
-	AC97_SINGLE("V_REFOUT Enable", AC97_AD_MISC, 2, 1, 1),
-	AC97_SINGLE("High Pass Filter Enable", AC97_AD_TEST2, 12, 1, 1),
+	AC97_SINGLE("V_REFOUT Enable", AC97_AD_MISC, AC97_AD_VREFD_SHIFT, 1, 1),
+	AC97_SINGLE("High Pass Filter Enable", AC97_AD_TEST2,
+			AC97_AD_HPFD_SHIFT, 1, 1),
 	AC97_SINGLE("Spread Front to Surround and Center/LFE", AC97_AD_MISC, 7, 1, 0),
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-- 
cgit v1.2.3


From 54b71fba68efbf3ab89721a384df2ce757750979 Mon Sep 17 00:00:00 2001
From: Akira Takeuchi <takeuchi.akr@jp.panasonic.com>
Date: Wed, 10 Dec 2008 12:43:34 +0000
Subject: MN10300: Fix __put_user_asm8()

Fix __put_user_asm8() by jumping to the end label (3:) from the exception
handler, rather than jumping back to retry the second store instruction (label
2:).

Signed-off-by: Akira Takeuchi <takeuchi.akr@jp.panasonic.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-mn10300/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-mn10300/uaccess.h b/include/asm-mn10300/uaccess.h
index 46b9b647f3c3..8a3a4dd55763 100644
--- a/include/asm-mn10300/uaccess.h
+++ b/include/asm-mn10300/uaccess.h
@@ -266,7 +266,7 @@ extern int __get_user_unknown(void);
 		"	.section	.fixup,\"ax\"	\n"		\
 		"4:					\n"		\
 		"	mov		%5,%0		\n"		\
-		"	jmp		2b		\n"		\
+		"	jmp		3b		\n"		\
 		"	.previous			\n"		\
 		"	.section	__ex_table,\"a\"\n"		\
 		"	.balign		4		\n"		\
-- 
cgit v1.2.3


From 2107fb8b5bf018be691afdd4c6ffaecf0c3307be Mon Sep 17 00:00:00 2001
From: Steve Glendinning <steve.glendinning@smsc.com>
Date: Wed, 5 Nov 2008 00:35:38 +0000
Subject: smsc911x: add dynamic bus configuration

Convert the driver to select 16-bit or 32-bit bus access at runtime,
at a small performance cost.

Signed-off-by: Steve Glendinning <steve.glendinning@smsc.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.c   | 158 +++++++++++++++++++++++------------------------
 drivers/net/smsc911x.h   |   1 -
 include/linux/smsc911x.h |   5 ++
 3 files changed, 84 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index fe517880fc97..4b8ff843e300 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -77,19 +77,16 @@ struct smsc911x_data {
 	unsigned int generation;
 
 	/* device configuration (copied from platform_data during probe) */
-	unsigned int irq_polarity;
-	unsigned int irq_type;
-	phy_interface_t phy_interface;
+	struct smsc911x_platform_config config;
 
 	/* This needs to be acquired before calling any of below:
 	 * smsc911x_mac_read(), smsc911x_mac_write()
 	 */
 	spinlock_t mac_lock;
 
-#if (!SMSC_CAN_USE_32BIT)
-	/* spinlock to ensure 16-bit accesses are serialised */
+	/* spinlock to ensure 16-bit accesses are serialised.
+	 * unused with a 32-bit bus */
 	spinlock_t dev_lock;
-#endif
 
 	struct phy_device *phy_dev;
 	struct mii_bus *mii_bus;
@@ -121,70 +118,56 @@ struct smsc911x_data {
 	unsigned int hashlo;
 };
 
-#if SMSC_CAN_USE_32BIT
-
-static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
-{
-	return readl(pdata->ioaddr + reg);
-}
-
-static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
-				      u32 val)
-{
-	writel(val, pdata->ioaddr + reg);
-}
-
-/* Writes a packet to the TX_DATA_FIFO */
-static inline void
-smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
-		      unsigned int wordcount)
-{
-	writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount);
-}
-
-/* Reads a packet out of the RX_DATA_FIFO */
-static inline void
-smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
-		     unsigned int wordcount)
-{
-	readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount);
-}
-
-#else				/* SMSC_CAN_USE_32BIT */
-
-/* These 16-bit access functions are significantly slower, due to the locking
+/* The 16-bit access functions are significantly slower, due to the locking
  * necessary.  If your bus hardware can be configured to do this for you
  * (in response to a single 32-bit operation from software), you should use
  * the 32-bit access functions instead. */
 
 static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
 {
-	unsigned long flags;
-	u32 data;
-
-	/* these two 16-bit reads must be performed consecutively, so must
-	 * not be interrupted by our own ISR (which would start another
-	 * read operation) */
-	spin_lock_irqsave(&pdata->dev_lock, flags);
-	data = ((readw(pdata->ioaddr + reg) & 0xFFFF) |
-		((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16));
-	spin_unlock_irqrestore(&pdata->dev_lock, flags);
+	if (pdata->config.flags & SMSC911X_USE_32BIT)
+		return readl(pdata->ioaddr + reg);
+
+	if (pdata->config.flags & SMSC911X_USE_16BIT) {
+		u32 data;
+		unsigned long flags;
+
+		/* these two 16-bit reads must be performed consecutively, so
+		 * must not be interrupted by our own ISR (which would start
+		 * another read operation) */
+		spin_lock_irqsave(&pdata->dev_lock, flags);
+		data = ((readw(pdata->ioaddr + reg) & 0xFFFF) |
+			((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16));
+		spin_unlock_irqrestore(&pdata->dev_lock, flags);
+
+		return data;
+	}
 
-	return data;
+	BUG();
 }
 
 static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
 				      u32 val)
 {
-	unsigned long flags;
+	if (pdata->config.flags & SMSC911X_USE_32BIT) {
+		writel(val, pdata->ioaddr + reg);
+		return;
+	}
+
+	if (pdata->config.flags & SMSC911X_USE_16BIT) {
+		unsigned long flags;
+
+		/* these two 16-bit writes must be performed consecutively, so
+		 * must not be interrupted by our own ISR (which would start
+		 * another read operation) */
+		spin_lock_irqsave(&pdata->dev_lock, flags);
+		writew(val & 0xFFFF, pdata->ioaddr + reg);
+		writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2);
+		spin_unlock_irqrestore(&pdata->dev_lock, flags);
+		return;
+	}
 
-	/* these two 16-bit writes must be performed consecutively, so must
-	 * not be interrupted by our own ISR (which would start another
-	 * read operation) */
-	spin_lock_irqsave(&pdata->dev_lock, flags);
-	writew(val & 0xFFFF, pdata->ioaddr + reg);
-	writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2);
-	spin_unlock_irqrestore(&pdata->dev_lock, flags);
+	BUG();
 }
 
 /* Writes a packet to the TX_DATA_FIFO */
@@ -192,8 +175,18 @@ static inline void
 smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
 		      unsigned int wordcount)
 {
-	while (wordcount--)
-		smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
+	if (pdata->config.flags & SMSC911X_USE_32BIT) {
+		writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount);
+		return;
+	}
+
+	if (pdata->config.flags & SMSC911X_USE_16BIT) {
+		while (wordcount--)
+			smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
+		return;
+	}
+
+	BUG();
 }
 
 /* Reads a packet out of the RX_DATA_FIFO */
@@ -201,11 +194,19 @@ static inline void
 smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
 		     unsigned int wordcount)
 {
-	while (wordcount--)
-		*buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO);
-}
+	if (pdata->config.flags & SMSC911X_USE_32BIT) {
+		readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount);
+		return;
+	}
 
-#endif				/* SMSC_CAN_USE_32BIT */
+	if (pdata->config.flags & SMSC911X_USE_16BIT) {
+		while (wordcount--)
+			*buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO);
+		return;
+	}
+
+	BUG();
+}
 
 /* waits for MAC not busy, with timeout.  Only called by smsc911x_mac_read
  * and smsc911x_mac_write, so assumes mac_lock is held */
@@ -790,7 +791,7 @@ static int smsc911x_mii_probe(struct net_device *dev)
 	}
 
 	phydev = phy_connect(dev, phydev->dev.bus_id,
-		&smsc911x_phy_adjust_link, 0, pdata->phy_interface);
+		&smsc911x_phy_adjust_link, 0, pdata->config.phy_interface);
 
 	if (IS_ERR(phydev)) {
 		pr_err("%s: Could not attach to PHY\n", dev->name);
@@ -1205,14 +1206,14 @@ static int smsc911x_open(struct net_device *dev)
 	/* Set interrupt deassertion to 100uS */
 	intcfg = ((10 << 24) | INT_CFG_IRQ_EN_);
 
-	if (pdata->irq_polarity) {
+	if (pdata->config.irq_polarity) {
 		SMSC_TRACE(IFUP, "irq polarity: active high");
 		intcfg |= INT_CFG_IRQ_POL_;
 	} else {
 		SMSC_TRACE(IFUP, "irq polarity: active low");
 	}
 
-	if (pdata->irq_type) {
+	if (pdata->config.irq_type) {
 		SMSC_TRACE(IFUP, "irq type: push-pull");
 		intcfg |= INT_CFG_IRQ_TYPE_;
 	} else {
@@ -1767,9 +1768,7 @@ static int __devinit smsc911x_init(struct net_device *dev)
 	SMSC_TRACE(PROBE, "IRQ: %d", dev->irq);
 	SMSC_TRACE(PROBE, "PHY will be autodetected.");
 
-#if (!SMSC_CAN_USE_32BIT)
 	spin_lock_init(&pdata->dev_lock);
-#endif
 
 	if (pdata->ioaddr == 0) {
 		SMSC_WARNING(PROBE, "pdata->ioaddr: 0x00000000");
@@ -1910,6 +1909,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct smsc911x_data *pdata;
+	struct smsc911x_platform_config *config = pdev->dev.platform_data;
 	struct resource *res;
 	unsigned int intcfg = 0;
 	int res_size;
@@ -1918,6 +1918,13 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 	pr_info("%s: Driver version %s.\n", SMSC_CHIPNAME, SMSC_DRV_VERSION);
 
+	/* platform data specifies irq & dynamic bus configuration */
+	if (!pdev->dev.platform_data) {
+		pr_warning("%s: platform_data not provided\n", SMSC_CHIPNAME);
+		retval = -ENODEV;
+		goto out_0;
+	}
+
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					   "smsc911x-memory");
 	if (!res)
@@ -1949,15 +1956,8 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	dev->irq = platform_get_irq(pdev, 0);
 	pdata->ioaddr = ioremap_nocache(res->start, res_size);
 
-	/* copy config parameters across if present, otherwise pdata
-	 * defaults to zeros */
-	if (pdev->dev.platform_data) {
-		struct smsc911x_platform_config *config =
-			pdev->dev.platform_data;
-		pdata->irq_polarity = config->irq_polarity;
-		pdata->irq_type  = config->irq_type;
-		pdata->phy_interface = config->phy_interface;
-	}
+	/* copy config parameters across to pdata */
+	memcpy(&pdata->config, config, sizeof(pdata->config));
 
 	pdata->dev = dev;
 	pdata->msg_enable = ((1 << debug) - 1);
@@ -1974,10 +1974,10 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 		goto out_unmap_io_3;
 
 	/* configure irq polarity and type before connecting isr */
-	if (pdata->irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
+	if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
 		intcfg |= INT_CFG_IRQ_POL_;
 
-	if (pdata->irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL)
+	if (pdata->config.irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL)
 		intcfg |= INT_CFG_IRQ_TYPE_;
 
 	smsc911x_reg_write(pdata, INT_CFG, intcfg);
diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h
index feb36de274ca..f818cf0415f7 100644
--- a/drivers/net/smsc911x.h
+++ b/drivers/net/smsc911x.h
@@ -21,7 +21,6 @@
 #ifndef __SMSC911X_H__
 #define __SMSC911X_H__
 
-#define SMSC_CAN_USE_32BIT	1
 #define TX_FIFO_LOW_THRESHOLD	((u32)1600)
 #define SMSC911X_EEPROM_SIZE	((u32)7)
 #define USE_DEBUG		0
diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h
index 47c4ffd10dbb..1cbf0313adde 100644
--- a/include/linux/smsc911x.h
+++ b/include/linux/smsc911x.h
@@ -28,6 +28,7 @@
 struct smsc911x_platform_config {
 	unsigned int irq_polarity;
 	unsigned int irq_type;
+	unsigned int flags;
 	phy_interface_t phy_interface;
 };
 
@@ -39,4 +40,8 @@ struct smsc911x_platform_config {
 #define SMSC911X_IRQ_TYPE_OPEN_DRAIN		0
 #define SMSC911X_IRQ_TYPE_PUSH_PULL		1
 
+/* Constants for flags */
+#define SMSC911X_USE_16BIT 			(BIT(0))
+#define SMSC911X_USE_32BIT 			(BIT(1))
+
 #endif /* __LINUX_SMSC911X_H__ */
-- 
cgit v1.2.3


From bd91b8bf372911c1e4d66d6bb44fe409349a6791 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:07:08 -0800
Subject: netns: ip6mr: allocate mroute6_socket per-namespace.

Preliminary work to make IPv6 multicast forwarding netns-aware.

Make IPv6 multicast forwarding mroute6_socket per-namespace,
moves it into struct netns_ipv6.

At the moment, mroute6_socket is only referenced in init_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h  |  8 ++++++--
 include/net/netns/ipv6.h |  3 +++
 net/ipv6/ip6_output.c    |  3 ++-
 net/ipv6/ip6mr.c         | 22 ++++++++++------------
 4 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 6f4c180179e2..2cd9901ee5c7 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -117,6 +117,7 @@ struct sioc_mif_req6
 
 #include <linux/pim.h>
 #include <linux/skbuff.h>	/* for struct sk_buff_head */
+#include <net/net_namespace.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -232,10 +233,13 @@ struct rtmsg;
 extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket;
+static inline struct sock *mroute6_socket(struct net *net)
+{
+	return net->ipv6.mroute6_sk;
+}
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-#define mroute6_socket NULL
+static inline struct sock *mroute6_socket(struct net *net) { return NULL; }
 static inline int ip6mr_sk_done(struct sock *sk) { return 0; }
 #endif
 #endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 2932721180c0..8a0a67d073b3 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -55,5 +55,8 @@ struct netns_ipv6 {
 	struct sock             *ndisc_sk;
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
+#ifdef CONFIG_IPV6_MROUTE
+	struct sock		*mroute6_sk;
+#endif
 };
 #endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7d92fd97cfb9..4b15938bef4d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -137,7 +137,8 @@ static int ip6_output2(struct sk_buff *skb)
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
 		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
-		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
+		    ((mroute6_socket(dev_net(dev)) &&
+		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 					 &ipv6_hdr(skb)->saddr))) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d1008e6891e7..02163dbf84c3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -49,9 +49,6 @@
 #include <net/addrconf.h>
 #include <linux/netfilter_ipv6.h>
 
-struct sock *mroute6_socket;
-
-
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -820,7 +817,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
 	skb_pull(skb, sizeof(struct ipv6hdr));
 	}
 
-	if (mroute6_socket == NULL) {
+	if (init_net.ipv6.mroute6_sk == NULL) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -828,7 +825,8 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
 	/*
 	 *	Deliver to user space multicast routing algorithms
 	 */
-	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
+	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
+	if (ret < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
 		kfree_skb(skb);
@@ -1145,8 +1143,8 @@ static int ip6mr_sk_init(struct sock *sk)
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (likely(mroute6_socket == NULL))
-		mroute6_socket = sk;
+	if (likely(init_net.ipv6.mroute6_sk == NULL))
+		init_net.ipv6.mroute6_sk = sk;
 	else
 		err = -EADDRINUSE;
 	write_unlock_bh(&mrt_lock);
@@ -1161,9 +1159,9 @@ int ip6mr_sk_done(struct sock *sk)
 	int err = 0;
 
 	rtnl_lock();
-	if (sk == mroute6_socket) {
+	if (sk == init_net.ipv6.mroute6_sk) {
 		write_lock_bh(&mrt_lock);
-		mroute6_socket = NULL;
+		init_net.ipv6.mroute6_sk = NULL;
 		write_unlock_bh(&mrt_lock);
 
 		mroute_clean_tables(sk);
@@ -1189,7 +1187,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 	mifi_t mifi;
 
 	if (optname != MRT6_INIT) {
-		if (sk != mroute6_socket && !capable(CAP_NET_ADMIN))
+		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -1214,7 +1212,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		if (vif.mif6c_mifi >= MAXMIFS)
 			return -ENFILE;
 		rtnl_lock();
-		ret = mif6_add(&vif, sk == mroute6_socket);
+		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1242,7 +1240,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		if (optname == MRT6_DEL_MFC)
 			ret = ip6mr_mfc_delete(&mfc);
 		else
-			ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket);
+			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
-- 
cgit v1.2.3


From 4e16880cb4225bfa68878ad5b2a9ded53657d054 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:15:08 -0800
Subject: netns: ip6mr: dynamically allocates vif6_table

Preliminary work to make IPv6 multicast forwarding netns-aware.

Dynamically allocates interface table vif6_table and moves it to
struct netns_ipv6, and updates MIF_EXISTS() macro.

At the moment, vif6_table is only referenced in init_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |   2 +
 net/ipv6/ip6mr.c         | 107 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 70 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 8a0a67d073b3..4ab0cb01a7a5 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -57,6 +57,8 @@ struct netns_ipv6 {
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
 	struct sock		*mroute6_sk;
+	struct mif_device	*vif6_table;
+	int			maxvif;
 #endif
 };
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 02163dbf84c3..bae3ef649664 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -59,10 +59,7 @@ static DEFINE_RWLOCK(mrt_lock);
  *	Multicast router control variables
  */
 
-static struct mif_device vif6_table[MAXMIFS];		/* Devices 		*/
-static int maxvif;
-
-#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL)
+#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
 
 static int mroute_do_assert;				/* Set in PIM assert	*/
 #ifdef CONFIG_IPV6_PIMSM_V2
@@ -145,11 +142,11 @@ struct ipmr_vif_iter {
 static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
-	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
-		if (!MIF_EXISTS(iter->ct))
+	for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
+		if (!MIF_EXISTS(&init_net, iter->ct))
 			continue;
 		if (pos-- == 0)
-			return &vif6_table[iter->ct];
+			return &init_net.ipv6.vif6_table[iter->ct];
 	}
 	return NULL;
 }
@@ -170,10 +167,10 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ip6mr_vif_seq_idx(iter, 0);
 
-	while (++iter->ct < maxvif) {
-		if (!MIF_EXISTS(iter->ct))
+	while (++iter->ct < init_net.ipv6.maxvif) {
+		if (!MIF_EXISTS(&init_net, iter->ct))
 			continue;
-		return &vif6_table[iter->ct];
+		return &init_net.ipv6.vif6_table[iter->ct];
 	}
 	return NULL;
 }
@@ -195,7 +192,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq,
 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-			   vif - vif6_table,
+			   vif - init_net.ipv6.vif6_table,
 			   name, vif->bytes_in, vif->pkt_in,
 			   vif->bytes_out, vif->pkt_out,
 			   vif->flags);
@@ -305,7 +302,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 				   mfc->mfc_un.res.wrong_if);
 			for (n = mfc->mfc_un.res.minvif;
 			     n < mfc->mfc_un.res.maxvif; n++) {
-				if (MIF_EXISTS(n) &&
+				if (MIF_EXISTS(&init_net, n) &&
 				    mfc->mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
 						   " %2d:%-3d",
@@ -374,7 +371,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
-		reg_dev = vif6_table[reg_vif_num].dev;
+		reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -470,10 +467,10 @@ static int mif6_delete(int vifi)
 {
 	struct mif_device *v;
 	struct net_device *dev;
-	if (vifi < 0 || vifi >= maxvif)
+	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &vif6_table[vifi];
+	v = &init_net.ipv6.vif6_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -489,13 +486,13 @@ static int mif6_delete(int vifi)
 		reg_vif_num = -1;
 #endif
 
-	if (vifi + 1 == maxvif) {
+	if (vifi + 1 == init_net.ipv6.maxvif) {
 		int tmp;
 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
-			if (MIF_EXISTS(tmp))
+			if (MIF_EXISTS(&init_net, tmp))
 				break;
 		}
-		maxvif = tmp + 1;
+		init_net.ipv6.maxvif = tmp + 1;
 	}
 
 	write_unlock_bh(&mrt_lock);
@@ -586,8 +583,9 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl
 	cache->mfc_un.res.maxvif = 0;
 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
-	for (vifi = 0; vifi < maxvif; vifi++) {
-		if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
+	for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
+		if (MIF_EXISTS(&init_net, vifi) &&
+		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
 				cache->mfc_un.res.minvif = vifi;
@@ -600,12 +598,12 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl
 static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 {
 	int vifi = vifc->mif6c_mifi;
-	struct mif_device *v = &vif6_table[vifi];
+	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
 	struct net_device *dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (MIF_EXISTS(vifi))
+	if (MIF_EXISTS(&init_net, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->mif6c_flags) {
@@ -665,8 +663,8 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 	if (v->flags & MIFF_REGISTER)
 		reg_vif_num = vifi;
 #endif
-	if (vifi + 1 > maxvif)
-		maxvif = vifi + 1;
+	if (vifi + 1 > init_net.ipv6.maxvif)
+		init_net.ipv6.maxvif = vifi + 1;
 	write_unlock_bh(&mrt_lock);
 	return 0;
 }
@@ -946,8 +944,8 @@ static int ip6mr_device_event(struct notifier_block *this,
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	v = &vif6_table[0];
-	for (ct = 0; ct < maxvif; ct++, v++) {
+	v = &init_net.ipv6.vif6_table[0];
+	for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
 		if (v->dev == dev)
 			mif6_delete(ct);
 	}
@@ -962,6 +960,30 @@ static struct notifier_block ip6_mr_notifier = {
  *	Setup for IP multicast routing
  */
 
+static int __net_init ip6mr_net_init(struct net *net)
+{
+	int err = 0;
+
+	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
+				       GFP_KERNEL);
+	if (!net->ipv6.vif6_table) {
+		err = -ENOMEM;
+		goto fail;
+	}
+fail:
+	return err;
+}
+
+static void __net_exit ip6mr_net_exit(struct net *net)
+{
+	kfree(net->ipv6.vif6_table);
+}
+
+static struct pernet_operations ip6mr_net_ops = {
+	.init = ip6mr_net_init,
+	.exit = ip6mr_net_exit,
+};
+
 int __init ip6_mr_init(void)
 {
 	int err;
@@ -973,6 +995,10 @@ int __init ip6_mr_init(void)
 	if (!mrt_cachep)
 		return -ENOMEM;
 
+	err = register_pernet_subsys(&ip6mr_net_ops);
+	if (err)
+		goto reg_pernet_fail;
+
 	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
 	err = register_netdevice_notifier(&ip6_mr_notifier);
 	if (err)
@@ -994,6 +1020,8 @@ proc_vif_fail:
 #endif
 reg_notif_fail:
 	del_timer(&ipmr_expire_timer);
+	unregister_pernet_subsys(&ip6mr_net_ops);
+reg_pernet_fail:
 	kmem_cache_destroy(mrt_cachep);
 	return err;
 }
@@ -1006,6 +1034,7 @@ void ip6_mr_cleanup(void)
 #endif
 	unregister_netdevice_notifier(&ip6_mr_notifier);
 	del_timer(&ipmr_expire_timer);
+	unregister_pernet_subsys(&ip6mr_net_ops);
 	kmem_cache_destroy(mrt_cachep);
 }
 
@@ -1095,8 +1124,8 @@ static void mroute_clean_tables(struct sock *sk)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for (i = 0; i < maxvif; i++) {
-		if (!(vif6_table[i].flags & VIFF_STATIC))
+	for (i = 0; i < init_net.ipv6.maxvif; i++) {
+		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
 			mif6_delete(i);
 	}
 
@@ -1346,11 +1375,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	case SIOCGETMIFCNT_IN6:
 		if (copy_from_user(&vr, arg, sizeof(vr)))
 			return -EFAULT;
-		if (vr.mifi >= maxvif)
+		if (vr.mifi >= init_net.ipv6.maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &vif6_table[vr.mifi];
-		if (MIF_EXISTS(vr.mifi)) {
+		vif = &init_net.ipv6.vif6_table[vr.mifi];
+		if (MIF_EXISTS(&init_net, vr.mifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1401,7 +1430,7 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct mif_device *vif = &vif6_table[vifi];
+	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
 	struct flowi fl;
@@ -1474,8 +1503,8 @@ out_free:
 static int ip6mr_find_vif(struct net_device *dev)
 {
 	int ct;
-	for (ct = maxvif - 1; ct >= 0; ct--) {
-		if (vif6_table[ct].dev == dev)
+	for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
+		if (init_net.ipv6.vif6_table[ct].dev == dev)
 			break;
 	}
 	return ct;
@@ -1493,7 +1522,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (vif6_table[vif].dev != skb->dev) {
+	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
 		int true_vifi;
 
 		cache->mfc_un.res.wrong_if++;
@@ -1514,8 +1543,8 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 		goto dont_forward;
 	}
 
-	vif6_table[vif].pkt_in++;
-	vif6_table[vif].bytes_in += skb->len;
+	init_net.ipv6.vif6_table[vif].pkt_in++;
+	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -1583,7 +1612,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net_device *dev = vif6_table[c->mf6c_parent].dev;
+	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1599,7 +1628,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
-- 
cgit v1.2.3


From 58701ad41105638baa0b38ffe9ac5b10469c1fd3 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:22:34 -0800
Subject: netns: ip6mr: store netns in struct mfc6_cache

This patch stores into struct mfc6_cache the network namespace each
mfc6_cache belongs to. The new member is mfc6_net.

mfc6_net is assigned at cache allocation and doesn't change during
the rest of the cache entry life.

This will help to retrieve the current netns around the IPv6 multicast
forwarding code.

At the moment, all mfc6_cache are allocated in init_net.

Changelog:
==========
* Use write_pnet()/read_pnet() to set and get mfc6_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h | 15 +++++++++++++++
 net/ipv6/ip6mr.c        | 26 +++++++++++++++++---------
 2 files changed, 32 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 2cd9901ee5c7..15d85fe12bbd 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -188,6 +188,9 @@ struct mif_device
 struct mfc6_cache
 {
 	struct mfc6_cache *next;		/* Next entry on cache line 	*/
+#ifdef CONFIG_NET_NS
+	struct net *mfc6_net;
+#endif
 	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
 	struct in6_addr mf6c_origin;			/* Source of packet 		*/
 	mifi_t mf6c_parent;			/* Source interface		*/
@@ -210,6 +213,18 @@ struct mfc6_cache
 	} mfc_un;
 };
 
+static inline
+struct net *mfc6_net(const struct mfc6_cache *mfc)
+{
+	return read_pnet(&mfc->mfc6_net);
+}
+
+static inline
+void mfc6_net_set(struct mfc6_cache *mfc, struct net *net)
+{
+	write_pnet(&mfc->mfc6_net, hold_net(net));
+}
+
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bae3ef649664..fab5aba28a20 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -506,6 +506,12 @@ static int mif6_delete(int vifi)
 	return 0;
 }
 
+static inline void ip6mr_cache_free(struct mfc6_cache *c)
+{
+	release_net(mfc6_net(c));
+	kmem_cache_free(mrt_cachep, c);
+}
+
 /* Destroy an unresolved cache entry, killing queued skbs
    and reporting error to netlink readers.
  */
@@ -528,7 +534,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c)
 			kfree_skb(skb);
 	}
 
-	kmem_cache_free(mrt_cachep, c);
+	ip6mr_cache_free(c);
 }
 
 
@@ -685,22 +691,24 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a
 /*
  *	Allocate a multicast cache entry
  */
-static struct mfc6_cache *ip6mr_cache_alloc(void)
+static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (c == NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXMIFS;
+	mfc6_net_set(c, net);
 	return c;
 }
 
-static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
+static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (c == NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
+	mfc6_net_set(c, net);
 	return c;
 }
 
@@ -856,7 +864,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 		 */
 
 		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres()) == NULL) {
+		    (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -879,7 +887,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 			 */
 			spin_unlock_bh(&mfc_unres_lock);
 
-			kmem_cache_free(mrt_cachep, c);
+			ip6mr_cache_free(c);
 			kfree_skb(skb);
 			return err;
 		}
@@ -924,7 +932,7 @@ static int ip6mr_mfc_delete(struct mf6cctl *mfc)
 			*cp = c->next;
 			write_unlock_bh(&mrt_lock);
 
-			kmem_cache_free(mrt_cachep, c);
+			ip6mr_cache_free(c);
 			return 0;
 		}
 	}
@@ -1073,7 +1081,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
 		return -EINVAL;
 
-	c = ip6mr_cache_alloc();
+	c = ip6mr_cache_alloc(&init_net);
 	if (c == NULL)
 		return -ENOMEM;
 
@@ -1108,7 +1116,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 
 	if (uc) {
 		ip6mr_cache_resolve(uc, c);
-		kmem_cache_free(mrt_cachep, uc);
+		ip6mr_cache_free(uc);
 	}
 	return 0;
 }
@@ -1145,7 +1153,7 @@ static void mroute_clean_tables(struct sock *sk)
 			*cp = c->next;
 			write_unlock_bh(&mrt_lock);
 
-			kmem_cache_free(mrt_cachep, c);
+			ip6mr_cache_free(c);
 		}
 	}
 
-- 
cgit v1.2.3


From 4a6258a0e33d042e4c84d9dec25d45ddb40a70b3 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:24:07 -0800
Subject: netns: ip6mr: dynamically allocate mfc6_cache_array

Preliminary work to make IPv6 multicast forwarding netns-aware.

Dynamically allocates IPv6 multicast forwarding cache, mfc6_cache_array,
and moves it to struct netns_ipv6.

At the moment, mfc6_cache_array is only referenced in init_net.

Replace 'ARRAY_SIZE(mfc6_cache_array)' with mfc6_cache_array size: MFC6_LINES.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  1 +
 net/ipv6/ip6mr.c         | 47 +++++++++++++++++++++++++++++++----------------
 2 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 4ab0cb01a7a5..14c1bbe68a85 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -57,6 +57,7 @@ struct netns_ipv6 {
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
 	struct sock		*mroute6_sk;
+	struct mfc6_cache	**mfc6_cache_array;
 	struct mif_device	*vif6_table;
 	int			maxvif;
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index fab5aba28a20..287e526ba036 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -68,8 +68,6 @@ static int mroute_do_pim;
 #define mroute_do_pim 0
 #endif
 
-static struct mfc6_cache *mfc6_cache_array[MFC6_LINES];	/* Forwarding cache	*/
-
 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
 static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
 
@@ -109,10 +107,11 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
 {
 	struct mfc6_cache *mfc;
 
-	it->cache = mfc6_cache_array;
+	it->cache = init_net.ipv6.mfc6_cache_array;
 	read_lock(&mrt_lock);
-	for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++)
-		for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next)
+	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
+		for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
+		     mfc; mfc = mfc->next)
 			if (pos-- == 0)
 				return mfc;
 	read_unlock(&mrt_lock);
@@ -243,10 +242,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (it->cache == &mfc_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != mfc6_cache_array);
+	BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
 
-	while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) {
-		mfc = mfc6_cache_array[it->ct];
+	while (++it->ct < MFC6_LINES) {
+		mfc = init_net.ipv6.mfc6_cache_array[it->ct];
 		if (mfc)
 			return mfc;
 	}
@@ -274,7 +273,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 
 	if (it->cache == &mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == mfc6_cache_array)
+	else if (it->cache == init_net.ipv6.mfc6_cache_array)
 		read_unlock(&mrt_lock);
 }
 
@@ -680,7 +679,7 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a
 	int line = MFC6_HASH(mcastgrp, origin);
 	struct mfc6_cache *c;
 
-	for (c = mfc6_cache_array[line]; c; c = c->next) {
+	for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
 			break;
@@ -925,7 +924,8 @@ static int ip6mr_mfc_delete(struct mf6cctl *mfc)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
+	for (cp = &init_net.ipv6.mfc6_cache_array[line];
+	     (c = *cp) != NULL; cp = &c->next) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			write_lock_bh(&mrt_lock);
@@ -978,12 +978,26 @@ static int __net_init ip6mr_net_init(struct net *net)
 		err = -ENOMEM;
 		goto fail;
 	}
+
+	/* Forwarding cache */
+	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
+					     sizeof(struct mfc6_cache *),
+					     GFP_KERNEL);
+	if (!net->ipv6.mfc6_cache_array) {
+		err = -ENOMEM;
+		goto fail_mfc6_cache;
+	}
+	return 0;
+
+fail_mfc6_cache:
+	kfree(net->ipv6.vif6_table);
 fail:
 	return err;
 }
 
 static void __net_exit ip6mr_net_exit(struct net *net)
 {
+	kfree(net->ipv6.mfc6_cache_array);
 	kfree(net->ipv6.vif6_table);
 }
 
@@ -1062,7 +1076,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
+	for (cp = &init_net.ipv6.mfc6_cache_array[line];
+	     (c = *cp) != NULL; cp = &c->next) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
 			break;
@@ -1093,8 +1108,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = mfc6_cache_array[line];
-	mfc6_cache_array[line] = c;
+	c->next = init_net.ipv6.mfc6_cache_array[line];
+	init_net.ipv6.mfc6_cache_array[line] = c;
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -1140,10 +1155,10 @@ static void mroute_clean_tables(struct sock *sk)
 	/*
 	 *	Wipe the cache
 	 */
-	for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) {
+	for (i = 0; i < MFC6_LINES; i++) {
 		struct mfc6_cache *c, **cp;
 
-		cp = &mfc6_cache_array[i];
+		cp = &init_net.ipv6.mfc6_cache_array[i];
 		while ((c = *cp) != NULL) {
 			if (c->mfc_flags & MFC_STATIC) {
 				cp = &c->next;
-- 
cgit v1.2.3


From 4045e57c19bee150370390545ee8a933b3f7a18d Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:27:21 -0800
Subject: netns: ip6mr: declare counter cache_resolve_queue_len per-namespace

Preliminary work to make IPv6 multicast forwarding netns-aware.

Declare variable cache_resolve_queue_len per-namespace: moves it into
struct netns_ipv6.

This variable counts the number of unresolved cache entries queued in the
list mfc_unres_queue. This list is kept global to all netns as the number
of entries per namespace is limited to 10 (hardcoded in routine
ip6mr_cache_unresolved).
Entries belonging to different namespaces in mfc_unres_queue will be
identified by matching the mfc_net member introduced previously in
struct mfc6_cache.

Keeping this list global to all netns, also allows us to keep a single
timer (ipmr_expire_timer) to handle their expiration.
In some places cache_resolve_queue_len value was tested for arming
or deleting the timer. These tests were equivalent to testing
mfc_unres_queue value instead and are replaced in this patch.

At the moment, cache_resolve_queue_len is only referenced in init_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  1 +
 net/ipv6/ip6mr.c         | 40 +++++++++++++++++++++-------------------
 2 files changed, 22 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 14c1bbe68a85..30572f3f9781 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -60,6 +60,7 @@ struct netns_ipv6 {
 	struct mfc6_cache	**mfc6_cache_array;
 	struct mif_device	*vif6_table;
 	int			maxvif;
+	atomic_t		cache_resolve_queue_len;
 #endif
 };
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 287e526ba036..077c8198eb53 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -69,7 +69,6 @@ static int mroute_do_pim;
 #endif
 
 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
-static atomic_t cache_resolve_queue_len;		/* Size of unresolved	*/
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -519,7 +518,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
 
-	atomic_dec(&cache_resolve_queue_len);
+	atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
 
 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
@@ -561,7 +560,7 @@ static void ipmr_do_expire_process(unsigned long dummy)
 		ip6mr_destroy_unres(c);
 	}
 
-	if (atomic_read(&cache_resolve_queue_len))
+	if (mfc_unres_queue != NULL)
 		mod_timer(&ipmr_expire_timer, jiffies + expires);
 }
 
@@ -572,7 +571,7 @@ static void ipmr_expire_process(unsigned long dummy)
 		return;
 	}
 
-	if (atomic_read(&cache_resolve_queue_len))
+	if (mfc_unres_queue != NULL)
 		ipmr_do_expire_process(dummy);
 
 	spin_unlock(&mfc_unres_lock);
@@ -852,7 +851,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 
 	spin_lock_bh(&mfc_unres_lock);
 	for (c = mfc_unres_queue; c; c = c->next) {
-		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
+		if (net_eq(mfc6_net(c), &init_net) &&
+		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
 			break;
 	}
@@ -862,7 +862,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
+		if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
 		    (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
@@ -891,7 +891,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 			return err;
 		}
 
-		atomic_inc(&cache_resolve_queue_len);
+		atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
 		c->next = mfc_unres_queue;
 		mfc_unres_queue = c;
 
@@ -1119,14 +1119,16 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 	spin_lock_bh(&mfc_unres_lock);
 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
 	     cp = &uc->next) {
-		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
+		if (net_eq(mfc6_net(uc), &init_net) &&
+		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			*cp = uc->next;
-			if (atomic_dec_and_test(&cache_resolve_queue_len))
-				del_timer(&ipmr_expire_timer);
+			atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
 			break;
 		}
 	}
+	if (mfc_unres_queue == NULL)
+		del_timer(&ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
@@ -1172,18 +1174,18 @@ static void mroute_clean_tables(struct sock *sk)
 		}
 	}
 
-	if (atomic_read(&cache_resolve_queue_len) != 0) {
-		struct mfc6_cache *c;
+	if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
+		struct mfc6_cache *c, **cp;
 
 		spin_lock_bh(&mfc_unres_lock);
-		while (mfc_unres_queue != NULL) {
-			c = mfc_unres_queue;
-			mfc_unres_queue = c->next;
-			spin_unlock_bh(&mfc_unres_lock);
-
+		cp = &mfc_unres_queue;
+		while ((c = *cp) != NULL) {
+			if (!net_eq(mfc6_net(c), &init_net)) {
+				cp = &c->next;
+				continue;
+			}
+			*cp = c->next;
 			ip6mr_destroy_unres(c);
-
-			spin_lock_bh(&mfc_unres_lock);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
-- 
cgit v1.2.3


From a21f3f997c73ced682129aedd372bb6b53041510 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:28:44 -0800
Subject: netns: ip6mr: declare mroute_do_assert and mroute_do_pim
 per-namespace

Preliminary work to make IPv6 multicast forwarding netns-aware.

Declare IPv6 multicast forwarding variables 'mroute_do_assert' and
'mroute_do_pim' per-namespace in struct netns_ipv6.

At the moment, these variables are only referenced in init_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  2 ++
 net/ipv6/ip6mr.c         | 26 ++++++++++----------------
 2 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 30572f3f9781..919cb0f20055 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -61,6 +61,8 @@ struct netns_ipv6 {
 	struct mif_device	*vif6_table;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
+	int			mroute_do_assert;
+	int			mroute_do_pim;
 #endif
 };
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 077c8198eb53..2d2ac23b8606 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -61,13 +61,6 @@ static DEFINE_RWLOCK(mrt_lock);
 
 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
 
-static int mroute_do_assert;				/* Set in PIM assert	*/
-#ifdef CONFIG_IPV6_PIMSM_V2
-static int mroute_do_pim;
-#else
-#define mroute_do_pim 0
-#endif
-
 static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
 
 /* Special spinlock for queue of unresolved entries */
@@ -1306,7 +1299,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		int v;
 		if (get_user(v, (int __user *)optval))
 			return -EFAULT;
-		mroute_do_assert = !!v;
+		init_net.ipv6.mroute_do_assert = !!v;
 		return 0;
 	}
 
@@ -1319,10 +1312,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		v = !!v;
 		rtnl_lock();
 		ret = 0;
-		if (v != mroute_do_pim) {
-			mroute_do_pim = v;
-			mroute_do_assert = v;
-			if (mroute_do_pim)
+		if (v != init_net.ipv6.mroute_do_pim) {
+			init_net.ipv6.mroute_do_pim = v;
+			init_net.ipv6.mroute_do_assert = v;
+			if (init_net.ipv6.mroute_do_pim)
 				ret = inet6_add_protocol(&pim6_protocol,
 							 IPPROTO_PIM);
 			else
@@ -1361,11 +1354,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 		break;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	case MRT6_PIM:
-		val = mroute_do_pim;
+		val = init_net.ipv6.mroute_do_pim;
 		break;
 #endif
 	case MRT6_ASSERT:
-		val = mroute_do_assert;
+		val = init_net.ipv6.mroute_do_assert;
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -1553,13 +1546,14 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 		cache->mfc_un.res.wrong_if++;
 		true_vifi = ip6mr_find_vif(skb->dev);
 
-		if (true_vifi >= 0 && mroute_do_assert &&
+		if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
 		       so that we cannot check that packet arrived on an oif.
 		       It is bad, but otherwise we would need to move pretty
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
-		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
+		    (init_net.ipv6.mroute_do_pim ||
+		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
 			cache->mfc_un.res.last_assert = jiffies;
-- 
cgit v1.2.3


From 950d5704e5daa1f90bcd75b99163491e7b249169 Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:29:24 -0800
Subject: netns: ip6mr: declare reg_vif_num per-namespace

Preliminary work to make IPv6 multicast forwarding netns-aware.

Declare variable 'reg_vif_num' per-namespace, moves into struct netns_ipv6.

At the moment, this variable is only referenced in init_net.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  3 +++
 net/ipv6/ip6mr.c         | 19 +++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 919cb0f20055..afab4e4cbac7 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -63,6 +63,9 @@ struct netns_ipv6 {
 	atomic_t		cache_resolve_queue_len;
 	int			mroute_do_assert;
 	int			mroute_do_pim;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	int			mroute_reg_vif_num;
+#endif
 #endif
 };
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2d2ac23b8606..8e693859ea03 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -333,13 +333,13 @@ static struct file_operations ip6mr_mfc_fops = {
 #endif
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-static int reg_vif_num = -1;
 
 static int pim6_rcv(struct sk_buff *skb)
 {
 	struct pimreghdr *pim;
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
+	int reg_vif_num = init_net.ipv6.mroute_reg_vif_num;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
@@ -401,7 +401,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
 	dev->stats.tx_packets++;
-	ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
+	ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return 0;
@@ -473,8 +473,8 @@ static int mif6_delete(int vifi)
 	}
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-	if (vifi == reg_vif_num)
-		reg_vif_num = -1;
+	if (vifi == init_net.ipv6.mroute_reg_vif_num)
+		init_net.ipv6.mroute_reg_vif_num = -1;
 #endif
 
 	if (vifi + 1 == init_net.ipv6.maxvif) {
@@ -610,7 +610,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		 * Special Purpose VIF in PIM
 		 * All the packets will be sent to the daemon
 		 */
-		if (reg_vif_num >= 0)
+		if (init_net.ipv6.mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
 		dev = ip6mr_reg_vif();
 		if (!dev)
@@ -658,7 +658,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 	v->dev = dev;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	if (v->flags & MIFF_REGISTER)
-		reg_vif_num = vifi;
+		init_net.ipv6.mroute_reg_vif_num = vifi;
 #endif
 	if (vifi + 1 > init_net.ipv6.maxvif)
 		init_net.ipv6.maxvif = vifi + 1;
@@ -777,7 +777,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
 		msg = (struct mrt6msg *)skb_transport_header(skb);
 		msg->im6_mbz = 0;
 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
-		msg->im6_mif = reg_vif_num;
+		msg->im6_mif = init_net.ipv6.mroute_reg_vif_num;
 		msg->im6_pad = 0;
 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
@@ -964,7 +964,6 @@ static struct notifier_block ip6_mr_notifier = {
 static int __net_init ip6mr_net_init(struct net *net)
 {
 	int err = 0;
-
 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
 				       GFP_KERNEL);
 	if (!net->ipv6.vif6_table) {
@@ -980,6 +979,10 @@ static int __net_init ip6mr_net_init(struct net *net)
 		err = -ENOMEM;
 		goto fail_mfc6_cache;
 	}
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	net->ipv6.mroute_reg_vif_num = -1;
+#endif
 	return 0;
 
 fail_mfc6_cache:
-- 
cgit v1.2.3


From 8229efdaef1e7913ae1712c0ba752f267e5fcd5e Mon Sep 17 00:00:00 2001
From: Benjamin Thery <benjamin.thery@bull.net>
Date: Wed, 10 Dec 2008 16:30:15 -0800
Subject: netns: ip6mr: enable namespace support in ipv6 multicast forwarding
 code

This last patch makes the appropriate changes to use and propagate the
network namespace where needed in IPv6 multicast forwarding code.

This consists mainly in replacing all the remaining init_net occurences
with current netns pointer retrieved from sockets, net devices or
mfc6_caches depending on the routines' contexts.

Some routines receive a new 'struct net' parameter to propagate the current
netns:
* ip6mr_get_route
* ip6mr_cache_report
* ip6mr_cache_find
* ip6mr_cache_unresolved
* mif6_add/mif6_delete
* ip6mr_mfc_add/ip6mr_mfc_delete
* ip6mr_reg_vif

All the IPv6 multicast forwarding variables moved to struct netns_ipv6 by
the previous patches are now referenced in the correct namespace.

Changelog:
==========
* Take into account the net associated to mfc6_cache when matching entries in
  mfc_unres_queue list.
* Call mroute_clean_tables() in ip6mr_net_exit() to free memory allocated
  per-namespace.
* Call dev_net_set() in ip6mr_reg_vif() to initialize dev->nd_net
  correctly.

Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h |   3 +-
 net/ipv6/ip6mr.c        | 226 +++++++++++++++++++++++++++---------------------
 net/ipv6/route.c        |   2 +-
 3 files changed, 129 insertions(+), 102 deletions(-)

(limited to 'include')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 15d85fe12bbd..5375faca1f72 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -245,7 +245,8 @@ void mfc6_net_set(struct mfc6_cache *mfc, struct net *net)
 
 #ifdef __KERNEL__
 struct rtmsg;
-extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait);
+extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
+			   struct rtmsg *rtm, int nowait);
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline struct sock *mroute6_socket(struct net *net)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d619faf68d98..9eed2422b3e3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -77,8 +77,10 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 static struct kmem_cache *mrt_cachep __read_mostly;
 
 static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
+static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
+			      mifi_t mifi, int assert);
 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
+static void mroute_clean_tables(struct net *net);
 
 #ifdef CONFIG_IPV6_PIMSM_V2
 static struct inet6_protocol pim6_protocol;
@@ -354,7 +356,8 @@ static int pim6_rcv(struct sk_buff *skb)
 	struct pimreghdr *pim;
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
-	int reg_vif_num = init_net.ipv6.mroute_reg_vif_num;
+	struct net *net = dev_net(skb->dev);
+	int reg_vif_num = net->ipv6.mroute_reg_vif_num;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
@@ -377,7 +380,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
-		reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
+		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -413,10 +416,13 @@ static struct inet6_protocol pim6_protocol = {
 
 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
+
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
 	dev->stats.tx_packets++;
-	ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
+	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
+			   MRT6MSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return 0;
@@ -435,7 +441,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->destructor		= free_netdev;
 }
 
-static struct net_device *ip6mr_reg_vif(void)
+static struct net_device *ip6mr_reg_vif(struct net *net)
 {
 	struct net_device *dev;
 
@@ -443,6 +449,8 @@ static struct net_device *ip6mr_reg_vif(void)
 	if (dev == NULL)
 		return NULL;
 
+	dev_net_set(dev, net);
+
 	if (register_netdevice(dev)) {
 		free_netdev(dev);
 		return NULL;
@@ -469,14 +477,14 @@ failure:
  *	Delete a VIF entry
  */
 
-static int mif6_delete(int vifi)
+static int mif6_delete(struct net *net, int vifi)
 {
 	struct mif_device *v;
 	struct net_device *dev;
-	if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
+	if (vifi < 0 || vifi >= net->ipv6.maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &init_net.ipv6.vif6_table[vifi];
+	v = &net->ipv6.vif6_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -488,17 +496,17 @@ static int mif6_delete(int vifi)
 	}
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-	if (vifi == init_net.ipv6.mroute_reg_vif_num)
-		init_net.ipv6.mroute_reg_vif_num = -1;
+	if (vifi == net->ipv6.mroute_reg_vif_num)
+		net->ipv6.mroute_reg_vif_num = -1;
 #endif
 
-	if (vifi + 1 == init_net.ipv6.maxvif) {
+	if (vifi + 1 == net->ipv6.maxvif) {
 		int tmp;
 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
-			if (MIF_EXISTS(&init_net, tmp))
+			if (MIF_EXISTS(net, tmp))
 				break;
 		}
-		init_net.ipv6.maxvif = tmp + 1;
+		net->ipv6.maxvif = tmp + 1;
 	}
 
 	write_unlock_bh(&mrt_lock);
@@ -525,8 +533,9 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
 static void ip6mr_destroy_unres(struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
+	struct net *net = mfc6_net(c);
 
-	atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
+	atomic_dec(&net->ipv6.cache_resolve_queue_len);
 
 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
@@ -535,7 +544,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c)
 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 			skb_trim(skb, nlh->nlmsg_len);
 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
-			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
 			kfree_skb(skb);
 	}
@@ -590,13 +599,14 @@ static void ipmr_expire_process(unsigned long dummy)
 static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
 {
 	int vifi;
+	struct net *net = mfc6_net(cache);
 
 	cache->mfc_un.res.minvif = MAXMIFS;
 	cache->mfc_un.res.maxvif = 0;
 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
-	for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
-		if (MIF_EXISTS(&init_net, vifi) &&
+	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
+		if (MIF_EXISTS(net, vifi) &&
 		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
@@ -607,15 +617,15 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl
 	}
 }
 
-static int mif6_add(struct mif6ctl *vifc, int mrtsock)
+static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 {
 	int vifi = vifc->mif6c_mifi;
-	struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
+	struct mif_device *v = &net->ipv6.vif6_table[vifi];
 	struct net_device *dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (MIF_EXISTS(&init_net, vifi))
+	if (MIF_EXISTS(net, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->mif6c_flags) {
@@ -625,9 +635,9 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		 * Special Purpose VIF in PIM
 		 * All the packets will be sent to the daemon
 		 */
-		if (init_net.ipv6.mroute_reg_vif_num >= 0)
+		if (net->ipv6.mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
-		dev = ip6mr_reg_vif();
+		dev = ip6mr_reg_vif(net);
 		if (!dev)
 			return -ENOBUFS;
 		err = dev_set_allmulti(dev, 1);
@@ -639,7 +649,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 		break;
 #endif
 	case 0:
-		dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
+		dev = dev_get_by_index(net, vifc->mif6c_pifi);
 		if (!dev)
 			return -EADDRNOTAVAIL;
 		err = dev_set_allmulti(dev, 1);
@@ -673,20 +683,22 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
 	v->dev = dev;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	if (v->flags & MIFF_REGISTER)
-		init_net.ipv6.mroute_reg_vif_num = vifi;
+		net->ipv6.mroute_reg_vif_num = vifi;
 #endif
-	if (vifi + 1 > init_net.ipv6.maxvif)
-		init_net.ipv6.maxvif = vifi + 1;
+	if (vifi + 1 > net->ipv6.maxvif)
+		net->ipv6.maxvif = vifi + 1;
 	write_unlock_bh(&mrt_lock);
 	return 0;
 }
 
-static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
+static struct mfc6_cache *ip6mr_cache_find(struct net *net,
+					   struct in6_addr *origin,
+					   struct in6_addr *mcastgrp)
 {
 	int line = MFC6_HASH(mcastgrp, origin);
 	struct mfc6_cache *c;
 
-	for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
+	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
 			break;
@@ -743,7 +755,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
 				skb_trim(skb, nlh->nlmsg_len);
 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 			}
-			err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
 		} else
 			ip6_mr_forward(skb, c);
 	}
@@ -756,7 +768,8 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
  *	Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
+static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
+			      int assert)
 {
 	struct sk_buff *skb;
 	struct mrt6msg *msg;
@@ -792,7 +805,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
 		msg = (struct mrt6msg *)skb_transport_header(skb);
 		msg->im6_mbz = 0;
 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
-		msg->im6_mif = init_net.ipv6.mroute_reg_vif_num;
+		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
 		msg->im6_pad = 0;
 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
@@ -829,7 +842,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
 	skb_pull(skb, sizeof(struct ipv6hdr));
 	}
 
-	if (init_net.ipv6.mroute6_sk == NULL) {
+	if (net->ipv6.mroute6_sk == NULL) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -837,7 +850,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
 	/*
 	 *	Deliver to user space multicast routing algorithms
 	 */
-	ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
+	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
 	if (ret < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
@@ -852,14 +865,14 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
  */
 
 static int
-ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
+ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 {
 	int err;
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
 	for (c = mfc_unres_queue; c; c = c->next) {
-		if (net_eq(mfc6_net(c), &init_net) &&
+		if (net_eq(mfc6_net(c), net) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
 			break;
@@ -870,8 +883,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
+		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
+		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -888,7 +901,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 		/*
 		 *	Reflect first query at pim6sd
 		 */
-		if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
+		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
+		if (err < 0) {
 			/* If the report failed throw the cache entry
 			   out - Brad Parker
 			 */
@@ -899,7 +913,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
 			return err;
 		}
 
-		atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
+		atomic_inc(&net->ipv6.cache_resolve_queue_len);
 		c->next = mfc_unres_queue;
 		mfc_unres_queue = c;
 
@@ -925,14 +939,14 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
  *	MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct mf6cctl *mfc)
+static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
 {
 	int line;
 	struct mfc6_cache *c, **cp;
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &init_net.ipv6.mfc6_cache_array[line];
+	for (cp = &net->ipv6.mfc6_cache_array[line];
 	     (c = *cp) != NULL; cp = &c->next) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
@@ -951,19 +965,17 @@ static int ip6mr_device_event(struct notifier_block *this,
 			      unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
 	struct mif_device *v;
 	int ct;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	v = &init_net.ipv6.vif6_table[0];
-	for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
+	v = &net->ipv6.vif6_table[0];
+	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
 		if (v->dev == dev)
-			mif6_delete(ct);
+			mif6_delete(net, ct);
 	}
 	return NOTIFY_DONE;
 }
@@ -1026,6 +1038,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 	proc_net_remove(net, "ip6_mr_cache");
 	proc_net_remove(net, "ip6_mr_vif");
 #endif
+	mroute_clean_tables(net);
 	kfree(net->ipv6.mfc6_cache_array);
 	kfree(net->ipv6.vif6_table);
 }
@@ -1071,7 +1084,7 @@ void ip6_mr_cleanup(void)
 	kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
+static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 {
 	int line;
 	struct mfc6_cache *uc, *c, **cp;
@@ -1087,7 +1100,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &init_net.ipv6.mfc6_cache_array[line];
+	for (cp = &net->ipv6.mfc6_cache_array[line];
 	     (c = *cp) != NULL; cp = &c->next) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
@@ -1107,7 +1120,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
 		return -EINVAL;
 
-	c = ip6mr_cache_alloc(&init_net);
+	c = ip6mr_cache_alloc(net);
 	if (c == NULL)
 		return -ENOMEM;
 
@@ -1119,8 +1132,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = init_net.ipv6.mfc6_cache_array[line];
-	init_net.ipv6.mfc6_cache_array[line] = c;
+	c->next = net->ipv6.mfc6_cache_array[line];
+	net->ipv6.mfc6_cache_array[line] = c;
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -1130,11 +1143,11 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
 	spin_lock_bh(&mfc_unres_lock);
 	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
 	     cp = &uc->next) {
-		if (net_eq(mfc6_net(uc), &init_net) &&
+		if (net_eq(mfc6_net(uc), net) &&
 		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			*cp = uc->next;
-			atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
+			atomic_dec(&net->ipv6.cache_resolve_queue_len);
 			break;
 		}
 	}
@@ -1153,16 +1166,16 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct sock *sk)
+static void mroute_clean_tables(struct net *net)
 {
 	int i;
 
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for (i = 0; i < init_net.ipv6.maxvif; i++) {
-		if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
-			mif6_delete(i);
+	for (i = 0; i < net->ipv6.maxvif; i++) {
+		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
+			mif6_delete(net, i);
 	}
 
 	/*
@@ -1171,7 +1184,7 @@ static void mroute_clean_tables(struct sock *sk)
 	for (i = 0; i < MFC6_LINES; i++) {
 		struct mfc6_cache *c, **cp;
 
-		cp = &init_net.ipv6.mfc6_cache_array[i];
+		cp = &net->ipv6.mfc6_cache_array[i];
 		while ((c = *cp) != NULL) {
 			if (c->mfc_flags & MFC_STATIC) {
 				cp = &c->next;
@@ -1185,13 +1198,13 @@ static void mroute_clean_tables(struct sock *sk)
 		}
 	}
 
-	if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
+	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
 		struct mfc6_cache *c, **cp;
 
 		spin_lock_bh(&mfc_unres_lock);
 		cp = &mfc_unres_queue;
 		while ((c = *cp) != NULL) {
-			if (!net_eq(mfc6_net(c), &init_net)) {
+			if (!net_eq(mfc6_net(c), net)) {
 				cp = &c->next;
 				continue;
 			}
@@ -1205,11 +1218,12 @@ static void mroute_clean_tables(struct sock *sk)
 static int ip6mr_sk_init(struct sock *sk)
 {
 	int err = 0;
+	struct net *net = sock_net(sk);
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (likely(init_net.ipv6.mroute6_sk == NULL))
-		init_net.ipv6.mroute6_sk = sk;
+	if (likely(net->ipv6.mroute6_sk == NULL))
+		net->ipv6.mroute6_sk = sk;
 	else
 		err = -EADDRINUSE;
 	write_unlock_bh(&mrt_lock);
@@ -1222,14 +1236,15 @@ static int ip6mr_sk_init(struct sock *sk)
 int ip6mr_sk_done(struct sock *sk)
 {
 	int err = 0;
+	struct net *net = sock_net(sk);
 
 	rtnl_lock();
-	if (sk == init_net.ipv6.mroute6_sk) {
+	if (sk == net->ipv6.mroute6_sk) {
 		write_lock_bh(&mrt_lock);
-		init_net.ipv6.mroute6_sk = NULL;
+		net->ipv6.mroute6_sk = NULL;
 		write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(sk);
+		mroute_clean_tables(net);
 	} else
 		err = -EACCES;
 	rtnl_unlock();
@@ -1250,9 +1265,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 	struct mif6ctl vif;
 	struct mf6cctl mfc;
 	mifi_t mifi;
+	struct net *net = sock_net(sk);
 
 	if (optname != MRT6_INIT) {
-		if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
+		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -1277,7 +1293,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		if (vif.mif6c_mifi >= MAXMIFS)
 			return -ENFILE;
 		rtnl_lock();
-		ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
+		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1287,7 +1303,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
 			return -EFAULT;
 		rtnl_lock();
-		ret = mif6_delete(mifi);
+		ret = mif6_delete(net, mifi);
 		rtnl_unlock();
 		return ret;
 
@@ -1303,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 			return -EFAULT;
 		rtnl_lock();
 		if (optname == MRT6_DEL_MFC)
-			ret = ip6mr_mfc_delete(&mfc);
+			ret = ip6mr_mfc_delete(net, &mfc);
 		else
-			ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
+			ret = ip6mr_mfc_add(net, &mfc,
+					    sk == net->ipv6.mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1317,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		int v;
 		if (get_user(v, (int __user *)optval))
 			return -EFAULT;
-		init_net.ipv6.mroute_do_assert = !!v;
+		net->ipv6.mroute_do_assert = !!v;
 		return 0;
 	}
 
@@ -1330,10 +1347,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
 		v = !!v;
 		rtnl_lock();
 		ret = 0;
-		if (v != init_net.ipv6.mroute_do_pim) {
-			init_net.ipv6.mroute_do_pim = v;
-			init_net.ipv6.mroute_do_assert = v;
-			if (init_net.ipv6.mroute_do_pim)
+		if (v != net->ipv6.mroute_do_pim) {
+			net->ipv6.mroute_do_pim = v;
+			net->ipv6.mroute_do_assert = v;
+			if (net->ipv6.mroute_do_pim)
 				ret = inet6_add_protocol(&pim6_protocol,
 							 IPPROTO_PIM);
 			else
@@ -1365,6 +1382,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 {
 	int olr;
 	int val;
+	struct net *net = sock_net(sk);
 
 	switch (optname) {
 	case MRT6_VERSION:
@@ -1372,11 +1390,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 		break;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	case MRT6_PIM:
-		val = init_net.ipv6.mroute_do_pim;
+		val = net->ipv6.mroute_do_pim;
 		break;
 #endif
 	case MRT6_ASSERT:
-		val = init_net.ipv6.mroute_do_assert;
+		val = net->ipv6.mroute_do_assert;
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -1406,16 +1424,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct sioc_mif_req6 vr;
 	struct mif_device *vif;
 	struct mfc6_cache *c;
+	struct net *net = sock_net(sk);
 
 	switch (cmd) {
 	case SIOCGETMIFCNT_IN6:
 		if (copy_from_user(&vr, arg, sizeof(vr)))
 			return -EFAULT;
-		if (vr.mifi >= init_net.ipv6.maxvif)
+		if (vr.mifi >= net->ipv6.maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &init_net.ipv6.vif6_table[vr.mifi];
-		if (MIF_EXISTS(&init_net, vr.mifi)) {
+		vif = &net->ipv6.vif6_table[vr.mifi];
+		if (MIF_EXISTS(net, vr.mifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1433,7 +1452,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 			return -EFAULT;
 
 		read_lock(&mrt_lock);
-		c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
+		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
@@ -1466,7 +1485,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
 static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
+	struct net *net = mfc6_net(c);
+	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
 	struct flowi fl;
@@ -1480,7 +1500,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 		vif->bytes_out += skb->len;
 		vif->dev->stats.tx_bytes += skb->len;
 		vif->dev->stats.tx_packets++;
-		ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
+		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
 		kfree_skb(skb);
 		return 0;
 	}
@@ -1495,7 +1515,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 		}
 	};
 
-	dst = ip6_route_output(&init_net, NULL, &fl);
+	dst = ip6_route_output(net, NULL, &fl);
 	if (!dst)
 		goto out_free;
 
@@ -1538,9 +1558,10 @@ out_free:
 
 static int ip6mr_find_vif(struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
 	int ct;
-	for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
-		if (init_net.ipv6.vif6_table[ct].dev == dev)
+	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
+		if (net->ipv6.vif6_table[ct].dev == dev)
 			break;
 	}
 	return ct;
@@ -1550,6 +1571,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 {
 	int psend = -1;
 	int vif, ct;
+	struct net *net = mfc6_net(cache);
 
 	vif = cache->mf6c_parent;
 	cache->mfc_un.res.pkt++;
@@ -1558,30 +1580,30 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
+	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
 		int true_vifi;
 
 		cache->mfc_un.res.wrong_if++;
 		true_vifi = ip6mr_find_vif(skb->dev);
 
-		if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
+		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
 		       so that we cannot check that packet arrived on an oif.
 		       It is bad, but otherwise we would need to move pretty
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
-		    (init_net.ipv6.mroute_do_pim ||
+		    (net->ipv6.mroute_do_pim ||
 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
 			cache->mfc_un.res.last_assert = jiffies;
-			ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
+			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
 		}
 		goto dont_forward;
 	}
 
-	init_net.ipv6.vif6_table[vif].pkt_in++;
-	init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
+	net->ipv6.vif6_table[vif].pkt_in++;
+	net->ipv6.vif6_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -1614,9 +1636,11 @@ dont_forward:
 int ip6_mr_input(struct sk_buff *skb)
 {
 	struct mfc6_cache *cache;
+	struct net *net = dev_net(skb->dev);
 
 	read_lock(&mrt_lock);
-	cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+	cache = ip6mr_cache_find(net,
+				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
 
 	/*
 	 *	No usable cache entry
@@ -1626,7 +1650,7 @@ int ip6_mr_input(struct sk_buff *skb)
 
 		vif = ip6mr_find_vif(skb->dev);
 		if (vif >= 0) {
-			int err = ip6mr_cache_unresolved(vif, skb);
+			int err = ip6mr_cache_unresolved(net, vif, skb);
 			read_unlock(&mrt_lock);
 
 			return err;
@@ -1649,7 +1673,8 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
+	struct net *net = mfc6_net(c);
+	struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1665,7 +1690,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -1679,14 +1704,15 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
-int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ip6mr_get_route(struct net *net,
+		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
 	struct mfc6_cache *cache;
 	struct rt6_info *rt = (struct rt6_info *)skb->dst;
 
 	read_lock(&mrt_lock);
-	cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
 
 	if (!cache) {
 		struct sk_buff *skb2;
@@ -1729,7 +1755,7 @@ int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
 
-		err = ip6mr_cache_unresolved(vif, skb2);
+		err = ip6mr_cache_unresolved(net, vif, skb2);
 		read_unlock(&mrt_lock);
 
 		return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9da1ece466a2..18c486cf4987 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2194,7 +2194,7 @@ static int rt6_fill_node(struct net *net,
 	if (iif) {
 #ifdef CONFIG_IPV6_MROUTE
 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
-			int err = ip6mr_get_route(skb, rtm, nowait);
+			int err = ip6mr_get_route(net, skb, rtm, nowait);
 			if (err <= 0) {
 				if (!nowait) {
 					if (err == 0)
-- 
cgit v1.2.3


From 6c34bc2976b30dc8b56392c020e25bae1f363cab Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 10 Dec 2008 09:26:17 -0800
Subject: Revert "radeonfb: accelerate imageblit and other improvements"

This reverts commit b1ee26bab14886350ba12a5c10cbc0696ac679bf, along with
the "fixes" for it that all just caused problems:

 - c4c6fa9891f3d1bcaae4f39fb751d5302965b566 "radeonfb: fix problem with
   color expansion & alignment"

 - f3179748a157c21d44d929fd3779421ebfbeaa93 "radeonfb: Disable new color
   expand acceleration unless explicitely enabled"

because even when disabled, it breaks for people. See

	http://bugzilla.kernel.org/show_bug.cgi?id=12191

for the latest example.

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Krzysztof Halasa <khc@pm.waw.pl>
Cc: James Cloos <cloos@jhcloos.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Jean-Luc Coulon <jean.luc.coulon@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/aty/radeon_accel.c     | 295 ++++++++++-------------------------
 drivers/video/aty/radeon_backlight.c |   2 +-
 drivers/video/aty/radeon_base.c      |  46 ++----
 drivers/video/aty/radeon_pm.c        |   6 +-
 drivers/video/aty/radeonfb.h         |  40 ++---
 include/video/radeon.h               |  18 +--
 6 files changed, 127 insertions(+), 280 deletions(-)

(limited to 'include')

diff --git a/drivers/video/aty/radeon_accel.c b/drivers/video/aty/radeon_accel.c
index 8da5e5ab8547..a469a3d6edcb 100644
--- a/drivers/video/aty/radeon_accel.c
+++ b/drivers/video/aty/radeon_accel.c
@@ -5,61 +5,61 @@
  * --dte
  */
 
-#define FLUSH_CACHE_WORKAROUND	1
-
-void radeon_fifo_update_and_wait(struct radeonfb_info *rinfo, int entries)
+static void radeon_fixup_offset(struct radeonfb_info *rinfo)
 {
-	int i;
+	u32 local_base;
+
+	/* *** Ugly workaround *** */
+	/*
+	 * On some platforms, the video memory is mapped at 0 in radeon chip space
+	 * (like PPCs) by the firmware. X will always move it up so that it's seen
+	 * by the chip to be at the same address as the PCI BAR.
+	 * That means that when switching back from X, there is a mismatch between
+	 * the offsets programmed into the engine. This means that potentially,
+	 * accel operations done before radeonfb has a chance to re-init the engine
+	 * will have incorrect offsets, and potentially trash system memory !
+	 *
+	 * The correct fix is for fbcon to never call any accel op before the engine
+	 * has properly been re-initialized (by a call to set_var), but this is a
+	 * complex fix. This workaround in the meantime, called before every accel
+	 * operation, makes sure the offsets are in sync.
+	 */
 
-	for (i=0; i<2000000; i++) {
-		rinfo->fifo_free = INREG(RBBM_STATUS) & 0x7f;
-		if (rinfo->fifo_free >= entries)
-			return;
-		udelay(10);
-	}
-	printk(KERN_ERR "radeonfb: FIFO Timeout !\n");
-	/* XXX Todo: attempt to reset the engine */
-}
+	radeon_fifo_wait (1);
+	local_base = INREG(MC_FB_LOCATION) << 16;
+	if (local_base == rinfo->fb_local_base)
+		return;
 
-static inline void radeon_fifo_wait(struct radeonfb_info *rinfo, int entries)
-{
-	if (entries <= rinfo->fifo_free)
-		rinfo->fifo_free -= entries;
-	else
-		radeon_fifo_update_and_wait(rinfo, entries);
-}
+	rinfo->fb_local_base = local_base;
 
-static inline void radeonfb_set_creg(struct radeonfb_info *rinfo, u32 reg,
-				     u32 *cache, u32 new_val)
-{
-	if (new_val == *cache)
-		return;
-	*cache = new_val;
-	radeon_fifo_wait(rinfo, 1);
-	OUTREG(reg, new_val);
+	radeon_fifo_wait (3);
+	OUTREG(DEFAULT_PITCH_OFFSET, (rinfo->pitch << 0x16) |
+				     (rinfo->fb_local_base >> 10));
+	OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
+	OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
 }
 
 static void radeonfb_prim_fillrect(struct radeonfb_info *rinfo, 
 				   const struct fb_fillrect *region)
 {
-	radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache,
-			  rinfo->dp_gui_mc_base | GMC_BRUSH_SOLID_COLOR | ROP3_P);
-	radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache,
-			  DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM);
-	radeonfb_set_creg(rinfo, DP_BRUSH_FRGD_CLR, &rinfo->dp_brush_fg_cache,
-			  region->color);
-
-	/* Ensure the dst cache is flushed and the engine idle before
-	 * issuing the operation.
-	 *
-	 * This works around engine lockups on some cards
-	 */
-#if FLUSH_CACHE_WORKAROUND
-	radeon_fifo_wait(rinfo, 2);
+	radeon_fifo_wait(4);  
+  
+	OUTREG(DP_GUI_MASTER_CNTL,  
+		rinfo->dp_gui_master_cntl  /* contains, like GMC_DST_32BPP */
+                | GMC_BRUSH_SOLID_COLOR
+                | ROP3_P);
+	if (radeon_get_dstbpp(rinfo->depth) != DST_8BPP)
+		OUTREG(DP_BRUSH_FRGD_CLR, rinfo->pseudo_palette[region->color]);
+	else
+		OUTREG(DP_BRUSH_FRGD_CLR, region->color);
+	OUTREG(DP_WRITE_MSK, 0xffffffff);
+	OUTREG(DP_CNTL, (DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM));
+
+	radeon_fifo_wait(2);
 	OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL);
 	OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE));
-#endif
-	radeon_fifo_wait(rinfo, 2);
+
+	radeon_fifo_wait(2);  
 	OUTREG(DST_Y_X, (region->dy << 16) | region->dx);
 	OUTREG(DST_WIDTH_HEIGHT, (region->width << 16) | region->height);
 }
@@ -70,14 +70,15 @@ void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region)
 	struct fb_fillrect modded;
 	int vxres, vyres;
   
-	WARN_ON(rinfo->gfx_mode);
-	if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode)
+	if (info->state != FBINFO_STATE_RUNNING)
 		return;
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
 		cfb_fillrect(info, region);
 		return;
 	}
 
+	radeon_fixup_offset(rinfo);
+
 	vxres = info->var.xres_virtual;
 	vyres = info->var.yres_virtual;
 
@@ -90,10 +91,6 @@ void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region)
 	if(modded.dx + modded.width  > vxres) modded.width  = vxres - modded.dx;
 	if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy;
 
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR )
-		modded.color = ((u32 *) (info->pseudo_palette))[region->color];
-
 	radeonfb_prim_fillrect(rinfo, &modded);
 }
 
@@ -112,22 +109,22 @@ static void radeonfb_prim_copyarea(struct radeonfb_info *rinfo,
 	if ( xdir < 0 ) { sx += w-1; dx += w-1; }
 	if ( ydir < 0 ) { sy += h-1; dy += h-1; }
 
-	radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache,
-			  rinfo->dp_gui_mc_base |
-			  GMC_BRUSH_NONE |
-			  GMC_SRC_DATATYPE_COLOR |
-			  ROP3_S |
-			  DP_SRC_SOURCE_MEMORY);
-	radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache,
-			  (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0) |
-			  (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0));
-
-#if FLUSH_CACHE_WORKAROUND
-	radeon_fifo_wait(rinfo, 2);
+	radeon_fifo_wait(3);
+	OUTREG(DP_GUI_MASTER_CNTL,
+		rinfo->dp_gui_master_cntl /* i.e. GMC_DST_32BPP */
+		| GMC_BRUSH_NONE
+		| GMC_SRC_DSTCOLOR
+		| ROP3_S 
+		| DP_SRC_SOURCE_MEMORY );
+	OUTREG(DP_WRITE_MSK, 0xffffffff);
+	OUTREG(DP_CNTL, (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0)
+			| (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0));
+
+	radeon_fifo_wait(2);
 	OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL);
 	OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE));
-#endif
-	radeon_fifo_wait(rinfo, 3);
+
+	radeon_fifo_wait(3);
 	OUTREG(SRC_Y_X, (sy << 16) | sx);
 	OUTREG(DST_Y_X, (dy << 16) | dx);
 	OUTREG(DST_HEIGHT_WIDTH, (h << 16) | w);
@@ -146,14 +143,15 @@ void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 	modded.width  = area->width;
 	modded.height = area->height;
   
-	WARN_ON(rinfo->gfx_mode);
-	if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode)
+	if (info->state != FBINFO_STATE_RUNNING)
 		return;
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
 		cfb_copyarea(info, area);
 		return;
 	}
 
+	radeon_fixup_offset(rinfo);
+
 	vxres = info->var.xres_virtual;
 	vyres = info->var.yres_virtual;
 
@@ -170,116 +168,13 @@ void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 	radeonfb_prim_copyarea(rinfo, &modded);
 }
 
-static void radeonfb_prim_imageblit(struct radeonfb_info *rinfo,
-				    const struct fb_image *image,
-				    u32 fg, u32 bg)
-{
-	unsigned int dwords;
-	u32 *bits;
-
-	radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache,
-			  rinfo->dp_gui_mc_base |
-			  GMC_BRUSH_NONE | GMC_DST_CLIP_LEAVE |
-			  GMC_SRC_DATATYPE_MONO_FG_BG |
-			  ROP3_S |
-			  GMC_BYTE_ORDER_MSB_TO_LSB |
-			  DP_SRC_SOURCE_HOST_DATA);
-	radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache,
-			  DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM);
-	radeonfb_set_creg(rinfo, DP_SRC_FRGD_CLR, &rinfo->dp_src_fg_cache, fg);
-	radeonfb_set_creg(rinfo, DP_SRC_BKGD_CLR, &rinfo->dp_src_bg_cache, bg);
-
-	/* Ensure the dst cache is flushed and the engine idle before
-	 * issuing the operation.
-	 *
-	 * This works around engine lockups on some cards
-	 */
-#if FLUSH_CACHE_WORKAROUND
-	radeon_fifo_wait(rinfo, 2);
-	OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL);
-	OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE));
-#endif
-
-	/* X here pads width to a multiple of 32 and uses the clipper to
-	 * adjust the result. Is that really necessary ? Things seem to
-	 * work ok for me without that and the doco doesn't seem to imply]
-	 * there is such a restriction.
-	 */
-	radeon_fifo_wait(rinfo, 4);
-	OUTREG(SC_TOP_LEFT, (image->dy << 16) | image->dx);
-	OUTREG(SC_BOTTOM_RIGHT, ((image->dy + image->height) << 16) |
-	       (image->dx + image->width));
-	OUTREG(DST_Y_X, (image->dy << 16) | image->dx);
-
-	OUTREG(DST_HEIGHT_WIDTH, (image->height << 16) | ((image->width + 31) & ~31));
-
-	dwords = (image->width + 31) >> 5;
-	dwords *= image->height;
-	bits = (u32*)(image->data);
-
-	while(dwords >= 8) {
-		radeon_fifo_wait(rinfo, 8);
-#if BITS_PER_LONG == 64
-		__raw_writeq(*((u64 *)(bits)), rinfo->mmio_base + HOST_DATA0);
-		__raw_writeq(*((u64 *)(bits+2)), rinfo->mmio_base + HOST_DATA2);
-		__raw_writeq(*((u64 *)(bits+4)), rinfo->mmio_base + HOST_DATA4);
-		__raw_writeq(*((u64 *)(bits+6)), rinfo->mmio_base + HOST_DATA6);
-		bits += 8;
-#else
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA1);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA2);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA3);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA4);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA5);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA6);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA7);
-#endif
-		dwords -= 8;
-	}
-	while(dwords--) {
-		radeon_fifo_wait(rinfo, 1);
-		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0);
-	}
-}
-
 void radeonfb_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct radeonfb_info *rinfo = info->par;
-	u32 fg, bg;
 
-	WARN_ON(rinfo->gfx_mode);
-	if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode)
-		return;
-
-	if (!image->width || !image->height)
+	if (info->state != FBINFO_STATE_RUNNING)
 		return;
-
-	/* We only do 1 bpp color expansion for now */
-	if (!accel_cexp ||
-	    (info->flags & FBINFO_HWACCEL_DISABLED) || image->depth != 1)
-		goto fallback;
-
-	/* Fallback if running out of the screen. We may do clipping
-	 * in the future */
-	if ((image->dx + image->width) > info->var.xres_virtual ||
-	    (image->dy + image->height) > info->var.yres_virtual)
-		goto fallback;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
-		fg = ((u32*)(info->pseudo_palette))[image->fg_color];
-		bg = ((u32*)(info->pseudo_palette))[image->bg_color];
-	} else {
-		fg = image->fg_color;
-		bg = image->bg_color;
-	}
-
-	radeonfb_prim_imageblit(rinfo, image, fg, bg);
-	return;
-
- fallback:
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle();
 
 	cfb_imageblit(info, image);
 }
@@ -290,8 +185,7 @@ int radeonfb_sync(struct fb_info *info)
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return 0;
-
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle();
 
 	return 0;
 }
@@ -367,10 +261,9 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo)
 	/* disable 3D engine */
 	OUTREG(RB3D_CNTL, 0);
 
-	rinfo->fifo_free = 0;
 	radeonfb_engine_reset(rinfo);
 
-	radeon_fifo_wait(rinfo, 1);
+	radeon_fifo_wait (1);
 	if (IS_R300_VARIANT(rinfo)) {
 		OUTREG(RB2D_DSTCACHE_MODE, INREG(RB2D_DSTCACHE_MODE) |
 		       RB2D_DC_AUTOFLUSH_ENABLE |
@@ -384,7 +277,7 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo)
 		OUTREG(RB2D_DSTCACHE_MODE, 0);
 	}
 
-	radeon_fifo_wait(rinfo, 3);
+	radeon_fifo_wait (3);
 	/* We re-read MC_FB_LOCATION from card as it can have been
 	 * modified by XFree drivers (ouch !)
 	 */
@@ -395,57 +288,41 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo)
 	OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
 	OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10));
 
-	radeon_fifo_wait(rinfo, 1);
-#ifdef __BIG_ENDIAN
+	radeon_fifo_wait (1);
+#if defined(__BIG_ENDIAN)
 	OUTREGP(DP_DATATYPE, HOST_BIG_ENDIAN_EN, ~HOST_BIG_ENDIAN_EN);
 #else
 	OUTREGP(DP_DATATYPE, 0, ~HOST_BIG_ENDIAN_EN);
 #endif
-	radeon_fifo_wait(rinfo, 2);
+	radeon_fifo_wait (2);
 	OUTREG(DEFAULT_SC_TOP_LEFT, 0);
 	OUTREG(DEFAULT_SC_BOTTOM_RIGHT, (DEFAULT_SC_RIGHT_MAX |
 					 DEFAULT_SC_BOTTOM_MAX));
 
-	/* set default DP_GUI_MASTER_CNTL */
 	temp = radeon_get_dstbpp(rinfo->depth);
-	rinfo->dp_gui_mc_base = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS);
+	rinfo->dp_gui_master_cntl = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS);
 
-	rinfo->dp_gui_mc_cache = rinfo->dp_gui_mc_base |
-		GMC_BRUSH_SOLID_COLOR |
-		GMC_SRC_DATATYPE_COLOR;
-	radeon_fifo_wait(rinfo, 1);
-	OUTREG(DP_GUI_MASTER_CNTL, rinfo->dp_gui_mc_cache);
+	radeon_fifo_wait (1);
+	OUTREG(DP_GUI_MASTER_CNTL, (rinfo->dp_gui_master_cntl |
+				    GMC_BRUSH_SOLID_COLOR |
+				    GMC_SRC_DATATYPE_COLOR));
 
+	radeon_fifo_wait (7);
 
 	/* clear line drawing regs */
-	radeon_fifo_wait(rinfo, 2);
 	OUTREG(DST_LINE_START, 0);
 	OUTREG(DST_LINE_END, 0);
 
-	/* set brush and source color regs */
-	rinfo->dp_brush_fg_cache = 0xffffffff;
-	rinfo->dp_brush_bg_cache = 0x00000000;
-	rinfo->dp_src_fg_cache = 0xffffffff;
-	rinfo->dp_src_bg_cache = 0x00000000;
-	radeon_fifo_wait(rinfo, 4);
-	OUTREG(DP_BRUSH_FRGD_CLR, rinfo->dp_brush_fg_cache);
-	OUTREG(DP_BRUSH_BKGD_CLR, rinfo->dp_brush_bg_cache);
-	OUTREG(DP_SRC_FRGD_CLR, rinfo->dp_src_fg_cache);
-	OUTREG(DP_SRC_BKGD_CLR, rinfo->dp_src_bg_cache);
-
-	/* Default direction */
-	rinfo->dp_cntl_cache = DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM;
-	radeon_fifo_wait(rinfo, 1);
-	OUTREG(DP_CNTL, rinfo->dp_cntl_cache);
+	/* set brush color regs */
+	OUTREG(DP_BRUSH_FRGD_CLR, 0xffffffff);
+	OUTREG(DP_BRUSH_BKGD_CLR, 0x00000000);
+
+	/* set source color regs */
+	OUTREG(DP_SRC_FRGD_CLR, 0xffffffff);
+	OUTREG(DP_SRC_BKGD_CLR, 0x00000000);
 
 	/* default write mask */
-	radeon_fifo_wait(rinfo, 1);
 	OUTREG(DP_WRITE_MSK, 0xffffffff);
 
-	/* Default to no swapping of host data */
-	radeon_fifo_wait(rinfo, 1);
-	OUTREG(RBBM_GUICNTL, RBBM_GUICNTL_HOST_DATA_SWAP_NONE);
-
-	/* Make sure it's settled */
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle ();
 }
diff --git a/drivers/video/aty/radeon_backlight.c b/drivers/video/aty/radeon_backlight.c
index f343ba83f0ae..1a056adb61c8 100644
--- a/drivers/video/aty/radeon_backlight.c
+++ b/drivers/video/aty/radeon_backlight.c
@@ -66,7 +66,7 @@ static int radeon_bl_update_status(struct backlight_device *bd)
 		level = bd->props.brightness;
 
 	del_timer_sync(&rinfo->lvds_timer);
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle();
 
 	lvds_gen_cntl = INREG(LVDS_GEN_CNTL);
 	if (level > 0) {
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index d5b27f9d374d..d0f1a7fc2c9d 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -282,8 +282,6 @@ static int backlight = 1;
 static int backlight = 0;
 #endif
 
-int accel_cexp = 0;
-
 /*
  * prototypes
  */
@@ -854,6 +852,7 @@ static int radeonfb_pan_display (struct fb_var_screeninfo *var,
         if (rinfo->asleep)
         	return 0;
 
+	radeon_fifo_wait(2);
         OUTREG(CRTC_OFFSET, ((var->yoffset * var->xres_virtual + var->xoffset)
 			     * var->bits_per_pixel / 8) & ~7);
         return 0;
@@ -883,6 +882,7 @@ static int radeonfb_ioctl (struct fb_info *info, unsigned int cmd,
 			if (rc)
 				return rc;
 
+			radeon_fifo_wait(2);
 			if (value & 0x01) {
 				tmp = INREG(LVDS_GEN_CNTL);
 
@@ -940,7 +940,7 @@ int radeon_screen_blank(struct radeonfb_info *rinfo, int blank, int mode_switch)
 	if (rinfo->lock_blank)
 		return 0;
 
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle();
 
 	val = INREG(CRTC_EXT_CNTL);
         val &= ~(CRTC_DISPLAY_DIS | CRTC_HSYNC_DIS |
@@ -1048,7 +1048,7 @@ static int radeonfb_blank (int blank, struct fb_info *info)
 
 	if (rinfo->asleep)
 		return 0;
-
+		
 	return radeon_screen_blank(rinfo, blank, 0);
 }
 
@@ -1074,6 +1074,8 @@ static int radeon_setcolreg (unsigned regno, unsigned red, unsigned green,
         pindex = regno;
 
         if (!rinfo->asleep) {
+		radeon_fifo_wait(9);
+
 		if (rinfo->bpp == 16) {
 			pindex = regno * 8;
 
@@ -1242,6 +1244,8 @@ static void radeon_write_pll_regs(struct radeonfb_info *rinfo, struct radeon_reg
 {
 	int i;
 
+	radeon_fifo_wait(20);
+
 	/* Workaround from XFree */
 	if (rinfo->is_mobility) {
 	        /* A temporal workaround for the occational blanking on certain laptop
@@ -1337,7 +1341,7 @@ static void radeon_lvds_timer_func(unsigned long data)
 {
 	struct radeonfb_info *rinfo = (struct radeonfb_info *)data;
 
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle();
 
 	OUTREG(LVDS_GEN_CNTL, rinfo->pending_lvds_gen_cntl);
 }
@@ -1355,11 +1359,10 @@ void radeon_write_mode (struct radeonfb_info *rinfo, struct radeon_regs *mode,
 	if (nomodeset)
 		return;
 
-	radeon_engine_idle(rinfo);
-
 	if (!regs_only)
 		radeon_screen_blank(rinfo, FB_BLANK_NORMAL, 0);
 
+	radeon_fifo_wait(31);
 	for (i=0; i<10; i++)
 		OUTREG(common_regs[i].reg, common_regs[i].val);
 
@@ -1387,6 +1390,7 @@ void radeon_write_mode (struct radeonfb_info *rinfo, struct radeon_regs *mode,
 	radeon_write_pll_regs(rinfo, mode);
 
 	if ((primary_mon == MT_DFP) || (primary_mon == MT_LCD)) {
+		radeon_fifo_wait(10);
 		OUTREG(FP_CRTC_H_TOTAL_DISP, mode->fp_crtc_h_total_disp);
 		OUTREG(FP_CRTC_V_TOTAL_DISP, mode->fp_crtc_v_total_disp);
 		OUTREG(FP_H_SYNC_STRT_WID, mode->fp_h_sync_strt_wid);
@@ -1401,6 +1405,7 @@ void radeon_write_mode (struct radeonfb_info *rinfo, struct radeon_regs *mode,
 	if (!regs_only)
 		radeon_screen_blank(rinfo, FB_BLANK_UNBLANK, 0);
 
+	radeon_fifo_wait(2);
 	OUTPLL(VCLK_ECP_CNTL, mode->vclk_ecp_cntl);
 	
 	return;
@@ -1551,7 +1556,7 @@ static int radeonfb_set_par(struct fb_info *info)
 	/* We always want engine to be idle on a mode switch, even
 	 * if we won't actually change the mode
 	 */
-	radeon_engine_idle(rinfo);
+	radeon_engine_idle();
 
 	hSyncStart = mode->xres + mode->right_margin;
 	hSyncEnd = hSyncStart + mode->hsync_len;
@@ -1846,6 +1851,7 @@ static int radeonfb_set_par(struct fb_info *info)
 	return 0;
 }
 
+
 static struct fb_ops radeonfb_ops = {
 	.owner			= THIS_MODULE,
 	.fb_check_var		= radeonfb_check_var,
@@ -1869,7 +1875,6 @@ static int __devinit radeon_set_fbinfo (struct radeonfb_info *rinfo)
 	info->par = rinfo;
 	info->pseudo_palette = rinfo->pseudo_palette;
 	info->flags = FBINFO_DEFAULT
-		    | FBINFO_HWACCEL_IMAGEBLIT
 		    | FBINFO_HWACCEL_COPYAREA
 		    | FBINFO_HWACCEL_FILLRECT
 		    | FBINFO_HWACCEL_XPAN
@@ -1877,7 +1882,6 @@ static int __devinit radeon_set_fbinfo (struct radeonfb_info *rinfo)
 	info->fbops = &radeonfb_ops;
 	info->screen_base = rinfo->fb_base;
 	info->screen_size = rinfo->mapped_vram;
-
 	/* Fill fix common fields */
 	strlcpy(info->fix.id, rinfo->name, sizeof(info->fix.id));
         info->fix.smem_start = rinfo->fb_base_phys;
@@ -1892,25 +1896,8 @@ static int __devinit radeon_set_fbinfo (struct radeonfb_info *rinfo)
         info->fix.mmio_len = RADEON_REGSIZE;
 	info->fix.accel = FB_ACCEL_ATI_RADEON;
 
-	/* Allocate colormap */
 	fb_alloc_cmap(&info->cmap, 256, 0);
 
-	/* Setup pixmap used for acceleration */
-#define PIXMAP_SIZE	(2048 * 4)
-
-	info->pixmap.addr = kmalloc(PIXMAP_SIZE, GFP_KERNEL);
-	if (!info->pixmap.addr) {
-		printk(KERN_ERR "radeonfb: Failed to allocate pixmap !\n");
-		noaccel = 1;
-		goto bail;
-	}
-	info->pixmap.size = PIXMAP_SIZE;
-	info->pixmap.flags = FB_PIXMAP_SYSTEM;
-	info->pixmap.scan_align = 4;
-	info->pixmap.buf_align = 4;
-	info->pixmap.access_align = 32;
-
-bail:
 	if (noaccel)
 		info->flags |= FBINFO_HWACCEL_DISABLED;
 
@@ -2019,6 +2006,7 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo)
           u32 tom = INREG(NB_TOM);
           tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024);
 
+ 		radeon_fifo_wait(6);
           OUTREG(MC_FB_LOCATION, tom);
           OUTREG(DISPLAY_BASE_ADDR, (tom & 0xffff) << 16);
           OUTREG(CRTC2_DISPLAY_BASE_ADDR, (tom & 0xffff) << 16);
@@ -2522,8 +2510,6 @@ static int __init radeonfb_setup (char *options)
 		} else if (!strncmp(this_opt, "ignore_devlist", 14)) {
 			ignore_devlist = 1;
 #endif
-		} else if (!strncmp(this_opt, "accel_cexp", 12)) {
-			accel_cexp = 1;
 		} else
 			mode_option = this_opt;
 	}
@@ -2571,8 +2557,6 @@ module_param(monitor_layout, charp, 0);
 MODULE_PARM_DESC(monitor_layout, "Specify monitor mapping (like XFree86)");
 module_param(force_measure_pll, bool, 0);
 MODULE_PARM_DESC(force_measure_pll, "Force measurement of PLL (debug)");
-module_param(accel_cexp, bool, 0);
-MODULE_PARM_DESC(accel_cexp, "Use acceleration engine for color expansion");
 #ifdef CONFIG_MTRR
 module_param(nomtrr, bool, 0);
 MODULE_PARM_DESC(nomtrr, "bool: disable use of MTRR registers");
diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c
index 3df5015f1d13..675abdafc2d8 100644
--- a/drivers/video/aty/radeon_pm.c
+++ b/drivers/video/aty/radeon_pm.c
@@ -2653,9 +2653,9 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED)) {
 		/* Make sure engine is reset */
-		radeon_engine_idle(rinfo);
+		radeon_engine_idle();
 		radeonfb_engine_reset(rinfo);
-		radeon_engine_idle(rinfo);
+		radeon_engine_idle();
 	}
 
 	/* Blank display and LCD */
@@ -2767,7 +2767,7 @@ int radeonfb_pci_resume(struct pci_dev *pdev)
 
 		rinfo->asleep = 0;
 	} else
-		radeon_engine_idle(rinfo);
+		radeon_engine_idle();
 
 	/* Restore display & engine */
 	radeon_write_mode (rinfo, &rinfo->state, 1);
diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h
index 974ca6d86540..3ea1b00fdd22 100644
--- a/drivers/video/aty/radeonfb.h
+++ b/drivers/video/aty/radeonfb.h
@@ -336,15 +336,7 @@ struct radeonfb_info {
 	int			mon2_type;
 	u8		        *mon2_EDID;
 
-	/* accel bits */
-	u32			dp_gui_mc_base;
-	u32			dp_gui_mc_cache;
-	u32			dp_cntl_cache;
-	u32			dp_brush_fg_cache;
-	u32			dp_brush_bg_cache;
-	u32			dp_src_fg_cache;
-	u32			dp_src_bg_cache;
-	u32			fifo_free;
+	u32			dp_gui_master_cntl;
 
 	struct pll_info		pll;
 
@@ -356,7 +348,6 @@ struct radeonfb_info {
 	int			lock_blank;
 	int			dynclk;
 	int			no_schedule;
-	int 			gfx_mode;
 	enum radeon_pm_mode	pm_mode;
 	reinit_function_ptr     reinit_func;
 
@@ -401,14 +392,8 @@ static inline void _radeon_msleep(struct radeonfb_info *rinfo, unsigned long ms)
 #define OUTREG8(addr,val)	writeb(val, (rinfo->mmio_base)+addr)
 #define INREG16(addr)		readw((rinfo->mmio_base)+addr)
 #define OUTREG16(addr,val)	writew(val, (rinfo->mmio_base)+addr)
-
-#ifdef CONFIG_PPC
-#define INREG(addr)	     	({ eieio(); ld_le32(rinfo->mmio_base+(addr)); })
-#define OUTREG(addr,val)	do { eieio(); st_le32(rinfo->mmio_base+(addr),(val)); } while(0)
-#else
 #define INREG(addr)		readl((rinfo->mmio_base)+addr)
 #define OUTREG(addr,val)	writel(val, (rinfo->mmio_base)+addr)
-#endif
 
 static inline void _OUTREGP(struct radeonfb_info *rinfo, u32 addr,
 		       u32 val, u32 mask)
@@ -550,7 +535,17 @@ static inline u32 radeon_get_dstbpp(u16 depth)
  * 2D Engine helper routines
  */
 
-extern void radeon_fifo_update_and_wait(struct radeonfb_info *rinfo, int entries);
+static inline void _radeon_fifo_wait(struct radeonfb_info *rinfo, int entries)
+{
+	int i;
+
+	for (i=0; i<2000000; i++) {
+		if ((INREG(RBBM_STATUS) & 0x7f) >= entries)
+			return;
+		udelay(1);
+	}
+	printk(KERN_ERR "radeonfb: FIFO Timeout !\n");
+}
 
 static inline void radeon_engine_flush (struct radeonfb_info *rinfo)
 {
@@ -563,7 +558,7 @@ static inline void radeon_engine_flush (struct radeonfb_info *rinfo)
 	/* Ensure FIFO is empty, ie, make sure the flush commands
 	 * has reached the cache
 	 */
-	radeon_fifo_update_and_wait(rinfo, 64);
+	_radeon_fifo_wait (rinfo, 64);
 
 	/* Wait for the flush to complete */
 	for (i=0; i < 2000000; i++) {
@@ -575,12 +570,12 @@ static inline void radeon_engine_flush (struct radeonfb_info *rinfo)
 }
 
 
-static inline void radeon_engine_idle(struct radeonfb_info *rinfo)
+static inline void _radeon_engine_idle(struct radeonfb_info *rinfo)
 {
 	int i;
 
 	/* ensure FIFO is empty before waiting for idle */
-	radeon_fifo_update_and_wait (rinfo, 64);
+	_radeon_fifo_wait (rinfo, 64);
 
 	for (i=0; i<2000000; i++) {
 		if (((INREG(RBBM_STATUS) & GUI_ACTIVE)) == 0) {
@@ -593,6 +588,8 @@ static inline void radeon_engine_idle(struct radeonfb_info *rinfo)
 }
 
 
+#define radeon_engine_idle()		_radeon_engine_idle(rinfo)
+#define radeon_fifo_wait(entries)	_radeon_fifo_wait(rinfo,entries)
 #define radeon_msleep(ms)		_radeon_msleep(rinfo,ms)
 
 
@@ -622,7 +619,6 @@ extern void radeonfb_imageblit(struct fb_info *p, const struct fb_image *image);
 extern int radeonfb_sync(struct fb_info *info);
 extern void radeonfb_engine_init (struct radeonfb_info *rinfo);
 extern void radeonfb_engine_reset(struct radeonfb_info *rinfo);
-extern void radeon_fixup_mem_offset(struct radeonfb_info *rinfo);
 
 /* Other functions */
 extern int radeon_screen_blank(struct radeonfb_info *rinfo, int blank, int mode_switch);
@@ -638,6 +634,4 @@ static inline void radeonfb_bl_init(struct radeonfb_info *rinfo) {}
 static inline void radeonfb_bl_exit(struct radeonfb_info *rinfo) {}
 #endif
 
-extern int accel_cexp;
-
 #endif /* __RADEONFB_H__ */
diff --git a/include/video/radeon.h b/include/video/radeon.h
index d5dcaf154ba4..1cd09cc5b169 100644
--- a/include/video/radeon.h
+++ b/include/video/radeon.h
@@ -525,9 +525,6 @@
 #define CRTC_DISPLAY_DIS			   (1 << 10)
 #define CRTC_CRT_ON				   (1 << 15)
 
-/* DSTCACHE_MODE bits constants */
-#define RB2D_DC_AUTOFLUSH_ENABLE                   (1 << 8)
-#define RB2D_DC_DC_DISABLE_IGNORE_PE               (1 << 17)
 
 /* DSTCACHE_CTLSTAT bit constants */
 #define RB2D_DC_FLUSH_2D			   (1 << 0)
@@ -869,10 +866,15 @@
 #define GMC_DST_16BPP_YVYU422                      0x00000c00
 #define GMC_DST_32BPP_AYUV444                      0x00000e00
 #define GMC_DST_16BPP_ARGB4444                     0x00000f00
+#define GMC_SRC_MONO                               0x00000000
+#define GMC_SRC_MONO_LBKGD                         0x00001000
+#define GMC_SRC_DSTCOLOR                           0x00003000
 #define GMC_BYTE_ORDER_MSB_TO_LSB                  0x00000000
 #define GMC_BYTE_ORDER_LSB_TO_MSB                  0x00004000
 #define GMC_DP_CONVERSION_TEMP_9300                0x00008000
 #define GMC_DP_CONVERSION_TEMP_6500                0x00000000
+#define GMC_DP_SRC_RECT                            0x02000000
+#define GMC_DP_SRC_HOST                            0x03000000
 #define GMC_DP_SRC_HOST_BYTEALIGN                  0x04000000
 #define GMC_3D_FCN_EN_CLR                          0x00000000
 #define GMC_3D_FCN_EN_SET                          0x08000000
@@ -883,9 +885,6 @@
 #define GMC_WRITE_MASK_LEAVE                       0x00000000
 #define GMC_WRITE_MASK_SET                         0x40000000
 #define GMC_CLR_CMP_CNTL_DIS      		   (1 << 28)
-#define GMC_SRC_DATATYPE_MASK			   (3 << 12)
-#define GMC_SRC_DATATYPE_MONO_FG_BG		   (0 << 12)
-#define GMC_SRC_DATATYPE_MONO_FG_LA		   (1 << 12)
 #define GMC_SRC_DATATYPE_COLOR			   (3 << 12)
 #define ROP3_S                			   0x00cc0000
 #define ROP3_SRCCOPY				   0x00cc0000
@@ -894,7 +893,6 @@
 #define DP_SRC_SOURCE_MASK        		   (7    << 24)
 #define GMC_BRUSH_NONE            		   (15   <<  4)
 #define DP_SRC_SOURCE_MEMORY			   (2    << 24)
-#define DP_SRC_SOURCE_HOST_DATA			   (3    << 24)
 #define GMC_BRUSH_SOLIDCOLOR			   0x000000d0
 
 /* DP_MIX bit constants */
@@ -980,12 +978,6 @@
 #define DISP_PWR_MAN_TV_ENABLE_RST                 (1 << 25)
 #define DISP_PWR_MAN_AUTO_PWRUP_EN                 (1 << 26)
 
-/* RBBM_GUICNTL constants */
-#define RBBM_GUICNTL_HOST_DATA_SWAP_NONE	   (0 << 0)
-#define RBBM_GUICNTL_HOST_DATA_SWAP_16BIT          (1 << 0)
-#define RBBM_GUICNTL_HOST_DATA_SWAP_32BIT	   (2 << 0)
-#define RBBM_GUICNTL_HOST_DATA_SWAP_HDW		   (3 << 0)
-
 /* masks */
 
 #define CONFIG_MEMSIZE_MASK		0x1f000000
-- 
cgit v1.2.3


From 4d4be482a4d78ca906f45e99fd9fdb91e907f5ad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 9 Dec 2008 04:47:33 -0500
Subject: [XFS] add a FMODE flag to make XFS invisible I/O less hacky

XFS has a mode called invisble I/O that doesn't update any of the
timestamps.  It's used for HSM-style applications and exposed through
the nasty open by handle ioctl.

Instead of doing directly assignment of file operations that set an
internal flag for it add a new FMODE_NOCMTIME flag that we can check
in the normal file operations.

(addition of the generic VFS flag has been ACKed by Al as an interims
 solution)

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c    | 145 ++++++-----------------------------------
 fs/xfs/linux-2.6/xfs_ioctl.c   |  29 ++++++---
 fs/xfs/linux-2.6/xfs_ioctl.h   |   8 +--
 fs/xfs/linux-2.6/xfs_ioctl32.c |  56 ++++++----------
 fs/xfs/linux-2.6/xfs_iops.h    |   1 -
 fs/xfs/xfs_vnodeops.h          |   2 -
 include/linux/fs.h             |   8 +++
 7 files changed, 71 insertions(+), 178 deletions(-)

(limited to 'include')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f999d20a429c..a0c45cc8a6b8 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -45,80 +45,44 @@
 
 static struct vm_operations_struct xfs_file_vm_ops;
 
-STATIC_INLINE ssize_t
-__xfs_file_read(
+STATIC ssize_t
+xfs_file_aio_read(
 	struct kiocb		*iocb,
 	const struct iovec	*iov,
 	unsigned long		nr_segs,
-	int			ioflags,
 	loff_t			pos)
 {
 	struct file		*file = iocb->ki_filp;
+	int			ioflags = IO_ISAIO;
 
 	BUG_ON(iocb->ki_pos != pos);
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
+	if (file->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
 	return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
 				nr_segs, &iocb->ki_pos, ioflags);
 }
 
 STATIC ssize_t
-xfs_file_aio_read(
-	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	unsigned long		nr_segs,
-	loff_t			pos)
-{
-	return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
-}
-
-STATIC ssize_t
-xfs_file_aio_read_invis(
-	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	unsigned long		nr_segs,
-	loff_t			pos)
-{
-	return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
-}
-
-STATIC_INLINE ssize_t
-__xfs_file_write(
+xfs_file_aio_write(
 	struct kiocb		*iocb,
 	const struct iovec	*iov,
 	unsigned long		nr_segs,
-	int			ioflags,
 	loff_t			pos)
 {
-	struct file	*file = iocb->ki_filp;
+	struct file		*file = iocb->ki_filp;
+	int			ioflags = IO_ISAIO;
 
 	BUG_ON(iocb->ki_pos != pos);
 	if (unlikely(file->f_flags & O_DIRECT))
 		ioflags |= IO_ISDIRECT;
+	if (file->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
 	return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
 				&iocb->ki_pos, ioflags);
 }
 
-STATIC ssize_t
-xfs_file_aio_write(
-	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	unsigned long		nr_segs,
-	loff_t			pos)
-{
-	return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
-}
-
-STATIC ssize_t
-xfs_file_aio_write_invis(
-	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	unsigned long		nr_segs,
-	loff_t			pos)
-{
-	return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
-}
-
 STATIC ssize_t
 xfs_file_splice_read(
 	struct file		*infilp,
@@ -127,20 +91,13 @@ xfs_file_splice_read(
 	size_t			len,
 	unsigned int		flags)
 {
-	return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
-				   infilp, ppos, pipe, len, flags, 0);
-}
+	int			ioflags = 0;
+
+	if (infilp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
 
-STATIC ssize_t
-xfs_file_splice_read_invis(
-	struct file		*infilp,
-	loff_t			*ppos,
-	struct pipe_inode_info	*pipe,
-	size_t			len,
-	unsigned int		flags)
-{
 	return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
-				   infilp, ppos, pipe, len, flags, IO_INVIS);
+				   infilp, ppos, pipe, len, flags, ioflags);
 }
 
 STATIC ssize_t
@@ -151,20 +108,13 @@ xfs_file_splice_write(
 	size_t			len,
 	unsigned int		flags)
 {
-	return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
-				    pipe, outfilp, ppos, len, flags, 0);
-}
+	int			ioflags = 0;
+
+	if (outfilp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
 
-STATIC ssize_t
-xfs_file_splice_write_invis(
-	struct pipe_inode_info	*pipe,
-	struct file		*outfilp,
-	loff_t			*ppos,
-	size_t			len,
-	unsigned int		flags)
-{
 	return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
-				    pipe, outfilp, ppos, len, flags, IO_INVIS);
+				    pipe, outfilp, ppos, len, flags, ioflags);
 }
 
 STATIC int
@@ -275,42 +225,6 @@ xfs_file_mmap(
 	return 0;
 }
 
-STATIC long
-xfs_file_ioctl(
-	struct file	*filp,
-	unsigned int	cmd,
-	unsigned long	p)
-{
-	struct inode	*inode = filp->f_path.dentry->d_inode;
-
-
-	/* NOTE:  some of the ioctl's return positive #'s as a
-	 *	  byte count indicating success, such as
-	 *	  readlink_by_handle.  So we don't "sign flip"
-	 *	  like most other routines.  This means true
-	 *	  errors need to be returned as a negative value.
-	 */
-	return xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
-}
-
-STATIC long
-xfs_file_ioctl_invis(
-	struct file	*filp,
-	unsigned int	cmd,
-	unsigned long	p)
-{
-	struct inode	*inode = filp->f_path.dentry->d_inode;
-
-
-	/* NOTE:  some of the ioctl's return positive #'s as a
-	 *	  byte count indicating success, such as
-	 *	  readlink_by_handle.  So we don't "sign flip"
-	 *	  like most other routines.  This means true
-	 *	  errors need to be returned as a negative value.
-	 */
-	return xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
-}
-
 /*
  * mmap()d file has taken write protection fault and is being made
  * writable. We can set the page state up correctly for a writable
@@ -346,25 +260,6 @@ const struct file_operations xfs_file_operations = {
 #endif
 };
 
-const struct file_operations xfs_invis_file_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= xfs_file_aio_read_invis,
-	.aio_write	= xfs_file_aio_write_invis,
-	.splice_read	= xfs_file_splice_read_invis,
-	.splice_write	= xfs_file_splice_write_invis,
-	.unlocked_ioctl	= xfs_file_ioctl_invis,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= xfs_file_compat_invis_ioctl,
-#endif
-	.mmap		= xfs_file_mmap,
-	.open		= xfs_file_open,
-	.release	= xfs_file_release,
-	.fsync		= xfs_file_fsync,
-};
-
-
 const struct file_operations xfs_dir_file_operations = {
 	.open		= xfs_dir_open,
 	.read		= generic_read_dir,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index c8f1e632ba94..0264c8719ffd 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -319,10 +319,11 @@ xfs_open_by_handle(
 		put_unused_fd(new_fd);
 		return -XFS_ERROR(-PTR_ERR(filp));
 	}
+
 	if (inode->i_mode & S_IFREG) {
 		/* invisible operation should not change atime */
 		filp->f_flags |= O_NOATIME;
-		filp->f_op = &xfs_invis_file_operations;
+		filp->f_mode |= FMODE_NOCMTIME;
 	}
 
 	fd_install(new_fd, filp);
@@ -1328,21 +1329,31 @@ xfs_ioc_getbmapx(
 	return 0;
 }
 
-int
-xfs_ioctl(
-	xfs_inode_t		*ip,
+/*
+ * Note: some of the ioctl's return positive numbers as a
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines.  This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
 	struct file		*filp,
-	int			ioflags,
 	unsigned int		cmd,
-	void			__user *arg)
+	unsigned long		p)
 {
 	struct inode		*inode = filp->f_path.dentry->d_inode;
-	xfs_mount_t		*mp = ip->i_mount;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	void			__user *arg = (void __user *)p;
+	int			ioflags = 0;
 	int			error;
 
-	xfs_itrace_entry(XFS_I(inode));
-	switch (cmd) {
+	if (filp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
 
+	xfs_itrace_entry(ip);
+
+	switch (cmd) {
 	case XFS_IOC_ALLOCSP:
 	case XFS_IOC_FREESP:
 	case XFS_IOC_RESVSP:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
index d92131c827bc..8c16bf2d7e03 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -68,13 +68,13 @@ xfs_attrmulti_attr_remove(
 	__uint32_t		flags);
 
 extern long
-xfs_file_compat_ioctl(
-	struct file		*file,
+xfs_file_ioctl(
+	struct file		*filp,
 	unsigned int		cmd,
-	unsigned long		arg);
+	unsigned long		p);
 
 extern long
-xfs_file_compat_invis_ioctl(
+xfs_file_compat_ioctl(
 	struct file		*file,
 	unsigned int		cmd,
 	unsigned long		arg);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b34b3d8892a2..0504cece9f66 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -599,19 +599,24 @@ out:
 	return error;
 }
 
-STATIC long
-xfs_compat_ioctl(
-	xfs_inode_t	*ip,
-	struct file	*filp,
-	int		ioflags,
-	unsigned	cmd,
-	void		__user *arg)
+long
+xfs_file_compat_ioctl(
+	struct file		*filp,
+	unsigned		cmd,
+	unsigned long		p)
 {
-	struct inode	*inode = filp->f_path.dentry->d_inode;
-	xfs_mount_t	*mp = ip->i_mount;
-	int		error;
+	struct inode		*inode = filp->f_path.dentry->d_inode;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	void			__user *arg = (void __user *)p;
+	int			ioflags = 0;
+	int			error;
+
+	if (filp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
+
+	xfs_itrace_entry(ip);
 
-	xfs_itrace_entry(XFS_I(inode));
 	switch (cmd) {
 	/* No size or alignment issues on any arch */
 	case XFS_IOC_DIOINFO:
@@ -632,7 +637,7 @@ xfs_compat_ioctl(
 	case XFS_IOC_GOINGDOWN:
 	case XFS_IOC_ERROR_INJECTION:
 	case XFS_IOC_ERROR_CLEARALL:
-		return xfs_ioctl(ip, filp, ioflags, cmd, arg);
+		return xfs_file_ioctl(filp, cmd, p);
 #ifndef BROKEN_X86_ALIGNMENT
 	/* These are handled fine if no alignment issues */
 	case XFS_IOC_ALLOCSP:
@@ -646,7 +651,7 @@ xfs_compat_ioctl(
 	case XFS_IOC_FSGEOMETRY_V1:
 	case XFS_IOC_FSGROWFSDATA:
 	case XFS_IOC_FSGROWFSRT:
-		return xfs_ioctl(ip, filp, ioflags, cmd, arg);
+		return xfs_file_ioctl(filp, cmd, p);
 #else
 	case XFS_IOC_ALLOCSP_32:
 	case XFS_IOC_FREESP_32:
@@ -687,7 +692,7 @@ xfs_compat_ioctl(
 	case XFS_IOC_SETXFLAGS_32:
 	case XFS_IOC_GETVERSION_32:
 		cmd = _NATIVE_IOC(cmd, long);
-		return xfs_ioctl(ip, filp, ioflags, cmd, arg);
+		return xfs_file_ioctl(filp, cmd, p);
 	case XFS_IOC_SWAPEXT: {
 		struct xfs_swapext	  sxp;
 		struct compat_xfs_swapext __user *sxu = arg;
@@ -738,26 +743,3 @@ xfs_compat_ioctl(
 		return -XFS_ERROR(ENOIOCTLCMD);
 	}
 }
-
-long
-xfs_file_compat_ioctl(
-	struct file		*filp,
-	unsigned int		cmd,
-	unsigned long		p)
-{
-	struct inode	*inode = filp->f_path.dentry->d_inode;
-
-	return xfs_compat_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
-}
-
-long
-xfs_file_compat_invis_ioctl(
-	struct file		*filp,
-	unsigned int		cmd,
-	unsigned long		p)
-{
-	struct inode	*inode = filp->f_path.dentry->d_inode;
-
-	return xfs_compat_ioctl(XFS_I(inode), filp, IO_INVIS, cmd,
-				(void __user *)p);
-}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 8b1a1e31dc21..ef41c92ce66e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,7 +22,6 @@ struct xfs_inode;
 
 extern const struct file_operations xfs_file_operations;
 extern const struct file_operations xfs_dir_file_operations;
-extern const struct file_operations xfs_invis_file_operations;
 
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 2a45b00ad32e..55d955ec4ece 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -53,8 +53,6 @@ int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
 int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
 		int flags, struct attrlist_cursor_kern *cursor);
-int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
-		int ioflags, unsigned int cmd, void __user *arg);
 ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
 		const struct iovec *iovp, unsigned int segs,
 		loff_t *offset, int ioflags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51bd9370d437..965b9ba3865d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -81,6 +81,14 @@ extern int dir_notify_enable;
 #define FMODE_WRITE_IOCTL	((__force fmode_t)128)
 #define FMODE_NDELAY_NOW	((__force fmode_t)256)
 
+/*
+ * Don't update ctime and mtime.
+ *
+ * Currently a special hack for the XFS open_by_handle ioctl, but we'll
+ * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
+ */
+#define FMODE_NOCMTIME		((__force fmode_t)2048)
+
 #define RW_MASK		1
 #define RWA_MASK	2
 #define READ 0
-- 
cgit v1.2.3


From c2724775ce57c98b8af9694857b941dc61056516 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Thu, 11 Dec 2008 13:49:59 +0100
Subject: x86, bts: provide in-kernel branch-trace interface

Impact: cleanup

Move the BTS bits from ptrace.c into ds.c.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ds.h          | 241 ++++++-----
 arch/x86/include/asm/processor.h   |  13 +
 arch/x86/include/asm/ptrace.h      |  36 --
 arch/x86/include/asm/thread_info.h |   5 +-
 arch/x86/kernel/cpu/intel.c        |   4 -
 arch/x86/kernel/ds.c               | 857 +++++++++++++++++++++++--------------
 arch/x86/kernel/process_32.c       |  59 +--
 arch/x86/kernel/process_64.c       |  50 +--
 arch/x86/kernel/ptrace.c           | 416 +++++-------------
 include/linux/sched.h              |   1 +
 10 files changed, 811 insertions(+), 871 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 99b6c39774a4..ee0ea3a96c11 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -6,13 +6,13 @@
  * precise-event based sampling (PEBS).
  *
  * It manages:
- * - per-thread and per-cpu allocation of BTS and PEBS
+ * - DS and BTS hardware configuration
  * - buffer overflow handling (to be done)
  * - buffer access
  *
- * It assumes:
- * - get_task_struct on all traced tasks
- * - current is allowed to trace tasks
+ * It does not do:
+ * - security checking (is the caller allowed to trace the task)
+ * - buffer allocation (memory accounting)
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -31,6 +31,7 @@
 #ifdef CONFIG_X86_DS
 
 struct task_struct;
+struct ds_context;
 struct ds_tracer;
 struct bts_tracer;
 struct pebs_tracer;
@@ -38,6 +39,38 @@ struct pebs_tracer;
 typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
 typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
 
+
+/*
+ * A list of features plus corresponding macros to talk about them in
+ * the ds_request function's flags parameter.
+ *
+ * We use the enum to index an array of corresponding control bits;
+ * we use the macro to index a flags bit-vector.
+ */
+enum ds_feature {
+	dsf_bts = 0,
+	dsf_bts_kernel,
+#define BTS_KERNEL (1 << dsf_bts_kernel)
+	/* trace kernel-mode branches */
+
+	dsf_bts_user,
+#define BTS_USER (1 << dsf_bts_user)
+	/* trace user-mode branches */
+
+	dsf_bts_overflow,
+	dsf_bts_max,
+	dsf_pebs = dsf_bts_max,
+
+	dsf_pebs_max,
+	dsf_ctl_max = dsf_pebs_max,
+	dsf_bts_timestamps = dsf_ctl_max,
+#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
+	/* add timestamps into BTS trace */
+
+#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
+};
+
+
 /*
  * Request BTS or PEBS
  *
@@ -58,92 +91,135 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
  *       NULL if cyclic buffer requested
  * th: the interrupt threshold in records from the end of the buffer;
  *     -1 if no interrupt threshold is requested.
+ * flags: a bit-mask of the above flags
  */
 extern struct bts_tracer *ds_request_bts(struct task_struct *task,
 					 void *base, size_t size,
-					 bts_ovfl_callback_t ovfl, size_t th);
+					 bts_ovfl_callback_t ovfl,
+					 size_t th, unsigned int flags);
 extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 					   void *base, size_t size,
 					   pebs_ovfl_callback_t ovfl,
-					   size_t th);
+					   size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
- *
- * Returns 0 on success; -Eerrno otherwise
+ * Suspend and resume BTS or PEBS tracing
  *
  * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_release_bts(struct bts_tracer *tracer);
-extern int ds_release_pebs(struct pebs_tracer *tracer);
+extern void ds_release_bts(struct bts_tracer *tracer);
+extern void ds_suspend_bts(struct bts_tracer *tracer);
+extern void ds_resume_bts(struct bts_tracer *tracer);
+extern void ds_release_pebs(struct pebs_tracer *tracer);
+extern void ds_suspend_pebs(struct pebs_tracer *tracer);
+extern void ds_resume_pebs(struct pebs_tracer *tracer);
+
 
 /*
- * Get the (array) index of the write pointer.
- * (assuming an array of BTS/PEBS records)
- *
- * Returns 0 on success; -Eerrno on error
+ * The raw DS buffer state as it is used for BTS and PEBS recording.
  *
- * tracer: the tracer handle returned from ds_request_~()
- * pos (out): will hold the result
+ * This is the low-level, arch-dependent interface for working
+ * directly on the raw trace data.
  */
-extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
-extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
+struct ds_trace {
+	/* the number of bts/pebs records */
+	size_t n;
+	/* the size of a bts/pebs record in bytes */
+	size_t size;
+	/* pointers into the raw buffer:
+	   - to the first entry */
+	void *begin;
+	/* - one beyond the last entry */
+	void *end;
+	/* - one beyond the newest entry */
+	void *top;
+	/* - the interrupt threshold */
+	void *ith;
+	/* flags given on ds_request() */
+	unsigned int flags;
+};
 
 /*
- * Get the (array) index one record beyond the end of the array.
- * (assuming an array of BTS/PEBS records)
- *
- * Returns 0 on success; -Eerrno on error
- *
- * tracer: the tracer handle returned from ds_request_~()
- * pos (out): will hold the result
+ * An arch-independent view on branch trace data.
  */
-extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
-extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
+enum bts_qualifier {
+	bts_invalid,
+#define BTS_INVALID bts_invalid
+
+	bts_branch,
+#define BTS_BRANCH bts_branch
+
+	bts_task_arrives,
+#define BTS_TASK_ARRIVES bts_task_arrives
+
+	bts_task_departs,
+#define BTS_TASK_DEPARTS bts_task_departs
+
+	bts_qual_bit_size = 4,
+	bts_qual_max = (1 << bts_qual_bit_size),
+};
+
+struct bts_struct {
+	__u64 qualifier;
+	union {
+		/* BTS_BRANCH */
+		struct {
+			__u64 from;
+			__u64 to;
+		} lbr;
+		/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
+		struct {
+			__u64 jiffies;
+			pid_t pid;
+		} timestamp;
+	} variant;
+};
+
 
 /*
- * Provide a pointer to the BTS/PEBS record at parameter index.
- * (assuming an array of BTS/PEBS records)
- *
- * The pointer points directly into the buffer. The user is
- * responsible for copying the record.
- *
- * Returns the size of a single record on success; -Eerrno on error
+ * The BTS state.
  *
- * tracer: the tracer handle returned from ds_request_~()
- * index: the index of the requested record
- * record (out): pointer to the requested record
+ * This gives access to the raw DS state and adds functions to provide
+ * an arch-independent view of the BTS data.
  */
-extern int ds_access_bts(struct bts_tracer *tracer,
-			 size_t index, const void **record);
-extern int ds_access_pebs(struct pebs_tracer *tracer,
-			  size_t index, const void **record);
+struct bts_trace {
+	struct ds_trace ds;
+
+	int (*read)(struct bts_tracer *tracer, const void *at,
+		    struct bts_struct *out);
+	int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
+};
+
 
 /*
- * Write one or more BTS/PEBS records at the write pointer index and
- * advance the write pointer.
+ * The PEBS state.
  *
- * If size is not a multiple of the record size, trailing bytes are
- * zeroed out.
- *
- * May result in one or more overflow notifications.
- *
- * If called during overflow handling, that is, with index >=
- * interrupt threshold, the write will wrap around.
+ * This gives access to the raw DS state and the PEBS-specific counter
+ * reset value.
+ */
+struct pebs_trace {
+	struct ds_trace ds;
+
+	/* the PEBS reset value */
+	unsigned long long reset_value;
+};
+
+
+/*
+ * Read the BTS or PEBS trace.
  *
- * An overflow notification is given if and when the interrupt
- * threshold is reached during or after the write.
+ * Returns a view on the trace collected for the parameter tracer.
  *
- * Returns the number of bytes written or -Eerrno.
+ * The view remains valid as long as the traced task is not running or
+ * the tracer is suspended.
+ * Writes into the trace buffer are not reflected.
  *
  * tracer: the tracer handle returned from ds_request_~()
- * buffer: the buffer to write
- * size: the size of the buffer
  */
-extern int ds_write_bts(struct bts_tracer *tracer,
-			const void *buffer, size_t size);
-extern int ds_write_pebs(struct pebs_tracer *tracer,
-			 const void *buffer, size_t size);
+extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
+extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
+
 
 /*
  * Reset the write pointer of the BTS/PEBS buffer.
@@ -155,27 +231,6 @@ extern int ds_write_pebs(struct pebs_tracer *tracer,
 extern int ds_reset_bts(struct bts_tracer *tracer);
 extern int ds_reset_pebs(struct pebs_tracer *tracer);
 
-/*
- * Clear the BTS/PEBS buffer and reset the write pointer.
- * The entire buffer will be zeroed out.
- *
- * Returns 0 on success; -Eerrno on error
- *
- * tracer: the tracer handle returned from ds_request_~()
- */
-extern int ds_clear_bts(struct bts_tracer *tracer);
-extern int ds_clear_pebs(struct pebs_tracer *tracer);
-
-/*
- * Provide the PEBS counter reset value.
- *
- * Returns 0 on success; -Eerrno on error
- *
- * tracer: the tracer handle returned from ds_request_pebs()
- * value (out): the counter reset value
- */
-extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
-
 /*
  * Set the PEBS counter reset value.
  *
@@ -192,35 +247,17 @@ extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
 struct cpuinfo_x86;
 extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
 
-
-
 /*
- * The DS context - part of struct thread_struct.
+ * Context switch work
  */
-#define MAX_SIZEOF_DS (12 * 8)
-
-struct ds_context {
-	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-	unsigned char ds[MAX_SIZEOF_DS];
-	/* the owner of the BTS and PEBS configuration, respectively */
-	struct ds_tracer  *owner[2];
-	/* use count */
-	unsigned long count;
-	/* a pointer to the context location inside the thread_struct
-	 * or the per_cpu context array */
-	struct ds_context **this;
-	/* a pointer to the task owning this context, or NULL, if the
-	 * context is owned by a cpu */
-	struct task_struct *task;
-};
-
-/* called by exit_thread() to free leftover contexts */
-extern void ds_free(struct ds_context *context);
+extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
+static inline void ds_switch_to(struct task_struct *prev,
+				struct task_struct *next) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e383269..aa5914f8e501 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -752,6 +752,19 @@ extern void switch_to_new_gdt(void);
 extern void cpu_init(void);
 extern void init_gdt(int cpu);
 
+static inline unsigned long get_debugctlmsr(void)
+{
+    unsigned long debugctlmsr = 0;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
+
+    return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index eefb0594b058..fbf744215911 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,6 @@
 #include <asm/processor-flags.h>
 
 #ifdef __KERNEL__
-#include <asm/ds.h>		/* the DS BTS struct is used for ptrace too */
 #include <asm/segment.h>
 #endif
 
@@ -128,34 +127,6 @@ struct pt_regs {
 #endif /* !__i386__ */
 
 
-#ifdef CONFIG_X86_PTRACE_BTS
-/* a branch trace record entry
- *
- * In order to unify the interface between various processor versions,
- * we use the below data structure for all processors.
- */
-enum bts_qualifier {
-	BTS_INVALID = 0,
-	BTS_BRANCH,
-	BTS_TASK_ARRIVES,
-	BTS_TASK_DEPARTS
-};
-
-struct bts_struct {
-	__u64 qualifier;
-	union {
-		/* BTS_BRANCH */
-		struct {
-			__u64 from_ip;
-			__u64 to_ip;
-		} lbr;
-		/* BTS_TASK_ARRIVES or
-		   BTS_TASK_DEPARTS */
-		__u64 jiffies;
-	} variant;
-};
-#endif /* CONFIG_X86_PTRACE_BTS */
-
 #ifdef __KERNEL__
 
 #include <linux/init.h>
@@ -163,13 +134,6 @@ struct bts_struct {
 struct cpuinfo_x86;
 struct task_struct;
 
-#ifdef CONFIG_X86_PTRACE_BTS
-extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
-extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
-#else
-#define ptrace_bts_init_intel(config) do {} while (0)
-#endif /* CONFIG_X86_PTRACE_BTS */
-
 extern unsigned long profile_pc(struct pt_regs *regs);
 
 extern unsigned long
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 0921b4018c11..bf8113d16a33 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,7 +93,6 @@ struct thread_info {
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
-#define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -115,7 +114,6 @@ struct thread_info {
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
-#define _TIF_BTS_TRACE_TS	(1 << TIF_BTS_TRACE_TS)
 
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
@@ -141,8 +139,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \
-								_TIF_NOTSC)
+	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
 
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 816f27f289b1..cd413d9a0218 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,7 +11,6 @@
 #include <asm/pgtable.h>
 #include <asm/msr.h>
 #include <asm/uaccess.h>
-#include <asm/ptrace.h>
 #include <asm/ds.h>
 #include <asm/bugs.h>
 
@@ -309,9 +308,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P3);
 #endif
 
-	if (cpu_has_bts)
-		ptrace_bts_init_intel(c);
-
 	detect_extended_topology(c);
 	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 		/*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 095306988667..f0583005b75e 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,13 +6,13 @@
  * precise-event based sampling (PEBS).
  *
  * It manages:
- * - per-thread and per-cpu allocation of BTS and PEBS
+ * - DS and BTS hardware configuration
  * - buffer overflow handling (to be done)
  * - buffer access
  *
- * It assumes:
- * - get_task_struct on all traced tasks
- * - current is allowed to trace tasks
+ * It does not do:
+ * - security checking (is the caller allowed to trace the task)
+ * - buffer allocation (memory accounting)
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -34,15 +34,30 @@
  * The configuration for a particular DS hardware implementation.
  */
 struct ds_configuration {
-	/* the size of the DS structure in bytes */
-	unsigned char  sizeof_ds;
-	/* the size of one pointer-typed field in the DS structure in bytes;
-	   this covers the first 8 fields related to buffer management. */
+	/* the name of the configuration */
+	const char *name;
+	/* the size of one pointer-typed field in the DS structure and
+	   in the BTS and PEBS buffers in bytes;
+	   this covers the first 8 DS fields related to buffer management. */
 	unsigned char  sizeof_field;
 	/* the size of a BTS/PEBS record in bytes */
 	unsigned char  sizeof_rec[2];
+	/* a series of bit-masks to control various features indexed
+	 * by enum ds_feature */
+	unsigned long ctl[dsf_ctl_max];
 };
-static struct ds_configuration ds_cfg;
+static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+
+#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+
+#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
+#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+
+#define BTS_CONTROL \
+ (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
+  ds_cfg.ctl[dsf_bts_overflow])
+
 
 /*
  * A BTS or PEBS tracer.
@@ -61,6 +76,8 @@ struct ds_tracer {
 struct bts_tracer {
 	/* the common DS part */
 	struct ds_tracer ds;
+	/* the trace including the DS configuration */
+	struct bts_trace trace;
 	/* buffer overflow notification function */
 	bts_ovfl_callback_t ovfl;
 };
@@ -68,6 +85,8 @@ struct bts_tracer {
 struct pebs_tracer {
 	/* the common DS part */
 	struct ds_tracer ds;
+	/* the trace including the DS configuration */
+	struct pebs_trace trace;
 	/* buffer overflow notification function */
 	pebs_ovfl_callback_t ovfl;
 };
@@ -134,13 +153,11 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 	(*(unsigned long *)base) = value;
 }
 
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
-
 
 /*
  * Locking is done only for allocating BTS or PEBS resources.
  */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
+static DEFINE_SPINLOCK(ds_lock);
 
 
 /*
@@ -156,27 +173,32 @@ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
  *   >0  number of per-thread tracers
  *   <0  number of per-cpu tracers
  *
- * The below functions to get and put tracers and to check the
- * allocation type require the ds_lock to be held by the caller.
- *
  * Tracers essentially gives the number of ds contexts for a certain
  * type of allocation.
  */
-static long tracers;
+static atomic_t tracers = ATOMIC_INIT(0);
 
 static inline void get_tracer(struct task_struct *task)
 {
-	tracers += (task ? 1 : -1);
+	if (task)
+		atomic_inc(&tracers);
+	else
+		atomic_dec(&tracers);
 }
 
 static inline void put_tracer(struct task_struct *task)
 {
-	tracers -= (task ? 1 : -1);
+	if (task)
+		atomic_dec(&tracers);
+	else
+		atomic_inc(&tracers);
 }
 
 static inline int check_tracer(struct task_struct *task)
 {
-	return (task ? (tracers >= 0) : (tracers <= 0));
+	return task ?
+		(atomic_read(&tracers) >= 0) :
+		(atomic_read(&tracers) <= 0);
 }
 
 
@@ -190,14 +212,30 @@ static inline int check_tracer(struct task_struct *task)
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
  */
-static DEFINE_PER_CPU(struct ds_context *, system_context);
+struct ds_context {
+	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+	unsigned char ds[MAX_SIZEOF_DS];
+	/* the owner of the BTS and PEBS configuration, respectively */
+	struct bts_tracer *bts_master;
+	struct pebs_tracer *pebs_master;
+	/* use count */
+	unsigned long count;
+	/* a pointer to the context location inside the thread_struct
+	 * or the per_cpu context array */
+	struct ds_context **this;
+	/* a pointer to the task owning this context, or NULL, if the
+	 * context is owned by a cpu */
+	struct task_struct *task;
+};
+
+static DEFINE_PER_CPU(struct ds_context *, system_context_array);
 
-#define this_system_context per_cpu(system_context, smp_processor_id())
+#define system_context per_cpu(system_context_array, smp_processor_id())
 
 static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
 	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &this_system_context);
+		(task ? &task->thread.ds_ctx : &system_context);
 	struct ds_context *context = *p_context;
 	unsigned long irq;
 
@@ -225,10 +263,22 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
 				wrmsrl(MSR_IA32_DS_AREA,
 				       (unsigned long)context->ds);
 		}
+
+		context->count++;
+
+		spin_unlock_irqrestore(&ds_lock, irq);
+	} else {
+		spin_lock_irqsave(&ds_lock, irq);
+
+		context = *p_context;
+		if (context)
+			context->count++;
+
 		spin_unlock_irqrestore(&ds_lock, irq);
-	}
 
-	context->count++;
+		if (!context)
+			context = ds_get_context(task);
+	}
 
 	return context;
 }
@@ -242,8 +292,10 @@ static inline void ds_put_context(struct ds_context *context)
 
 	spin_lock_irqsave(&ds_lock, irq);
 
-	if (--context->count)
-		goto out;
+	if (--context->count) {
+		spin_unlock_irqrestore(&ds_lock, irq);
+		return;
+	}
 
 	*(context->this) = NULL;
 
@@ -253,14 +305,14 @@ static inline void ds_put_context(struct ds_context *context)
 	if (!context->task || (context->task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
-	kfree(context);
- out:
 	spin_unlock_irqrestore(&ds_lock, irq);
+
+	kfree(context);
 }
 
 
 /*
- * Handle a buffer overflow
+ * Call the tracer's callback on a buffer overflow.
  *
  * context: the ds context
  * qual: the buffer type
@@ -268,30 +320,244 @@ static inline void ds_put_context(struct ds_context *context)
 static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 {
 	switch (qual) {
-	case ds_bts: {
-		struct bts_tracer *tracer =
-			container_of(context->owner[qual],
-				     struct bts_tracer, ds);
-		if (tracer->ovfl)
-			tracer->ovfl(tracer);
-	}
+	case ds_bts:
+		if (context->bts_master &&
+		    context->bts_master->ovfl)
+			context->bts_master->ovfl(context->bts_master);
+		break;
+	case ds_pebs:
+		if (context->pebs_master &&
+		    context->pebs_master->ovfl)
+			context->pebs_master->ovfl(context->pebs_master);
 		break;
-	case ds_pebs: {
-		struct pebs_tracer *tracer =
-			container_of(context->owner[qual],
-				     struct pebs_tracer, ds);
-		if (tracer->ovfl)
-			tracer->ovfl(tracer);
 	}
+}
+
+
+/*
+ * Write raw data into the BTS or PEBS buffer.
+ *
+ * The remainder of any partially written record is zeroed out.
+ *
+ * context: the DS context
+ * qual: the buffer type
+ * record: the data to write
+ * size: the size of the data
+ */
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+		    const void *record, size_t size)
+{
+	int bytes_written = 0;
+
+	if (!record)
+		return -EINVAL;
+
+	while (size) {
+		unsigned long base, index, end, write_end, int_th;
+		unsigned long write_size, adj_write_size;
+
+		/*
+		 * write as much as possible without producing an
+		 * overflow interrupt.
+		 *
+		 * interrupt_threshold must either be
+		 * - bigger than absolute_maximum or
+		 * - point to a record between buffer_base and absolute_maximum
+		 *
+		 * index points to a valid record.
+		 */
+		base   = ds_get(context->ds, qual, ds_buffer_base);
+		index  = ds_get(context->ds, qual, ds_index);
+		end    = ds_get(context->ds, qual, ds_absolute_maximum);
+		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+		write_end = min(end, int_th);
+
+		/* if we are already beyond the interrupt threshold,
+		 * we fill the entire buffer */
+		if (write_end <= index)
+			write_end = end;
+
+		if (write_end <= index)
+			break;
+
+		write_size = min((unsigned long) size, write_end - index);
+		memcpy((void *)index, record, write_size);
+
+		record = (const char *)record + write_size;
+		size -= write_size;
+		bytes_written += write_size;
+
+		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+		adj_write_size *= ds_cfg.sizeof_rec[qual];
+
+		/* zero out trailing bytes */
+		memset((char *)index + write_size, 0,
+		       adj_write_size - write_size);
+		index += adj_write_size;
+
+		if (index >= end)
+			index = base;
+		ds_set(context->ds, qual, ds_index, index);
+
+		if (index >= int_th)
+			ds_overflow(context, qual);
+	}
+
+	return bytes_written;
+}
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ *
+ * We use two levels of abstraction:
+ * - the raw data level defined here
+ * - an arch-independent level defined in ds.h
+ */
+
+enum bts_field {
+	bts_from,
+	bts_to,
+	bts_flags,
+
+	bts_qual = bts_from,
+	bts_jiffies = bts_to,
+	bts_pid = bts_flags,
+
+	bts_qual_mask = (bts_qual_max - 1),
+	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
+{
+	base += (ds_cfg.sizeof_field * field);
+	return *(unsigned long *)base;
+}
+
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+	base += (ds_cfg.sizeof_field * field);;
+	(*(unsigned long *)base) = val;
+}
+
+
+/*
+ * The raw BTS data is architecture dependent.
+ *
+ * For higher-level users, we give an arch-independent view.
+ * - ds.h defines struct bts_struct
+ * - bts_read translates one raw bts record into a bts_struct
+ * - bts_write translates one bts_struct into the raw format and
+ *   writes it into the top of the parameter tracer's buffer.
+ *
+ * return: bytes read/written on success; -Eerrno, otherwise
+ */
+static int bts_read(struct bts_tracer *tracer, const void *at,
+		    struct bts_struct *out)
+{
+	if (!tracer)
+		return -EINVAL;
+
+	if (at < tracer->trace.ds.begin)
+		return -EINVAL;
+
+	if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
+		return -EINVAL;
+
+	memset(out, 0, sizeof(*out));
+	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
+		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
+		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
+		out->variant.timestamp.pid = bts_get(at, bts_pid);
+	} else {
+		out->qualifier = bts_branch;
+		out->variant.lbr.from = bts_get(at, bts_from);
+		out->variant.lbr.to   = bts_get(at, bts_to);
+	}
+
+	return ds_cfg.sizeof_rec[ds_bts];
+}
+
+static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
+{
+	unsigned char raw[MAX_SIZEOF_BTS];
+
+	if (!tracer)
+		return -EINVAL;
+
+	if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
+		return -EOVERFLOW;
+
+	switch (in->qualifier) {
+	case bts_invalid:
+		bts_set(raw, bts_from, 0);
+		bts_set(raw, bts_to, 0);
+		bts_set(raw, bts_flags, 0);
+		break;
+	case bts_branch:
+		bts_set(raw, bts_from, in->variant.lbr.from);
+		bts_set(raw, bts_to,   in->variant.lbr.to);
+		bts_set(raw, bts_flags, 0);
+		break;
+	case bts_task_arrives:
+	case bts_task_departs:
+		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
+		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
+		bts_set(raw, bts_pid, in->variant.timestamp.pid);
 		break;
+	default:
+		return -EINVAL;
 	}
+
+	return ds_write(tracer->ds.context, ds_bts, raw,
+			ds_cfg.sizeof_rec[ds_bts]);
 }
 
 
-static void ds_install_ds_config(struct ds_context *context,
-				 enum ds_qualifier qual,
-				 void *base, size_t size, size_t ith)
+static void ds_write_config(struct ds_context *context,
+			    struct ds_trace *cfg, enum ds_qualifier qual)
+{
+	unsigned char *ds = context->ds;
+
+	ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
+	ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
+	ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
+	ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
+}
+
+static void ds_read_config(struct ds_context *context,
+			   struct ds_trace *cfg, enum ds_qualifier qual)
 {
+	unsigned char *ds = context->ds;
+
+	cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
+	cfg->top = (void *)ds_get(ds, qual, ds_index);
+	cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
+	cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
+}
+
+static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
+			     void *base, size_t size, size_t ith,
+			     unsigned int flags) {
 	unsigned long buffer, adj;
 
 	/* adjust the buffer address and size to meet alignment
@@ -308,32 +574,30 @@ static void ds_install_ds_config(struct ds_context *context,
 	buffer += adj;
 	size   -= adj;
 
-	size /= ds_cfg.sizeof_rec[qual];
-	size *= ds_cfg.sizeof_rec[qual];
+	trace->n = size / ds_cfg.sizeof_rec[qual];
+	trace->size = ds_cfg.sizeof_rec[qual];
 
-	ds_set(context->ds, qual, ds_buffer_base, buffer);
-	ds_set(context->ds, qual, ds_index, buffer);
-	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+	size = (trace->n * trace->size);
 
+	trace->begin = (void *)buffer;
+	trace->top = trace->begin;
+	trace->end = (void *)(buffer + size);
 	/* The value for 'no threshold' is -1, which will set the
 	 * threshold outside of the buffer, just like we want it.
 	 */
-	ds_set(context->ds, qual,
-	       ds_interrupt_threshold, buffer + size - ith);
+	trace->ith = (void *)(buffer + size - ith);
+
+	trace->flags = flags;
 }
 
-static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
-		      struct task_struct *task,
-		      void *base, size_t size, size_t th)
+
+static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
+		      enum ds_qualifier qual, struct task_struct *task,
+		      void *base, size_t size, size_t th, unsigned int flags)
 {
 	struct ds_context *context;
-	unsigned long irq;
 	int error;
 
-	error = -EOPNOTSUPP;
-	if (!ds_cfg.sizeof_ds)
-		goto out;
-
 	error = -EINVAL;
 	if (!base)
 		goto out;
@@ -360,43 +624,26 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
 		goto out;
 	tracer->context = context;
 
+	ds_init_ds_trace(trace, qual, base, size, th, flags);
 
-	spin_lock_irqsave(&ds_lock, irq);
-
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
-
-	error = -EPERM;
-	if (context->owner[qual])
-		goto out_put_tracer;
-	context->owner[qual] = tracer;
-
-	spin_unlock_irqrestore(&ds_lock, irq);
-
-
-	ds_install_ds_config(context, qual, base, size, th);
-
-	return 0;
-
- out_put_tracer:
-	put_tracer(task);
- out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
-	ds_put_context(context);
-	tracer->context = NULL;
+	error = 0;
  out:
 	return error;
 }
 
 struct bts_tracer *ds_request_bts(struct task_struct *task,
 				  void *base, size_t size,
-				  bts_ovfl_callback_t ovfl, size_t th)
+				  bts_ovfl_callback_t ovfl, size_t th,
+				  unsigned int flags)
 {
 	struct bts_tracer *tracer;
+	unsigned long irq;
 	int error;
 
+	error = -EOPNOTSUPP;
+	if (!ds_cfg.ctl[dsf_bts])
+		goto out;
+
 	/* buffer overflow notification is not yet implemented */
 	error = -EOPNOTSUPP;
 	if (ovfl)
@@ -408,12 +655,40 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 		goto out;
 	tracer->ovfl = ovfl;
 
-	error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+	error = ds_request(&tracer->ds, &tracer->trace.ds,
+			   ds_bts, task, base, size, th, flags);
 	if (error < 0)
 		goto out_tracer;
 
+
+	spin_lock_irqsave(&ds_lock, irq);
+
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
+
+	error = -EPERM;
+	if (tracer->ds.context->bts_master)
+		goto out_put_tracer;
+	tracer->ds.context->bts_master = tracer;
+
+	spin_unlock_irqrestore(&ds_lock, irq);
+
+
+	tracer->trace.read  = bts_read;
+	tracer->trace.write = bts_write;
+
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_resume_bts(tracer);
+
 	return tracer;
 
+ out_put_tracer:
+	put_tracer(task);
+ out_unlock:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
  out:
@@ -422,9 +697,11 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 
 struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 				    void *base, size_t size,
-				    pebs_ovfl_callback_t ovfl, size_t th)
+				    pebs_ovfl_callback_t ovfl, size_t th,
+				    unsigned int flags)
 {
 	struct pebs_tracer *tracer;
+	unsigned long irq;
 	int error;
 
 	/* buffer overflow notification is not yet implemented */
@@ -438,300 +715,171 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 		goto out;
 	tracer->ovfl = ovfl;
 
-	error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+	error = ds_request(&tracer->ds, &tracer->trace.ds,
+			   ds_pebs, task, base, size, th, flags);
 	if (error < 0)
 		goto out_tracer;
 
+	spin_lock_irqsave(&ds_lock, irq);
+
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
+
+	error = -EPERM;
+	if (tracer->ds.context->pebs_master)
+		goto out_put_tracer;
+	tracer->ds.context->pebs_master = tracer;
+
+	spin_unlock_irqrestore(&ds_lock, irq);
+
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_resume_pebs(tracer);
+
 	return tracer;
 
+ out_put_tracer:
+	put_tracer(task);
+ out_unlock:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
  out:
 	return ERR_PTR(error);
 }
 
-static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
-{
-	WARN_ON_ONCE(tracer->context->owner[qual] != tracer);
-	tracer->context->owner[qual] = NULL;
-
-	put_tracer(tracer->context->task);
-	ds_put_context(tracer->context);
-}
-
-int ds_release_bts(struct bts_tracer *tracer)
+void ds_release_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
+		return;
 
-	ds_release(&tracer->ds, ds_bts);
-	kfree(tracer);
+	ds_suspend_bts(tracer);
 
-	return 0;
-}
+	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
+	tracer->ds.context->bts_master = NULL;
 
-int ds_release_pebs(struct pebs_tracer *tracer)
-{
-	if (!tracer)
-		return -EINVAL;
+	put_tracer(tracer->ds.context->task);
+	ds_put_context(tracer->ds.context);
 
-	ds_release(&tracer->ds, ds_pebs);
 	kfree(tracer);
-
-	return 0;
-}
-
-static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
-{
-	unsigned long base, index;
-
-	base  = ds_get(context->ds, qual, ds_buffer_base);
-	index = ds_get(context->ds, qual, ds_index);
-
-	return (index - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
+void ds_suspend_bts(struct bts_tracer *tracer)
 {
-	if (!tracer)
-		return -EINVAL;
+	struct task_struct *task;
 
-	if (!pos)
-		return -EINVAL;
-
-	*pos = ds_get_index(tracer->ds.context, ds_bts);
-
-	return 0;
-}
-
-int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
-{
 	if (!tracer)
-		return -EINVAL;
+		return;
 
-	if (!pos)
-		return -EINVAL;
+	task = tracer->ds.context->task;
 
-	*pos = ds_get_index(tracer->ds.context, ds_pebs);
+	if (!task || (task == current))
+		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
 
-	return 0;
-}
+	if (task) {
+		task->thread.debugctlmsr &= ~BTS_CONTROL;
 
-static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
-{
-	unsigned long base, max;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	max  = ds_get(context->ds, qual, ds_absolute_maximum);
-
-	return (max - base) / ds_cfg.sizeof_rec[qual];
+		if (!task->thread.debugctlmsr)
+			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	}
 }
 
-int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
-	if (!tracer)
-		return -EINVAL;
-
-	if (!pos)
-		return -EINVAL;
-
-	*pos = ds_get_end(tracer->ds.context, ds_bts);
-
-	return 0;
-}
+	struct task_struct *task;
+	unsigned long control;
 
-int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
-{
 	if (!tracer)
-		return -EINVAL;
-
-	if (!pos)
-		return -EINVAL;
-
-	*pos = ds_get_end(tracer->ds.context, ds_pebs);
-
-	return 0;
-}
-
-static int ds_access(struct ds_context *context, enum ds_qualifier qual,
-		     size_t index, const void **record)
-{
-	unsigned long base, idx;
-
-	if (!record)
-		return -EINVAL;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	idx = base + (index * ds_cfg.sizeof_rec[qual]);
-
-	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
-		return -EINVAL;
+		return;
 
-	*record = (const void *)idx;
+	task = tracer->ds.context->task;
 
-	return ds_cfg.sizeof_rec[qual];
-}
+	control = ds_cfg.ctl[dsf_bts];
+	if (!(tracer->trace.ds.flags & BTS_KERNEL))
+		control |= ds_cfg.ctl[dsf_bts_kernel];
+	if (!(tracer->trace.ds.flags & BTS_USER))
+		control |= ds_cfg.ctl[dsf_bts_user];
 
-int ds_access_bts(struct bts_tracer *tracer, size_t index,
-		  const void **record)
-{
-	if (!tracer)
-		return -EINVAL;
+	if (task) {
+		task->thread.debugctlmsr |= control;
+		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	}
 
-	return ds_access(tracer->ds.context, ds_bts, index, record);
+	if (!task || (task == current))
+		update_debugctlmsr(get_debugctlmsr() | control);
 }
 
-int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
-		   const void **record)
+void ds_release_pebs(struct pebs_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
-
-	return ds_access(tracer->ds.context, ds_pebs, index, record);
-}
-
-static int ds_write(struct ds_context *context, enum ds_qualifier qual,
-		    const void *record, size_t size)
-{
-	int bytes_written = 0;
-
-	if (!record)
-		return -EINVAL;
-
-	while (size) {
-		unsigned long base, index, end, write_end, int_th;
-		unsigned long write_size, adj_write_size;
-
-		/*
-		 * write as much as possible without producing an
-		 * overflow interrupt.
-		 *
-		 * interrupt_threshold must either be
-		 * - bigger than absolute_maximum or
-		 * - point to a record between buffer_base and absolute_maximum
-		 *
-		 * index points to a valid record.
-		 */
-		base   = ds_get(context->ds, qual, ds_buffer_base);
-		index  = ds_get(context->ds, qual, ds_index);
-		end    = ds_get(context->ds, qual, ds_absolute_maximum);
-		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
-
-		write_end = min(end, int_th);
-
-		/* if we are already beyond the interrupt threshold,
-		 * we fill the entire buffer */
-		if (write_end <= index)
-			write_end = end;
-
-		if (write_end <= index)
-			break;
-
-		write_size = min((unsigned long) size, write_end - index);
-		memcpy((void *)index, record, write_size);
-
-		record = (const char *)record + write_size;
-		size -= write_size;
-		bytes_written += write_size;
-
-		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
-		adj_write_size *= ds_cfg.sizeof_rec[qual];
-
-		/* zero out trailing bytes */
-		memset((char *)index + write_size, 0,
-		       adj_write_size - write_size);
-		index += adj_write_size;
+		return;
 
-		if (index >= end)
-			index = base;
-		ds_set(context->ds, qual, ds_index, index);
+	ds_suspend_pebs(tracer);
 
-		if (index >= int_th)
-			ds_overflow(context, qual);
-	}
+	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
+	tracer->ds.context->pebs_master = NULL;
 
-	return bytes_written;
-}
+	put_tracer(tracer->ds.context->task);
+	ds_put_context(tracer->ds.context);
 
-int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
-{
-	if (!tracer)
-		return -EINVAL;
-
-	return ds_write(tracer->ds.context, ds_bts, record, size);
+	kfree(tracer);
 }
 
-int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
+void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
-	if (!tracer)
-		return -EINVAL;
 
-	return ds_write(tracer->ds.context, ds_pebs, record, size);
 }
 
-static void ds_reset_or_clear(struct ds_context *context,
-			      enum ds_qualifier qual, int clear)
+void ds_resume_pebs(struct pebs_tracer *tracer)
 {
-	unsigned long base, end;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	end  = ds_get(context->ds, qual, ds_absolute_maximum);
-
-	if (clear)
-		memset((void *)base, 0, end - base);
 
-	ds_set(context->ds, qual, ds_index, base);
 }
 
-int ds_reset_bts(struct bts_tracer *tracer)
+const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
-
-	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+		return NULL;
 
-	return 0;
+	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	return &tracer->trace;
 }
 
-int ds_reset_pebs(struct pebs_tracer *tracer)
+const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
+		return NULL;
 
-	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	tracer->trace.reset_value =
+		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
 
-	return 0;
+	return &tracer->trace;
 }
 
-int ds_clear_bts(struct bts_tracer *tracer)
+int ds_reset_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
 		return -EINVAL;
 
-	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
-
-	return 0;
-}
-
-int ds_clear_pebs(struct pebs_tracer *tracer)
-{
-	if (!tracer)
-		return -EINVAL;
+	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
 }
 
-int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
+int ds_reset_pebs(struct pebs_tracer *tracer)
 {
 	if (!tracer)
 		return -EINVAL;
 
-	if (!value)
-		return -EINVAL;
+	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	*value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
 }
@@ -746,35 +894,59 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 	return 0;
 }
 
-static const struct ds_configuration ds_cfg_var = {
-	.sizeof_ds    = sizeof(long) * 12,
-	.sizeof_field = sizeof(long),
-	.sizeof_rec[ds_bts]   = sizeof(long) * 3,
+static const struct ds_configuration ds_cfg_netburst = {
+	.name = "netburst",
+	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
+	.ctl[dsf_bts_kernel]	= (1 << 5),
+	.ctl[dsf_bts_user]	= (1 << 6),
+
+	.sizeof_field		= sizeof(long),
+	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
 #ifdef __i386__
-	.sizeof_rec[ds_pebs]  = sizeof(long) * 10
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
 #else
-	.sizeof_rec[ds_pebs]  = sizeof(long) * 18
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
 #endif
 };
-static const struct ds_configuration ds_cfg_64 = {
-	.sizeof_ds    = 8 * 12,
-	.sizeof_field = 8,
-	.sizeof_rec[ds_bts]   = 8 * 3,
+static const struct ds_configuration ds_cfg_pentium_m = {
+	.name = "pentium m",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+
+	.sizeof_field		= sizeof(long),
+	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
 #ifdef __i386__
-	.sizeof_rec[ds_pebs]  = 8 * 10
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
 #else
-	.sizeof_rec[ds_pebs]  = 8 * 18
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
 #endif
 };
+static const struct ds_configuration ds_cfg_core2 = {
+	.name = "core 2",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+	.ctl[dsf_bts_kernel]	= (1 << 9),
+	.ctl[dsf_bts_user]	= (1 << 10),
+
+	.sizeof_field		= 8,
+	.sizeof_rec[ds_bts]	= 8 * 3,
+	.sizeof_rec[ds_pebs]	= 8 * 18,
+};
 
-static inline void
+static void
 ds_configure(const struct ds_configuration *cfg)
 {
+	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
 
-	printk(KERN_INFO "DS available\n");
+	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+
+	if (!cpu_has_bts) {
+		ds_cfg.ctl[dsf_bts] = 0;
+		printk(KERN_INFO "[ds] bts not available\n");
+	}
+	if (!cpu_has_pebs)
+		printk(KERN_INFO "[ds] pebs not available\n");
 
-	WARN_ON_ONCE(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
+	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -787,10 +959,10 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 			break;
 		case 0xD:
 		case 0xE: /* Pentium M */
-			ds_configure(&ds_cfg_var);
+			ds_configure(&ds_cfg_pentium_m);
 			break;
 		default: /* Core2, Atom, ... */
-			ds_configure(&ds_cfg_64);
+			ds_configure(&ds_cfg_core2);
 			break;
 		}
 		break;
@@ -799,7 +971,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_var);
+			ds_configure(&ds_cfg_netburst);
 			break;
 		default:
 			/* sorry, don't know about them */
@@ -812,14 +984,41 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	}
 }
 
-void ds_free(struct ds_context *context)
+/*
+ * Change the DS configuration from tracing prev to tracing next.
+ */
+void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-	/* This is called when the task owning the parameter context
-	 * is dying. There should not be any user of that context left
-	 * to disturb us, anymore. */
-	unsigned long leftovers = context->count;
-	while (leftovers--) {
-		put_tracer(context->task);
-		ds_put_context(context);
+	struct ds_context *prev_ctx = prev->thread.ds_ctx;
+	struct ds_context *next_ctx = next->thread.ds_ctx;
+
+	if (prev_ctx) {
+		update_debugctlmsr(0);
+
+		if (prev_ctx->bts_master &&
+		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
+			struct bts_struct ts = {
+				.qualifier = bts_task_departs,
+				.variant.timestamp.jiffies = jiffies_64,
+				.variant.timestamp.pid = prev->pid
+			};
+			bts_write(prev_ctx->bts_master, &ts);
+		}
+	}
+
+	if (next_ctx) {
+		if (next_ctx->bts_master &&
+		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
+			struct bts_struct ts = {
+				.qualifier = bts_task_arrives,
+				.variant.timestamp.jiffies = jiffies_64,
+				.variant.timestamp.pid = next->pid
+			};
+			bts_write(next_ctx->bts_master, &ts);
+		}
+
+		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
 	}
+
+	update_debugctlmsr(next->thread.debugctlmsr);
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 24c2276aa453..605eff9a8ac0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -252,11 +252,14 @@ void exit_thread(void)
 		put_cpu();
 	}
 #ifdef CONFIG_X86_DS
-	/* Free any DS contexts that have not been properly released. */
-	if (unlikely(current->thread.ds_ctx)) {
-		/* we clear debugctl to make sure DS is not used. */
-		update_debugctlmsr(0);
-		ds_free(current->thread.ds_ctx);
+	/* Free any BTS tracers that have not been properly released. */
+	if (unlikely(current->bts)) {
+		ds_release_bts(current->bts);
+		current->bts = NULL;
+
+		kfree(current->bts_buffer);
+		current->bts_buffer = NULL;
+		current->bts_size = 0;
 	}
 #endif /* CONFIG_X86_DS */
 }
@@ -420,48 +423,19 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
-#ifdef CONFIG_X86_DS
-static int update_debugctl(struct thread_struct *prev,
-			struct thread_struct *next, unsigned long debugctl)
-{
-	unsigned long ds_prev = 0;
-	unsigned long ds_next = 0;
-
-	if (prev->ds_ctx)
-		ds_prev = (unsigned long)prev->ds_ctx->ds;
-	if (next->ds_ctx)
-		ds_next = (unsigned long)next->ds_ctx->ds;
-
-	if (ds_next != ds_prev) {
-		/* we clear debugctl to make sure DS
-		 * is not in use when we change it */
-		debugctl = 0;
-		update_debugctlmsr(0);
-		wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
-	}
-	return debugctl;
-}
-#else
-static int update_debugctl(struct thread_struct *prev,
-			struct thread_struct *next, unsigned long debugctl)
-{
-	return debugctl;
-}
-#endif /* CONFIG_X86_DS */
-
 static noinline void
 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
-	unsigned long debugctl;
 
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
-	debugctl = update_debugctl(prev, next, prev->debugctlmsr);
-
-	if (next->debugctlmsr != debugctl)
+	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
+	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
+		ds_switch_to(prev_p, next_p);
+	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -483,15 +457,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 			hard_enable_TSC();
 	}
 
-#ifdef CONFIG_X86_PTRACE_BTS
-	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
-		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
-
-	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
-		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif /* CONFIG_X86_PTRACE_BTS */
-
-
 	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
 		/*
 		 * Disable the bitmap via an invalid offset. We still cache
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index fbb321d53d34..1cfd2a4bf853 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -237,11 +237,14 @@ void exit_thread(void)
 		put_cpu();
 	}
 #ifdef CONFIG_X86_DS
-	/* Free any DS contexts that have not been properly released. */
-	if (unlikely(t->ds_ctx)) {
-		/* we clear debugctl to make sure DS is not used. */
-		update_debugctlmsr(0);
-		ds_free(t->ds_ctx);
+	/* Free any BTS tracers that have not been properly released. */
+	if (unlikely(current->bts)) {
+		ds_release_bts(current->bts);
+		current->bts = NULL;
+
+		kfree(current->bts_buffer);
+		current->bts_buffer = NULL;
+		current->bts_size = 0;
 	}
 #endif /* CONFIG_X86_DS */
 }
@@ -471,35 +474,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 				    struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
-	unsigned long debugctl;
 
 	prev = &prev_p->thread,
 	next = &next_p->thread;
 
-	debugctl = prev->debugctlmsr;
-
-#ifdef CONFIG_X86_DS
-	{
-		unsigned long ds_prev = 0, ds_next = 0;
-
-		if (prev->ds_ctx)
-			ds_prev = (unsigned long)prev->ds_ctx->ds;
-		if (next->ds_ctx)
-			ds_next = (unsigned long)next->ds_ctx->ds;
-
-		if (ds_next != ds_prev) {
-			/*
-			 * We clear debugctl to make sure DS
-			 * is not in use when we change it:
-			 */
-			debugctl = 0;
-			update_debugctlmsr(0);
-			wrmsrl(MSR_IA32_DS_AREA, ds_next);
-		}
-	}
-#endif /* CONFIG_X86_DS */
-
-	if (next->debugctlmsr != debugctl)
+	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
+	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
+		ds_switch_to(prev_p, next_p);
+	else if (next->debugctlmsr != prev->debugctlmsr)
 		update_debugctlmsr(next->debugctlmsr);
 
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -534,14 +516,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
 		 */
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
-
-#ifdef CONFIG_X86_PTRACE_BTS
-	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
-		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
-
-	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
-		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 /*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b2998fe1166b..45e9855da2d2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -581,153 +581,73 @@ static int ioperm_get(struct task_struct *target,
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
-/*
- * The configuration for a particular BTS hardware implementation.
- */
-struct bts_configuration {
-	/* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
-	unsigned char  sizeof_bts;
-	/* the size of a field in the BTS record in bytes */
-	unsigned char  sizeof_field;
-	/* a bitmask to enable/disable BTS in DEBUGCTL MSR */
-	unsigned long debugctl_mask;
-};
-static struct bts_configuration bts_cfg;
-
-#define BTS_MAX_RECORD_SIZE (8 * 3)
-
-
-/*
- * Branch Trace Store (BTS) uses the following format. Different
- * architectures vary in the size of those fields.
- * - source linear address
- * - destination linear address
- * - flags
- *
- * Later architectures use 64bit pointers throughout, whereas earlier
- * architectures use 32bit pointers in 32bit mode.
- *
- * We compute the base address for the first 8 fields based on:
- * - the field size stored in the DS configuration
- * - the relative field position
- *
- * In order to store additional information in the BTS buffer, we use
- * a special source address to indicate that the record requires
- * special interpretation.
- *
- * Netburst indicated via a bit in the flags field whether the branch
- * was predicted; this is ignored.
- */
-
-enum bts_field {
-	bts_from = 0,
-	bts_to,
-	bts_flags,
-
-	bts_escape = (unsigned long)-1,
-	bts_qual = bts_to,
-	bts_jiffies = bts_flags
-};
-
-static inline unsigned long bts_get(const char *base, enum bts_field field)
-{
-	base += (bts_cfg.sizeof_field * field);
-	return *(unsigned long *)base;
-}
-
-static inline void bts_set(char *base, enum bts_field field, unsigned long val)
-{
-	base += (bts_cfg.sizeof_field * field);;
-	(*(unsigned long *)base) = val;
-}
-
-/*
- * Translate a BTS record from the raw format into the bts_struct format
- *
- * out (out): bts_struct interpretation
- * raw: raw BTS record
- */
-static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
-{
-	memset(out, 0, sizeof(*out));
-	if (bts_get(raw, bts_from) == bts_escape) {
-		out->qualifier       = bts_get(raw, bts_qual);
-		out->variant.jiffies = bts_get(raw, bts_jiffies);
-	} else {
-		out->qualifier = BTS_BRANCH;
-		out->variant.lbr.from_ip = bts_get(raw, bts_from);
-		out->variant.lbr.to_ip   = bts_get(raw, bts_to);
-	}
-}
-
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
-	struct bts_struct ret;
-	const void *bts_record;
-	size_t bts_index, bts_end;
+	const struct bts_trace *trace;
+	struct bts_struct bts;
+	const unsigned char *at;
 	int error;
 
-	error = ds_get_bts_end(child->bts, &bts_end);
-	if (error < 0)
-		return error;
-
-	if (bts_end <= index)
-		return -EINVAL;
+	trace = ds_read_bts(child->bts);
+	if (!trace)
+		return -EPERM;
 
-	error = ds_get_bts_index(child->bts, &bts_index);
-	if (error < 0)
-		return error;
+	at = trace->ds.top - ((index + 1) * trace->ds.size);
+	if ((void *)at < trace->ds.begin)
+		at += (trace->ds.n * trace->ds.size);
 
-	/* translate the ptrace bts index into the ds bts index */
-	bts_index += bts_end - (index + 1);
-	if (bts_end <= bts_index)
-		bts_index -= bts_end;
+	if (!trace->read)
+		return -EOPNOTSUPP;
 
-	error = ds_access_bts(child->bts, bts_index, &bts_record);
+	error = trace->read(child->bts, at, &bts);
 	if (error < 0)
 		return error;
 
-	ptrace_bts_translate_record(&ret, bts_record);
-
-	if (copy_to_user(out, &ret, sizeof(ret)))
+	if (copy_to_user(out, &bts, sizeof(bts)))
 		return -EFAULT;
 
-	return sizeof(ret);
+	return sizeof(bts);
 }
 
 static int ptrace_bts_drain(struct task_struct *child,
 			    long size,
 			    struct bts_struct __user *out)
 {
-	struct bts_struct ret;
-	const unsigned char *raw;
-	size_t end, i;
-	int error;
+	const struct bts_trace *trace;
+	const unsigned char *at;
+	int error, drained = 0;
 
-	error = ds_get_bts_index(child->bts, &end);
-	if (error < 0)
-		return error;
+	trace = ds_read_bts(child->bts);
+	if (!trace)
+		return -EPERM;
 
-	if (size < (end * sizeof(struct bts_struct)))
+	if (!trace->read)
+		return -EOPNOTSUPP;
+
+	if (size < (trace->ds.top - trace->ds.begin))
 		return -EIO;
 
-	error = ds_access_bts(child->bts, 0, (const void **)&raw);
-	if (error < 0)
-		return error;
+	for (at = trace->ds.begin; (void *)at < trace->ds.top;
+	     out++, drained++, at += trace->ds.size) {
+		struct bts_struct bts;
+		int error;
 
-	for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
-		ptrace_bts_translate_record(&ret, raw);
+		error = trace->read(child->bts, at, &bts);
+		if (error < 0)
+			return error;
 
-		if (copy_to_user(out, &ret, sizeof(ret)))
+		if (copy_to_user(out, &bts, sizeof(bts)))
 			return -EFAULT;
 	}
 
-	error = ds_clear_bts(child->bts);
+	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
+
+	error = ds_reset_bts(child->bts);
 	if (error < 0)
 		return error;
 
-	return end;
+	return drained;
 }
 
 static int ptrace_bts_config(struct task_struct *child,
@@ -735,136 +655,89 @@ static int ptrace_bts_config(struct task_struct *child,
 			     const struct ptrace_bts_config __user *ucfg)
 {
 	struct ptrace_bts_config cfg;
-	int error = 0;
-
-	error = -EOPNOTSUPP;
-	if (!bts_cfg.sizeof_bts)
-		goto errout;
+	unsigned int flags = 0;
 
-	error = -EIO;
 	if (cfg_size < sizeof(cfg))
-		goto errout;
+		return -EIO;
 
-	error = -EFAULT;
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
-		goto errout;
-
-	error = -EINVAL;
-	if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
-	    !(cfg.flags & PTRACE_BTS_O_ALLOC))
-		goto errout;
-
-	if (cfg.flags & PTRACE_BTS_O_ALLOC) {
-		bts_ovfl_callback_t ovfl = NULL;
-		unsigned int sig = 0;
-
-		error = -EINVAL;
-		if (cfg.size < (10 * bts_cfg.sizeof_bts))
-			goto errout;
+		return -EFAULT;
 
-		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
-			if (!cfg.signal)
-				goto errout;
+	if (child->bts) {
+		ds_release_bts(child->bts);
+		child->bts = NULL;
+	}
 
-			error = -EOPNOTSUPP;
-			goto errout;
+	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+		if (!cfg.signal)
+			return -EINVAL;
 
-			sig  = cfg.signal;
-		}
+		return -EOPNOTSUPP;
 
-		if (child->bts) {
-			(void)ds_release_bts(child->bts);
-			kfree(child->bts_buffer);
+		child->thread.bts_ovfl_signal = cfg.signal;
+	}
 
-			child->bts = NULL;
-			child->bts_buffer = NULL;
-		}
+	if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
+	    (cfg.size != child->bts_size)) {
+		kfree(child->bts_buffer);
 
-		error = -ENOMEM;
+		child->bts_size = cfg.size;
 		child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
-		if (!child->bts_buffer)
-			goto errout;
-
-		child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
-					    ovfl, /* th = */ (size_t)-1);
-		if (IS_ERR(child->bts)) {
-			error = PTR_ERR(child->bts);
-			kfree(child->bts_buffer);
-			child->bts = NULL;
-			child->bts_buffer = NULL;
-			goto errout;
+		if (!child->bts_buffer) {
+			child->bts_size = 0;
+			return -ENOMEM;
 		}
-
-		child->thread.bts_ovfl_signal = sig;
 	}
 
-	error = -EINVAL;
-	if (!child->thread.ds_ctx && cfg.flags)
-		goto errout;
-
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
-		child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
-	else
-		child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+		flags |= BTS_USER;
 
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
-		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-	else
-		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+		flags |= BTS_TIMESTAMPS;
 
-	error = sizeof(cfg);
+	child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
+				    /* ovfl = */ NULL, /* th = */ (size_t)-1,
+				    flags);
+	if (IS_ERR(child->bts)) {
+		int error = PTR_ERR(child->bts);
 
-out:
-	if (child->thread.debugctlmsr)
-		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-	else
-		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+		kfree(child->bts_buffer);
+		child->bts = NULL;
+		child->bts_buffer = NULL;
+		child->bts_size = 0;
 
-	return error;
+		return error;
+	}
 
-errout:
-	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
-	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-	goto out;
+	return sizeof(cfg);
 }
 
 static int ptrace_bts_status(struct task_struct *child,
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
+	const struct bts_trace *trace;
 	struct ptrace_bts_config cfg;
-	size_t end;
-	const void *base, *max;
-	int error;
 
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	error = ds_get_bts_end(child->bts, &end);
-	if (error < 0)
-		return error;
-
-	error = ds_access_bts(child->bts, /* index = */ 0, &base);
-	if (error < 0)
-		return error;
-
-	error = ds_access_bts(child->bts, /* index = */ end, &max);
-	if (error < 0)
-		return error;
+	trace = ds_read_bts(child->bts);
+	if (!trace)
+		return -EPERM;
 
 	memset(&cfg, 0, sizeof(cfg));
-	cfg.size = (max - base);
+	cfg.size = trace->ds.end - trace->ds.begin;
 	cfg.signal = child->thread.bts_ovfl_signal;
 	cfg.bts_size = sizeof(struct bts_struct);
 
 	if (cfg.signal)
 		cfg.flags |= PTRACE_BTS_O_SIGNAL;
 
-	if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-	    child->thread.debugctlmsr & bts_cfg.debugctl_mask)
+	if (trace->ds.flags & BTS_USER)
 		cfg.flags |= PTRACE_BTS_O_TRACE;
 
-	if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+	if (trace->ds.flags & BTS_TIMESTAMPS)
 		cfg.flags |= PTRACE_BTS_O_SCHED;
 
 	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
@@ -873,105 +746,28 @@ static int ptrace_bts_status(struct task_struct *child,
 	return sizeof(cfg);
 }
 
-static int ptrace_bts_write_record(struct task_struct *child,
-				   const struct bts_struct *in)
+static int ptrace_bts_clear(struct task_struct *child)
 {
-	unsigned char bts_record[BTS_MAX_RECORD_SIZE];
+	const struct bts_trace *trace;
 
-	if (BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts)
-		return -EOVERFLOW;
+	trace = ds_read_bts(child->bts);
+	if (!trace)
+		return -EPERM;
 
-	memset(bts_record, 0, bts_cfg.sizeof_bts);
-	switch (in->qualifier) {
-	case BTS_INVALID:
-		break;
+	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	case BTS_BRANCH:
-		bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
-		bts_set(bts_record, bts_to,   in->variant.lbr.to_ip);
-		break;
-
-	case BTS_TASK_ARRIVES:
-	case BTS_TASK_DEPARTS:
-		bts_set(bts_record, bts_from,    bts_escape);
-		bts_set(bts_record, bts_qual,    in->qualifier);
-		bts_set(bts_record, bts_jiffies, in->variant.jiffies);
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
+	return ds_reset_bts(child->bts);
 }
 
-void ptrace_bts_take_timestamp(struct task_struct *tsk,
-			       enum bts_qualifier qualifier)
+static int ptrace_bts_size(struct task_struct *child)
 {
-	struct bts_struct rec = {
-		.qualifier = qualifier,
-		.variant.jiffies = jiffies_64
-	};
-
-	ptrace_bts_write_record(tsk, &rec);
-}
-
-static const struct bts_configuration bts_cfg_netburst = {
-	.sizeof_bts    = sizeof(long) * 3,
-	.sizeof_field  = sizeof(long),
-	.debugctl_mask = (1<<2)|(1<<3)|(1<<5)
-};
+	const struct bts_trace *trace;
 
-static const struct bts_configuration bts_cfg_pentium_m = {
-	.sizeof_bts    = sizeof(long) * 3,
-	.sizeof_field  = sizeof(long),
-	.debugctl_mask = (1<<6)|(1<<7)
-};
+	trace = ds_read_bts(child->bts);
+	if (!trace)
+		return -EPERM;
 
-static const struct bts_configuration bts_cfg_core2 = {
-	.sizeof_bts    = 8 * 3,
-	.sizeof_field  = 8,
-	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
-};
-
-static inline void bts_configure(const struct bts_configuration *cfg)
-{
-	bts_cfg = *cfg;
-}
-
-void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
-{
-	switch (c->x86) {
-	case 0x6:
-		switch (c->x86_model) {
-		case 0 ... 0xC:
-			/* sorry, don't know about them */
-			break;
-		case 0xD:
-		case 0xE: /* Pentium M */
-			bts_configure(&bts_cfg_pentium_m);
-			break;
-		default: /* Core2, Atom, ... */
-			bts_configure(&bts_cfg_core2);
-			break;
-		}
-		break;
-	case 0xF:
-		switch (c->x86_model) {
-		case 0x0:
-		case 0x1:
-		case 0x2: /* Netburst */
-			bts_configure(&bts_cfg_netburst);
-			break;
-		default:
-			/* sorry, don't know about them */
-			break;
-		}
-		break;
-	default:
-		/* sorry, don't know about them */
-		break;
-	}
+	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 #endif /* CONFIG_X86_PTRACE_BTS */
 
@@ -988,15 +784,12 @@ void ptrace_disable(struct task_struct *child)
 #endif
 #ifdef CONFIG_X86_PTRACE_BTS
 	if (child->bts) {
-		(void)ds_release_bts(child->bts);
+		ds_release_bts(child->bts);
+		child->bts = NULL;
+
 		kfree(child->bts_buffer);
 		child->bts_buffer = NULL;
-
-		child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
-		if (!child->thread.debugctlmsr)
-			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-
-		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+		child->bts_size = 0;
 	}
 #endif /* CONFIG_X86_PTRACE_BTS */
 }
@@ -1129,16 +922,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			(child, data, (struct ptrace_bts_config __user *)addr);
 		break;
 
-	case PTRACE_BTS_SIZE: {
-		size_t size;
-
-		ret = ds_get_bts_index(child->bts, &size);
-		if (ret == 0) {
-			WARN_ON_ONCE(size != (int) size);
-			ret = (int) size;
-		}
+	case PTRACE_BTS_SIZE:
+		ret = ptrace_bts_size(child);
 		break;
-	}
 
 	case PTRACE_BTS_GET:
 		ret = ptrace_bts_read_record
@@ -1146,7 +932,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 
 	case PTRACE_BTS_CLEAR:
-		ret = ds_clear_bts(child->bts);
+		ret = ptrace_bts_clear(child);
 		break;
 
 	case PTRACE_BTS_DRAIN:
@@ -1409,6 +1195,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 	case PTRACE_GET_THREAD_AREA:
 	case PTRACE_SET_THREAD_AREA:
+#ifdef CONFIG_X86_PTRACE_BTS
+	case PTRACE_BTS_CONFIG:
+	case PTRACE_BTS_STATUS:
+	case PTRACE_BTS_SIZE:
+	case PTRACE_BTS_GET:
+	case PTRACE_BTS_CLEAR:
+	case PTRACE_BTS_DRAIN:
+#endif /* CONFIG_X86_PTRACE_BTS */
 		return arch_ptrace(child, request, addr, data);
 
 	default:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b81fc5f7731..dc5ea65dc716 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1176,6 +1176,7 @@ struct task_struct {
 	 * The buffer to hold the BTS data.
 	 */
 	void *bts_buffer;
+	size_t bts_size;
 #endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
-- 
cgit v1.2.3


From 8001530d5af707eb9a158839c8f651eb6c1cb3c2 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 11 Dec 2008 16:10:08 +0100
Subject: tracing/fastboot: fix len of func buffer

Impact: fix possible stack overrun

This is a port of a patch included in the mainline (KSYM_SYMBOL_LEN fixes).
The current func len is not large enough to contain the max symbol len, the
right size must be KSYM_SYMBOL_LEN.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/boot.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/boot.h b/include/trace/boot.h
index 6b54537eab02..3ec58b4b7512 100644
--- a/include/trace/boot.h
+++ b/include/trace/boot.h
@@ -9,7 +9,7 @@
  */
 struct boot_trace_call {
 	pid_t			caller;
-	char			func[KSYM_NAME_LEN];
+	char			func[KSYM_SYMBOL_LEN];
 };
 
 /*
@@ -17,7 +17,7 @@ struct boot_trace_call {
  * while it returns.
  */
 struct boot_trace_ret {
-	char			func[KSYM_NAME_LEN];
+	char			func[KSYM_SYMBOL_LEN];
 	int				result;
 	unsigned long long	duration;		/* nsecs */
 };
-- 
cgit v1.2.3


From da485e0cb16726797e99a595a399b9fc721b91bc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 11 Dec 2008 16:14:23 +0100
Subject: tracing/fastboot: include missing headers

For now include/trace/boot.h doesn't need to include necessary headers
for its functions and structures because the files that include it already
do it.

But boot.h could be needed as well for further uses on other files.
So, this patch adds the necessary headers for future purposes...

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/boot.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/trace/boot.h b/include/trace/boot.h
index 3ec58b4b7512..088ea089e31d 100644
--- a/include/trace/boot.h
+++ b/include/trace/boot.h
@@ -1,6 +1,10 @@
 #ifndef _LINUX_TRACE_BOOT_H
 #define _LINUX_TRACE_BOOT_H
 
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+
 /*
  * Structure which defines the trace of an initcall
  * while it is called.
-- 
cgit v1.2.3


From a0343e823184070f55364d8359f832dcb33c57c7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 9 Dec 2008 23:53:16 +0100
Subject: tracing/function-graph-tracer: add a new .irqentry.text section

Impact: let the function-graph-tracer be aware of the irq entrypoints

Add a new .irqentry.text section to store the irq entrypoints functions
inside the same section. This way, the tracer will be able to signal
an interrupts triggering on output by recognizing these entrypoints.

Also, make this section recordable for dynamic tracing.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vmlinux_64.lds.S  |  1 +
 include/asm-generic/vmlinux.lds.h | 10 ++++++++++
 scripts/recordmcount.pl           |  1 +
 3 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 46e05447405b..1a614c0e6bef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -35,6 +35,7 @@ SECTIONS
 	SCHED_TEXT
 	LOCK_TEXT
 	KPROBES_TEXT
+	IRQENTRY_TEXT
 	*(.fixup)
 	*(.gnu.warning)
 	_etext = .;		/* End of text section */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index eba835a2c2cd..c61fab1dd2f8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -288,6 +288,16 @@
 		*(.kprobes.text)					\
 		VMLINUX_SYMBOL(__kprobes_text_end) = .;
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#define IRQENTRY_TEXT							\
+		ALIGN_FUNCTION();					\
+		VMLINUX_SYMBOL(__irqentry_text_start) = .;		\
+		*(.irqentry.text)					\
+		VMLINUX_SYMBOL(__irqentry_text_end) = .;
+#else
+#define IRQENTRY_TEXT
+#endif
+
 /* Section used for early init (in .S files) */
 #define HEAD_TEXT  *(.head.text)
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 0b1dc9f9bb06..fe831412bea9 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -114,6 +114,7 @@ my %text_sections = (
      ".text" => 1,
      ".sched.text" => 1,
      ".spinlock.text" => 1,
+     ".irqentry.text" => 1,
 );
 
 $objdump = "objdump" if ((length $objdump) == 0);
-- 
cgit v1.2.3


From bcbc4f20b52c2c40c43a4d2337707dcdfe81bc3a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 9 Dec 2008 23:54:20 +0100
Subject: tracing/function-graph-tracer: annotate do_IRQ and
 smp_apic_timer_interrupt

Impact: move most important x86 irq entry-points to a separate subsection

Annotate do_IRQ and smp_apic_timer_interrupt to put them into the .irqentry.text
subsection. These function will so be recognized as hardirq entrypoints for the
function-graph-tracer. We could also annotate other irq entries but the others
are far less important but they can be added on request.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c   |  3 ++-
 arch/x86/kernel/irq_64.c |  3 ++-
 include/linux/ftrace.h   | 11 +++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b525..b946ac19753b 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/dmar.h>
+#include <linux/ftrace.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -800,7 +801,7 @@ static void local_apic_timer_interrupt(void)
  * [ if a single-CPU system runs an SMP kernel then we call the local
  *   interrupt as well. Thus we cannot inline the local irq ... ]
  */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
+void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..11c65e811ffe 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/delay.h>
+#include <linux/ftrace.h>
 #include <asm/uaccess.h>
 #include <asm/io_apic.h>
 #include <asm/idle.h>
@@ -47,7 +48,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
-asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	struct irq_desc *desc;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 11cac81eed08..44020f31bd81 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -377,6 +377,16 @@ struct ftrace_graph_ret {
  */
 #define __notrace_funcgraph		notrace
 
+/*
+ * We want to which function is an entrypoint of a hardirq.
+ * That will help us to put a signal on output.
+ */
+#define __irq_entry		 __attribute__((__section__(".irqentry.text")))
+
+/* Limits of hardirq entrypoints */
+extern char __irqentry_text_start[];
+extern char __irqentry_text_end[];
+
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 /* Type of the callback handlers for tracing function graph*/
@@ -414,6 +424,7 @@ static inline void unpause_graph_tracing(void)
 #else
 
 #define __notrace_funcgraph
+#define __irq_entry
 
 static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
-- 
cgit v1.2.3


From cbc34ed1ac36690f75fd272e19e7b4fc29aae5a2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 10 Dec 2008 08:08:22 +0100
Subject: sched: fix tracepoints in scheduler

The trace point only caught one of many places where a task changes cpu,
put it in the right place to we get all of them.

Change the signature while we're at it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h | 4 ++--
 kernel/sched.c        | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/sched.h b/include/trace/sched.h
index 9b2854abf7e2..f4549d506b16 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -30,8 +30,8 @@ DECLARE_TRACE(sched_switch,
 		TPARGS(rq, prev, next));
 
 DECLARE_TRACE(sched_migrate_task,
-	TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
-		TPARGS(rq, p, dest_cpu));
+	TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+		TPARGS(p, orig_cpu, dest_cpu));
 
 DECLARE_TRACE(sched_process_free,
 	TPPROTO(struct task_struct *p),
diff --git a/kernel/sched.c b/kernel/sched.c
index 7729c4bbc8ba..d377097572f9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1851,6 +1851,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	clock_offset = old_rq->clock - new_rq->clock;
 
+	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
 		p->se.wait_start -= clock_offset;
@@ -2868,7 +2870,6 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
-	trace_sched_migrate_task(rq, p, dest_cpu);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
-- 
cgit v1.2.3


From 0ebb26e7a4e2c5337502e98b2221e037fda911b9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 11:26:39 +0100
Subject: sparse irqs: handle !GENIRQ platforms

Impact: build fix

fix:

 In file included from /home/mingo/tip/arch/m68k/amiga/amiints.c:39:
 /home/mingo/tip/include/linux/interrupt.h:21: error: expected identifier or '('
 /home/mingo/tip/arch/m68k/amiga/amiints.c: In function 'amiga_init_IRQ':

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  4 ++--
 include/linux/irqnr.h     | 11 ++++++++++-
 include/linux/random.h    |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 79e915e7e8a5..777f89e00b4a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,12 +14,12 @@
 #include <linux/irqflags.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
+#include <linux/irqnr.h>
+
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-extern int nr_irqs;
-
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 13754f813589..95d2b74641f5 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -1,6 +1,11 @@
 #ifndef _LINUX_IRQNR_H
 #define _LINUX_IRQNR_H
 
+/*
+ * Generic irq_desc iterators:
+ */
+#ifdef __KERNEL__
+
 #ifndef CONFIG_GENERIC_HARDIRQS
 #include <asm/irq.h>
 # define nr_irqs		NR_IRQS
@@ -11,10 +16,12 @@
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
 #else
+
+extern int nr_irqs;
+
 #ifndef CONFIG_SPARSE_IRQ
 
 struct irq_desc;
-extern int nr_irqs;
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
 # define for_each_irq_desc_reverse(irq, desc)                          \
@@ -26,4 +33,6 @@ extern int nr_irqs;
 #define for_each_irq_nr(irq)                   \
        for (irq = 0; irq < nr_irqs; irq++)
 
+#endif /* __KERNEL__ */
+
 #endif
diff --git a/include/linux/random.h b/include/linux/random.h
index ad9daa2374d5..adbf3bd3c6b3 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -8,6 +8,7 @@
 #define _LINUX_RANDOM_H
 
 #include <linux/ioctl.h>
+#include <linux/irqnr.h>
 
 /* ioctl()'s for the random number generator */
 
@@ -49,7 +50,6 @@ struct timer_rand_state;
 
 extern struct timer_rand_state *irq_timer_state[];
 
-extern int nr_irqs;
 static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
 {
 	if (irq >= nr_irqs)
-- 
cgit v1.2.3


From 30cb367ea2be76bf71dbd275f38d0fd3b6f4142b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 12:19:57 +0100
Subject: sparse irqs: add irqnr.h to the user headers list

Impact: fix build error

/home/mingo/tip/usr/include/linux/random.h:11: included file
'linux/irqnr.h' is not exported

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e531783e5d78..95ac82340c3b 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -313,6 +313,7 @@ unifdef-y += ptrace.h
 unifdef-y += qnx4_fs.h
 unifdef-y += quota.h
 unifdef-y += random.h
+unifdef-y += irqnr.h
 unifdef-y += reboot.h
 unifdef-y += reiserfs_fs.h
 unifdef-y += reiserfs_xattr.h
-- 
cgit v1.2.3


From ee79d1bdb6a10499e53f80b1e8d14110215178ba Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 9 Dec 2008 18:49:50 +0100
Subject: sched: let arch_update_cpu_topology indicate if topology changed

Change arch_update_cpu_topology so it returns 1 if the cpu topology changed
and 0 if it didn't change. This will be useful for the next patch which adds
a call to this function in partition_sched_domains.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/s390/kernel/topology.c | 5 +++--
 include/linux/topology.h    | 2 +-
 kernel/sched.c              | 8 +++++++-
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index a947899dcba1..bf96f1b5c6ec 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -212,7 +212,7 @@ static void update_cpu_core_map(void)
 		cpu_core_map[cpu] = cpu_coregroup_map(cpu);
 }
 
-void arch_update_cpu_topology(void)
+int arch_update_cpu_topology(void)
 {
 	struct tl_info *info = tl_info;
 	struct sys_device *sysdev;
@@ -221,7 +221,7 @@ void arch_update_cpu_topology(void)
 	if (!machine_has_topology) {
 		update_cpu_core_map();
 		topology_update_polarization_simple();
-		return;
+		return 0;
 	}
 	stsi(info, 15, 1, 2);
 	tl_to_cores(info);
@@ -230,6 +230,7 @@ void arch_update_cpu_topology(void)
 		sysdev = get_cpu_sysdev(cpu);
 		kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
 	}
+	return 1;
 }
 
 static void topology_work_fn(struct work_struct *work)
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 117f1b7405cf..0c5b5ac36d8e 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -49,7 +49,7 @@
 	for_each_online_node(node)			\
 		if (nr_cpus_node(node))
 
-void arch_update_cpu_topology(void);
+int arch_update_cpu_topology(void);
 
 /* Conform to ACPI 2.0 SLIT distance definitions */
 #define LOCAL_DISTANCE		10
diff --git a/kernel/sched.c b/kernel/sched.c
index ef212da928e8..fcfbbd9dbd60 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7675,8 +7675,14 @@ static struct sched_domain_attr *dattr_cur;
  */
 static cpumask_t fallback_doms;
 
-void __attribute__((weak)) arch_update_cpu_topology(void)
+/*
+ * arch_update_cpu_topology lets virtualized architectures update the
+ * cpu core maps. It is supposed to return 1 if the topology changed
+ * or 0 if it stayed the same.
+ */
+int __attribute__((weak)) arch_update_cpu_topology(void)
 {
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From 27af4245b6ce99e08c6a7c38825383bf51119cc9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 1 Dec 2008 14:18:13 -0800
Subject: posix-timers: use "struct pid*" instead of "struct task_struct*"

Impact: restructure, clean up code

k_itimer holds the ref to the ->it_process until sys_timer_delete(). This
allows to pin up to RLIMIT_SIGPENDING dead task_struct's. Change the code
to use "struct pid *" instead.

The patch doesn't kill ->it_process, it places ->it_pid into the union.
->it_process is still used by do_cpu_nanosleep() as before. It would be
trivial to change the nanosleep code as well, but since it uses it_process
in a special way I think it is better to keep this field for grep.

The patch bloats the kernel by 104 bytes and it also adds the new pointer,
->it_signal, to k_itimer. It is used by lock_timer() to verify that the
found timer was not created by another process. It is not clear why do we
use the global database (and thus the global idr_lock) for posix timers.
We still need the signal_struct->posix_timers which contains all useable
timers, perhaps it is better to use some form of per-process array
instead.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h |  6 +++++-
 kernel/posix-timers.c        | 43 +++++++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7c721355549..4f71bf4e628c 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -45,7 +45,11 @@ struct k_itimer {
 	int it_requeue_pending;		/* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
 	int it_sigev_notify;		/* notify word of sigevent struct */
-	struct task_struct *it_process;	/* process to send signal to */
+	struct signal_struct *it_signal;
+	union {
+		struct pid *it_pid;	/* pid of process to send signal to */
+		struct task_struct *it_process;	/* for clock_nanosleep */
+	};
 	struct sigqueue *sigq;		/* signal queue entry. */
 	union {
 		struct {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5e79c662294b..42a39afd694a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -116,7 +116,7 @@ static DEFINE_SPINLOCK(idr_lock);
  *	    must supply functions here, even if the function just returns
  *	    ENOSYS.  The standard POSIX timer management code assumes the
  *	    following: 1.) The k_itimer struct (sched.h) is used for the
- *	    timer.  2.) The list, it_lock, it_clock, it_id and it_process
+ *	    timer.  2.) The list, it_lock, it_clock, it_id and it_pid
  *	    fields are not modified by timer code.
  *
  *          At this time all functions EXCEPT clock_nanosleep can be
@@ -313,7 +313,8 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
-	int shared, ret;
+	struct task_struct *task;
+	int shared, ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -327,8 +328,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+	rcu_read_lock();
+	task = pid_task(timr->it_pid, PIDTYPE_PID);
+	if (task) {
+		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+		ret = send_sigqueue(timr->sigq, task, shared);
+	}
+	rcu_read_unlock();
 	/* If we failed to send the signal the timer stops. */
 	return ret > 0;
 }
@@ -405,7 +411,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 	return ret;
 }
 
-static struct task_struct * good_sigevent(sigevent_t * event)
+static struct pid *good_sigevent(sigevent_t * event)
 {
 	struct task_struct *rtn = current->group_leader;
 
@@ -419,7 +425,7 @@ static struct task_struct * good_sigevent(sigevent_t * event)
 	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
 		return NULL;
 
-	return rtn;
+	return task_pid(rtn);
 }
 
 void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
@@ -471,7 +477,7 @@ sys_timer_create(const clockid_t which_clock,
 {
 	struct k_itimer *new_timer;
 	int error, new_timer_id;
-	struct task_struct *process;
+	struct pid *it_pid;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
 
@@ -525,11 +531,9 @@ sys_timer_create(const clockid_t which_clock,
 			goto out;
 		}
 		rcu_read_lock();
-		process = good_sigevent(&event);
-		if (process)
-			get_task_struct(process);
+		it_pid = get_pid(good_sigevent(&event));
 		rcu_read_unlock();
-		if (!process) {
+		if (!it_pid) {
 			error = -EINVAL;
 			goto out;
 		}
@@ -537,8 +541,7 @@ sys_timer_create(const clockid_t which_clock,
 		event.sigev_notify = SIGEV_SIGNAL;
 		event.sigev_signo = SIGALRM;
 		event.sigev_value.sival_int = new_timer->it_id;
-		process = current->group_leader;
-		get_task_struct(process);
+		it_pid = get_pid(task_tgid(current));
 	}
 
 	new_timer->it_sigev_notify     = event.sigev_notify;
@@ -548,7 +551,8 @@ sys_timer_create(const clockid_t which_clock,
 	new_timer->sigq->info.si_code  = SI_TIMER;
 
 	spin_lock_irq(&current->sighand->siglock);
-	new_timer->it_process = process;
+	new_timer->it_pid = it_pid;
+	new_timer->it_signal = current->signal;
 	list_add(&new_timer->list, &current->signal->posix_timers);
 	spin_unlock_irq(&current->sighand->siglock);
 
@@ -583,8 +587,7 @@ static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-		if (timr->it_process &&
-		    same_thread_group(timr->it_process, current)) {
+		if (timr->it_pid && timr->it_signal == current->signal) {
 			spin_unlock(&idr_lock);
 			return timr;
 		}
@@ -831,8 +834,8 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	put_task_struct(timer->it_process);
-	timer->it_process = NULL;
+	put_pid(timer->it_pid);
+	timer->it_pid = NULL;
 
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
@@ -858,8 +861,8 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	put_task_struct(timer->it_process);
-	timer->it_process = NULL;
+	put_pid(timer->it_pid);
+	timer->it_pid = NULL;
 
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
-- 
cgit v1.2.3


From c29541b24fb2c6301021637229ae5347c877330a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 1 Dec 2008 14:18:11 -0800
Subject: linux/timex.h: cleanup for userspace

Impact: fix user-space exported use

Move all the kernel-specific defines and includes into the __KERNEL__
section so that they don't get leaked into userspace.

[akpm@linux-foundation.org: coding-style fixes]

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 73 ++++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 9007313b5b71..998a55d80acf 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,46 +53,10 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
-#include <linux/compiler.h>
 #include <linux/time.h>
 
-#include <asm/param.h>
-
 #define NTP_API		4	/* NTP API version */
 
-/*
- * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
- * for a slightly underdamped convergence characteristic. SHIFT_KH
- * establishes the damping of the FLL and is chosen by wisdom and black
- * art.
- *
- * MAXTC establishes the maximum time constant of the PLL. With the
- * SHIFT_KG and SHIFT_KF values given and a time constant range from
- * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
- * respectively.
- */
-#define SHIFT_PLL	4	/* PLL frequency factor (shift) */
-#define SHIFT_FLL	2	/* FLL frequency factor (shift) */
-#define MAXTC		10	/* maximum time constant (shift) */
-
-/*
- * SHIFT_USEC defines the scaling (shift) of the time_freq and
- * time_tolerance variables, which represent the current frequency
- * offset and maximum frequency tolerance.
- */
-#define SHIFT_USEC 16		/* frequency offset scale (shift) */
-#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
-#define PPM_SCALE_INV_SHIFT 19
-#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
-		       PPM_SCALE + 1)
-
-#define MAXPHASE 500000000l	/* max phase error (ns) */
-#define MAXFREQ 500000		/* max frequency error (ns/s) */
-#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT)
-#define MINSEC 256		/* min interval between updates (s) */
-#define MAXSEC 2048		/* max interval between updates (s) */
-#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */
-
 /*
  * syscall interface - used (mainly by NTP daemon)
  * to discipline kernel clock oscillator
@@ -199,8 +163,45 @@ struct timex {
 #define TIME_BAD	TIME_ERROR /* bw compat */
 
 #ifdef __KERNEL__
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/param.h>
+
 #include <asm/timex.h>
 
+/*
+ * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
+ * for a slightly underdamped convergence characteristic. SHIFT_KH
+ * establishes the damping of the FLL and is chosen by wisdom and black
+ * art.
+ *
+ * MAXTC establishes the maximum time constant of the PLL. With the
+ * SHIFT_KG and SHIFT_KF values given and a time constant range from
+ * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
+ * respectively.
+ */
+#define SHIFT_PLL	4	/* PLL frequency factor (shift) */
+#define SHIFT_FLL	2	/* FLL frequency factor (shift) */
+#define MAXTC		10	/* maximum time constant (shift) */
+
+/*
+ * SHIFT_USEC defines the scaling (shift) of the time_freq and
+ * time_tolerance variables, which represent the current frequency
+ * offset and maximum frequency tolerance.
+ */
+#define SHIFT_USEC 16		/* frequency offset scale (shift) */
+#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
+#define PPM_SCALE_INV_SHIFT 19
+#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
+		       PPM_SCALE + 1)
+
+#define MAXPHASE 500000000l	/* max phase error (ns) */
+#define MAXFREQ 500000		/* max frequency error (ns/s) */
+#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT)
+#define MINSEC 256		/* min interval between updates (s) */
+#define MAXSEC 2048		/* max interval between updates (s) */
+#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */
+
 /*
  * kernel variables
  * Note: maximum error = NTP synch distance = dispersion + delay / 2;
-- 
cgit v1.2.3


From bb608e9db7d29616fb6e0d856c23434610d4a1bd Mon Sep 17 00:00:00 2001
From: Senthil Balasubramanian <senthilkumar@atheros.com>
Date: Thu, 4 Dec 2008 20:38:13 +0530
Subject: wireless: Incorrect LEAP authentication algorithm identifier.

This patch fixes a regression introduced by
"wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts"
LEAP authentication algorithm identifier should be 128.

Signed-off-by: Senthil Balasubramanian <senthilkumar@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a6ec928186ad..c4e6ca1a6306 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -836,7 +836,7 @@ struct ieee80211_ht_info {
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
-#define WLAN_AUTH_LEAP 2
+#define WLAN_AUTH_LEAP 128
 
 #define WLAN_AUTH_CHALLENGE_LEN 128
 
-- 
cgit v1.2.3


From 7ba1c04ed727a70df2dc63464232c0ec906ad67d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 8 Dec 2008 11:18:32 +0100
Subject: mac80211: improve sta_notify documentation

Mention more possible STA entries and document the atomic requirement.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e84c922a1b16..346f373fb676 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1262,8 +1262,8 @@ enum ieee80211_ampdu_mlme_action {
  *	the device does fragmentation by itself; if this method is assigned then
  *	the stack will not do fragmentation.
  *
- * @sta_notify: Notifies low level driver about addition or removal
- *	of associated station or AP.
+ * @sta_notify: Notifies low level driver about addition or removal of an
+ *	associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic.
  *
  * @sta_ps_notify: Notifies low level driver about the power state transition
  *	of a associated station. Must be atomic.
-- 
cgit v1.2.3


From 0f202aa2e1e1db1d20da9bcc3f5ad43c5a22d2d5 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Mon, 8 Dec 2008 14:51:41 -0500
Subject: ieee80211_security: correct warning about width of auth_mode

Also remove auth_algo which is unused.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/ieee80211.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index 7ab3ed2bbccb..adb7cf31f781 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -385,9 +385,8 @@ struct ieee80211_device;
 #define SCM_TEMPORAL_KEY_LENGTH	16
 
 struct ieee80211_security {
-	u16 active_key:2,
-	    enabled:1,
-	    auth_mode:2, auth_algo:4, unicast_uses_group:1, encrypt:1;
+	u16 active_key:2, enabled:1, unicast_uses_group:1, encrypt:1;
+	u8 auth_mode;
 	u8 encode_alg[WEP_KEYS];
 	u8 key_sizes[WEP_KEYS];
 	u8 keys[WEP_KEYS][SCM_KEY_LEN];
-- 
cgit v1.2.3


From f546638c3f809fdacddc03fe765669c3042e0d9d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Dec 2008 12:30:03 +0100
Subject: mac80211: remove fragmentation offload functionality

There's no driver that actually does fragmentation on the
device, and the callback is buggy (when it returns an error,
mac80211's fragmentation status is changed so reading the
frag threshold from userspace reads the new value despite
the error). Let's just remove it, if we really find some
hardware supporting it we can add it back later.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 5 -----
 net/mac80211/tx.c      | 1 -
 net/mac80211/wext.c    | 8 --------
 3 files changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 346f373fb676..5ecc686c1f1a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1258,10 +1258,6 @@ enum ieee80211_ampdu_mlme_action {
  *
  * @set_rts_threshold: Configuration of RTS threshold (if device needs it)
  *
- * @set_frag_threshold: Configuration of fragmentation threshold. Assign this if
- *	the device does fragmentation by itself; if this method is assigned then
- *	the stack will not do fragmentation.
- *
  * @sta_notify: Notifies low level driver about addition or removal of an
  *	associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic.
  *
@@ -1331,7 +1327,6 @@ struct ieee80211_ops {
 	void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx,
 			     u32 *iv32, u16 *iv16);
 	int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value);
-	int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value);
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			enum sta_notify_cmd, struct ieee80211_sta *sta);
 	void (*sta_notify_ps)(struct ieee80211_hw *hw,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d7761e95e4cf..b098c58d216f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1001,7 +1001,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 	if (tx->flags & IEEE80211_TX_FRAGMENTED) {
 		if ((tx->flags & IEEE80211_TX_UNICAST) &&
 		    skb->len + FCS_LEN > local->fragmentation_threshold &&
-		    !local->ops->set_frag_threshold &&
 		    !(info->flags & IEEE80211_TX_CTL_AMPDU))
 			tx->flags |= IEEE80211_TX_FRAGMENTED;
 		else
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 4e1fdcfacb0c..fbeb927c116b 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -639,14 +639,6 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev,
 		local->fragmentation_threshold = frag->value & ~0x1;
 	}
 
-	/* If the wlan card performs fragmentation in hardware/firmware,
-	 * configure it here */
-
-	if (local->ops->set_frag_threshold)
-		return local->ops->set_frag_threshold(
-			local_to_hw(local),
-			local->fragmentation_threshold);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 89fad578a61810b7fdf8edd294890f3c0cde4390 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@web.de>
Date: Tue, 9 Dec 2008 16:28:06 +0100
Subject: mac80211: integrate sta_notify_ps cmds into sta_notify

This patch replaces the newly introduced sta_notify_ps function,
which can be used to notify the driver about every power state
transition for all associated stations, by integrating its functionality
back into the original sta_notify callback.

Signed-off-by: Christian Lamparter <chunkeey@web.de>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c |  4 ++++
 drivers/net/wireless/p54/p54common.c  | 18 ++++--------------
 include/net/mac80211.h                | 27 +++++++--------------------
 net/mac80211/rx.c                     | 12 ++++++------
 4 files changed, 21 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 530648b39935..fd5a537ac51d 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -522,6 +522,10 @@ static void mac80211_hwsim_sta_notify(struct ieee80211_hw *hw,
 	case STA_NOTIFY_REMOVE:
 		hwsim_clear_sta_magic(sta);
 		break;
+	case STA_NOTIFY_SLEEP:
+	case STA_NOTIFY_AWAKE:
+		/* TODO: make good use of these flags */
+		break;
 	}
 }
 
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 89968a5bff84..409ae930d766 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -1051,19 +1051,6 @@ static int p54_sta_unlock(struct ieee80211_hw *dev, u8 *addr)
 	return 0;
 }
 
-static void p54_sta_notify_ps(struct ieee80211_hw *dev,
-			      enum sta_notify_ps_cmd notify_cmd,
-			      struct ieee80211_sta *sta)
-{
-	switch (notify_cmd) {
-	case STA_NOTIFY_AWAKE:
-		p54_sta_unlock(dev, sta->addr);
-		break;
-	default:
-		break;
-	}
-}
-
 static void p54_sta_notify(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
 			      enum sta_notify_cmd notify_cmd,
 			      struct ieee80211_sta *sta)
@@ -1076,6 +1063,10 @@ static void p54_sta_notify(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
 		 * need to buffer frames for this station anymore.
 		 */
 
+		p54_sta_unlock(dev, sta->addr);
+		break;
+	case STA_NOTIFY_AWAKE:
+		/* update the firmware's filter table */
 		p54_sta_unlock(dev, sta->addr);
 		break;
 	default:
@@ -2027,7 +2018,6 @@ static const struct ieee80211_ops p54_ops = {
 	.add_interface		= p54_add_interface,
 	.remove_interface	= p54_remove_interface,
 	.set_tim		= p54_set_tim,
-	.sta_notify_ps		= p54_sta_notify_ps,
 	.sta_notify		= p54_sta_notify,
 	.set_key		= p54_set_key,
 	.config			= p54_config,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5ecc686c1f1a..046ce692a906 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -773,25 +773,16 @@ struct ieee80211_sta {
  * enum sta_notify_cmd - sta notify command
  *
  * Used with the sta_notify() callback in &struct ieee80211_ops, this
- * indicates addition and removal of a station to station table.
+ * indicates addition and removal of a station to station table,
+ * or if a associated station made a power state transition.
  *
  * @STA_NOTIFY_ADD: a station was added to the station table
  * @STA_NOTIFY_REMOVE: a station being removed from the station table
- */
-enum sta_notify_cmd {
-	STA_NOTIFY_ADD, STA_NOTIFY_REMOVE
-};
-
-/**
- * enum sta_notify_ps_cmd - sta power save notify command
- *
- * Used with the sta_notify_ps() callback in &struct ieee80211_ops to
- * notify the driver if a station made a power state transition.
- *
  * @STA_NOTIFY_SLEEP: a station is now sleeping
  * @STA_NOTIFY_AWAKE: a sleeping station woke up
  */
-enum sta_notify_ps_cmd {
+enum sta_notify_cmd {
+	STA_NOTIFY_ADD, STA_NOTIFY_REMOVE,
 	STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE,
 };
 
@@ -1258,11 +1249,9 @@ enum ieee80211_ampdu_mlme_action {
  *
  * @set_rts_threshold: Configuration of RTS threshold (if device needs it)
  *
- * @sta_notify: Notifies low level driver about addition or removal of an
- *	associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic.
- *
- * @sta_ps_notify: Notifies low level driver about the power state transition
- *	of a associated station. Must be atomic.
+ * @sta_notify: Notifies low level driver about addition, removal or power
+ *	state transition of an associated station, AP,  IBSS/WDS/mesh peer etc.
+ *	Must be atomic.
  *
  * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max),
  *	bursting) for a hardware TX queue.
@@ -1329,8 +1318,6 @@ struct ieee80211_ops {
 	int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value);
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			enum sta_notify_cmd, struct ieee80211_sta *sta);
-	void (*sta_notify_ps)(struct ieee80211_hw *hw,
-			enum sta_notify_ps_cmd, struct ieee80211_sta *sta);
 	int (*conf_tx)(struct ieee80211_hw *hw, u16 queue,
 		       const struct ieee80211_tx_queue_params *params);
 	int (*get_tx_stats)(struct ieee80211_hw *hw,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 14be095b8528..23443de7ee4e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -658,9 +658,9 @@ static void ap_sta_ps_start(struct sta_info *sta)
 
 	atomic_inc(&sdata->bss->num_sta_ps);
 	set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL);
-	if (local->ops->sta_notify_ps)
-		local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_SLEEP,
-					  &sta->sta);
+	if (local->ops->sta_notify)
+		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
+					STA_NOTIFY_SLEEP, &sta->sta);
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n",
 	       sdata->dev->name, sta->sta.addr, sta->sta.aid);
@@ -677,9 +677,9 @@ static int ap_sta_ps_end(struct sta_info *sta)
 	atomic_dec(&sdata->bss->num_sta_ps);
 
 	clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL);
-	if (local->ops->sta_notify_ps)
-		local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_AWAKE,
-					  &sta->sta);
+	if (local->ops->sta_notify)
+		local->ops->sta_notify(local_to_hw(local), &sdata->vif,
+					STA_NOTIFY_AWAKE, &sta->sta);
 
 	if (!skb_queue_empty(&sta->ps_tx_buf))
 		sta_info_clear_tim_bit(sta);
-- 
cgit v1.2.3


From 4dec9b807be757780ca3611a959ac22c28d292a7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 10 Dec 2008 17:48:48 +0100
Subject: rfkill: strip pointless notifier chain

No users, so no reason to have it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h |  7 -----
 net/rfkill/rfkill.c    | 83 +++-----------------------------------------------
 2 files changed, 5 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index f376a93927f7..164332cbb77c 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -149,11 +149,4 @@ static inline char *rfkill_get_led_name(struct rfkill *rfkill)
 #endif
 }
 
-/* rfkill notification chain */
-#define RFKILL_STATE_CHANGED		0x0001	/* state of a normal rfkill
-						   switch has changed */
-
-int register_rfkill_notifier(struct notifier_block *nb);
-int unregister_rfkill_notifier(struct notifier_block *nb);
-
 #endif /* RFKILL_H */
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 051d2c9ea66b..3c94f76d5525 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -53,51 +53,6 @@ static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX];
 static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
 static bool rfkill_epo_lock_active;
 
-static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list);
-
-
-/**
- * register_rfkill_notifier - Add notifier to rfkill notifier chain
- * @nb: pointer to the new entry to add to the chain
- *
- * See blocking_notifier_chain_register() for return value and further
- * observations.
- *
- * Adds a notifier to the rfkill notifier chain.  The chain will be
- * called with a pointer to the relevant rfkill structure as a parameter,
- * refer to include/linux/rfkill.h for the possible events.
- *
- * Notifiers added to this chain are to always return NOTIFY_DONE.  This
- * chain is a blocking notifier chain: notifiers can sleep.
- *
- * Calls to this chain may have been done through a workqueue.  One must
- * assume unordered asynchronous behaviour, there is no way to know if
- * actions related to the event that generated the notification have been
- * carried out already.
- */
-int register_rfkill_notifier(struct notifier_block *nb)
-{
-	BUG_ON(!nb);
-	return blocking_notifier_chain_register(&rfkill_notifier_list, nb);
-}
-EXPORT_SYMBOL_GPL(register_rfkill_notifier);
-
-/**
- * unregister_rfkill_notifier - remove notifier from rfkill notifier chain
- * @nb: pointer to the entry to remove from the chain
- *
- * See blocking_notifier_chain_unregister() for return value and further
- * observations.
- *
- * Removes a notifier from the rfkill notifier chain.
- */
-int unregister_rfkill_notifier(struct notifier_block *nb)
-{
-	BUG_ON(!nb);
-	return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_rfkill_notifier);
-
 
 static void rfkill_led_trigger(struct rfkill *rfkill,
 			       enum rfkill_state state)
@@ -124,12 +79,9 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
 }
 #endif /* CONFIG_RFKILL_LEDS */
 
-static void notify_rfkill_state_change(struct rfkill *rfkill)
+static void rfkill_uevent(struct rfkill *rfkill)
 {
-	rfkill_led_trigger(rfkill, rfkill->state);
-	blocking_notifier_call_chain(&rfkill_notifier_list,
-			RFKILL_STATE_CHANGED,
-			rfkill);
+	kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
 }
 
 static void update_rfkill_state(struct rfkill *rfkill)
@@ -142,7 +94,7 @@ static void update_rfkill_state(struct rfkill *rfkill)
 			oldstate = rfkill->state;
 			rfkill->state = newstate;
 			if (oldstate != newstate)
-				notify_rfkill_state_change(rfkill);
+				rfkill_uevent(rfkill);
 		}
 		mutex_unlock(&rfkill->mutex);
 	}
@@ -220,7 +172,7 @@ static int rfkill_toggle_radio(struct rfkill *rfkill,
 	}
 
 	if (force || rfkill->state != oldstate)
-		notify_rfkill_state_change(rfkill);
+		rfkill_uevent(rfkill);
 
 	return retval;
 }
@@ -405,7 +357,7 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
 	rfkill->state = state;
 
 	if (state != oldstate)
-		notify_rfkill_state_change(rfkill);
+		rfkill_uevent(rfkill);
 
 	mutex_unlock(&rfkill->mutex);
 
@@ -618,28 +570,6 @@ static int rfkill_resume(struct device *dev)
 #define rfkill_resume NULL
 #endif
 
-static int rfkill_blocking_uevent_notifier(struct notifier_block *nb,
-					unsigned long eventid,
-					void *data)
-{
-	struct rfkill *rfkill = (struct rfkill *)data;
-
-	switch (eventid) {
-	case RFKILL_STATE_CHANGED:
-		kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
-		break;
-	default:
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block rfkill_blocking_uevent_nb = {
-	.notifier_call	= rfkill_blocking_uevent_notifier,
-	.priority	= 0,
-};
-
 static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
@@ -942,14 +872,11 @@ static int __init rfkill_init(void)
 		return error;
 	}
 
-	register_rfkill_notifier(&rfkill_blocking_uevent_nb);
-
 	return 0;
 }
 
 static void __exit rfkill_exit(void)
 {
-	unregister_rfkill_notifier(&rfkill_blocking_uevent_nb);
 	class_unregister(&rfkill_class);
 }
 
-- 
cgit v1.2.3


From b690ace50be7d10d77cb7a6d5ef1bd9de649852f Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 21 Oct 2008 14:07:03 +0100
Subject: [ARM] S3C6400: serial support for S3C6400 and S3C6410 SoCs

Add support to the Samsung serial driver for the S3C6400
and S3C6410 serial ports.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/include/mach/map.h     |   2 +
 arch/arm/mach-s3c24a0/include/mach/map.h     |   2 +
 arch/arm/plat-s3c/include/plat/regs-serial.h |   6 ++
 drivers/serial/Kconfig                       |  10 +-
 drivers/serial/Makefile                      |   1 +
 drivers/serial/s3c6400.c                     | 151 +++++++++++++++++++++++++++
 drivers/serial/samsung.c                     |   8 +-
 include/linux/serial_core.h                  |   2 +
 8 files changed, 178 insertions(+), 4 deletions(-)
 create mode 100644 drivers/serial/s3c6400.c

(limited to 'include')

diff --git a/arch/arm/mach-s3c2410/include/mach/map.h b/arch/arm/mach-s3c2410/include/mach/map.h
index 6b30361a0805..918e3463297f 100644
--- a/arch/arm/mach-s3c2410/include/mach/map.h
+++ b/arch/arm/mach-s3c2410/include/mach/map.h
@@ -101,4 +101,6 @@
 #define S3C24XX_PA_SDI      S3C2410_PA_SDI
 #define S3C24XX_PA_NAND	    S3C2410_PA_NAND
 
+#define S3C_PA_UART	    S3C24XX_PA_UART
+
 #endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s3c24a0/include/mach/map.h b/arch/arm/mach-s3c24a0/include/mach/map.h
index 2ce1839de4ee..6667355a47a1 100644
--- a/arch/arm/mach-s3c24a0/include/mach/map.h
+++ b/arch/arm/mach-s3c24a0/include/mach/map.h
@@ -80,4 +80,6 @@
 #define S3C24XX_PA_SDI		S3C24A0_PA_SDI
 #define S3C24XX_PA_NAND		S3C24A0_PA_NAND
 
+#define S3C_PA_UART		S3C24A0_PA_UART
+
 #endif /* __ASM_ARCH_24A0_MAP_H */
diff --git a/arch/arm/plat-s3c/include/plat/regs-serial.h b/arch/arm/plat-s3c/include/plat/regs-serial.h
index 18ba31c7174c..3ca28585cf80 100644
--- a/arch/arm/plat-s3c/include/plat/regs-serial.h
+++ b/arch/arm/plat-s3c/include/plat/regs-serial.h
@@ -77,6 +77,12 @@
 #define S3C2440_UCON_FCLK	  (3<<10)
 #define S3C2443_UCON_EPLL	  (3<<10)
 
+#define S3C6400_UCON_CLKMASK	(3<<10)
+#define S3C6400_UCON_PCLK	(0<<10)
+#define S3C6400_UCON_PCLK2	(2<<10)
+#define S3C6400_UCON_UCLK0	(1<<10)
+#define S3C6400_UCON_UCLK1	(3<<10)
+
 #define S3C2440_UCON2_FCLK_EN	  (1<<15)
 #define S3C2440_UCON0_DIVMASK	  (15 << 12)
 #define S3C2440_UCON1_DIVMASK	  (15 << 12)
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index f71a2e8a5f60..e4ae499e587e 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -447,7 +447,7 @@ config SERIAL_CLPS711X_CONSOLE
 
 config SERIAL_SAMSUNG
 	tristate "Samsung SoC serial support"
-	depends on ARM && PLAT_S3C24XX
+	depends on ARM && PLAT_S3C
 	select SERIAL_CORE
 	help
 	  Support for the on-chip UARTs on the Samsung S3C24XX series CPUs,
@@ -515,6 +515,14 @@ config SERIAL_S3C24A0
 	help
 	  Serial port support for the Samsung S3C24A0 SoC
 
+config SERIAL_S3C6400
+	tristate "Samsung S3C6400/S3C6410 Serial port support"
+	depends on SERIAL_SAMSUNG && (CPU_S3C600 || CPU_S3C6410)
+	default y
+	help
+	  Serial port support for the Samsung S3C6400 and S3C6410
+	  SoCs
+
 config SERIAL_DZ
 	bool "DECstation DZ serial driver"
 	depends on MACH_DECSTATION && 32BIT
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 7769aece54ca..dfe775ac45b2 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o
 obj-$(CONFIG_SERIAL_S3C2412) += s3c2412.o
 obj-$(CONFIG_SERIAL_S3C2440) += s3c2440.o
 obj-$(CONFIG_SERIAL_S3C24A0) += s3c24a0.o
+obj-$(CONFIG_SERIAL_S3C6400) += s3c6400.o
 obj-$(CONFIG_SERIAL_IP22_ZILOG) += ip22zilog.o
 obj-$(CONFIG_SERIAL_MUX) += mux.o
 obj-$(CONFIG_SERIAL_68328) += 68328serial.o
diff --git a/drivers/serial/s3c6400.c b/drivers/serial/s3c6400.c
new file mode 100644
index 000000000000..06936d13393f
--- /dev/null
+++ b/drivers/serial/s3c6400.c
@@ -0,0 +1,151 @@
+/* linux/drivers/serial/s3c6400.c
+ *
+ * Driver for Samsung S3C6400 and S3C6410 SoC onboard UARTs.
+ *
+ * Copyright 2008 Openmoko,  Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+
+#include <asm/irq.h>
+#include <mach/hardware.h>
+
+#include <plat/regs-serial.h>
+
+#include "samsung.h"
+
+static int s3c6400_serial_setsource(struct uart_port *port,
+				    struct s3c24xx_uart_clksrc *clk)
+{
+	unsigned long ucon = rd_regl(port, S3C2410_UCON);
+
+	if (strcmp(clk->name, "uclk0") == 0) {
+		ucon &= ~S3C6400_UCON_CLKMASK;
+		ucon |= S3C6400_UCON_UCLK0;
+	} else if (strcmp(clk->name, "uclk1") == 0)
+		ucon |= S3C6400_UCON_UCLK1;
+	else if (strcmp(clk->name, "pclk") == 0) {
+		/* See notes about transitioning from UCLK to PCLK */
+		ucon &= ~S3C6400_UCON_UCLK0;
+	} else {
+		printk(KERN_ERR "unknown clock source %s\n", clk->name);
+		return -EINVAL;
+	}
+
+	wr_regl(port, S3C2410_UCON, ucon);
+	return 0;
+}
+
+
+static int s3c6400_serial_getsource(struct uart_port *port,
+				    struct s3c24xx_uart_clksrc *clk)
+{
+	u32 ucon = rd_regl(port, S3C2410_UCON);
+
+	clk->divisor = 1;
+
+	switch (ucon & S3C6400_UCON_CLKMASK) {
+	case S3C6400_UCON_UCLK0:
+		clk->name = "uclk0";
+		break;
+
+	case S3C6400_UCON_UCLK1:
+		clk->name = "uclk1";
+		break;
+
+	case S3C6400_UCON_PCLK:
+	case S3C6400_UCON_PCLK2:
+		clk->name = "pclk";
+		break;
+	}
+
+	return 0;
+}
+
+static int s3c6400_serial_resetport(struct uart_port *port,
+				    struct s3c2410_uartcfg *cfg)
+{
+	unsigned long ucon = rd_regl(port, S3C2410_UCON);
+
+	dbg("s3c6400_serial_resetport: port=%p (%08lx), cfg=%p\n",
+	    port, port->mapbase, cfg);
+
+	/* ensure we don't change the clock settings... */
+
+	ucon &= S3C6400_UCON_CLKMASK;
+
+	wr_regl(port, S3C2410_UCON,  ucon | cfg->ucon);
+	wr_regl(port, S3C2410_ULCON, cfg->ulcon);
+
+	/* reset both fifos */
+
+	wr_regl(port, S3C2410_UFCON, cfg->ufcon | S3C2410_UFCON_RESETBOTH);
+	wr_regl(port, S3C2410_UFCON, cfg->ufcon);
+
+	return 0;
+}
+
+static struct s3c24xx_uart_info s3c6400_uart_inf = {
+	.name		= "Samsung S3C6400 UART",
+	.type		= PORT_S3C6400,
+	.fifosize	= 64,
+	.rx_fifomask	= S3C2440_UFSTAT_RXMASK,
+	.rx_fifoshift	= S3C2440_UFSTAT_RXSHIFT,
+	.rx_fifofull	= S3C2440_UFSTAT_RXFULL,
+	.tx_fifofull	= S3C2440_UFSTAT_TXFULL,
+	.tx_fifomask	= S3C2440_UFSTAT_TXMASK,
+	.tx_fifoshift	= S3C2440_UFSTAT_TXSHIFT,
+	.get_clksrc	= s3c6400_serial_getsource,
+	.set_clksrc	= s3c6400_serial_setsource,
+	.reset_port	= s3c6400_serial_resetport,
+};
+
+/* device management */
+
+static int s3c6400_serial_probe(struct platform_device *dev)
+{
+	dbg("s3c6400_serial_probe: dev=%p\n", dev);
+	return s3c24xx_serial_probe(dev, &s3c6400_uart_inf);
+}
+
+static struct platform_driver s3c6400_serial_drv = {
+	.probe		= s3c6400_serial_probe,
+	.remove		= s3c24xx_serial_remove,
+	.driver		= {
+		.name	= "s3c6400-uart",
+		.owner	= THIS_MODULE,
+	},
+};
+
+s3c24xx_console_init(&s3c6400_serial_drv, &s3c6400_uart_inf);
+
+static int __init s3c6400_serial_init(void)
+{
+	return s3c24xx_serial_init(&s3c6400_serial_drv, &s3c6400_uart_inf);
+}
+
+static void __exit s3c6400_serial_exit(void)
+{
+	platform_driver_unregister(&s3c6400_serial_drv);
+}
+
+module_init(s3c6400_serial_init);
+module_exit(s3c6400_serial_exit);
+
+MODULE_DESCRIPTION("Samsung S3C6400,S3C6410 SoC Serial port driver");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:s3c6400-uart");
diff --git a/drivers/serial/samsung.c b/drivers/serial/samsung.c
index bb8b57aae3af..44fc38afa228 100644
--- a/drivers/serial/samsung.c
+++ b/drivers/serial/samsung.c
@@ -47,9 +47,9 @@
 #include <asm/irq.h>
 
 #include <mach/hardware.h>
+#include <mach/map.h>
 
 #include <plat/regs-serial.h>
-#include <mach/regs-gpio.h>
 
 #include "samsung.h"
 
@@ -756,6 +756,8 @@ static const char *s3c24xx_serial_type(struct uart_port *port)
 		return "S3C2440";
 	case PORT_S3C2412:
 		return "S3C2412";
+	case PORT_S3C6400:
+		return "S3C6400/10";
 	default:
 		return NULL;
 	}
@@ -1034,8 +1036,8 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport,
 
 	dbg("resource %p (%lx..%lx)\n", res, res->start, res->end);
 
-	port->mapbase	= res->start;
-	port->membase	= S3C24XX_VA_UART + (res->start - S3C24XX_PA_UART);
+	port->mapbase = res->start;
+	port->membase = S3C_VA_UART + res->start - (S3C_PA_UART & 0xfff00000);
 	ret = platform_get_irq(platdev, 0);
 	if (ret < 0)
 		port->irq = 0;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 4e4f1277f3bf..feb3b939ec4b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -158,6 +158,8 @@
 /* SH-SCI */
 #define PORT_SCIFA	83
 
+#define PORT_S3C6400	84
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From d2ff911882b6bc693d86ca9566daac70aacbb2b3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 15 Dec 2008 19:04:35 +1030
Subject: Define smp_call_function_many for UP

Otherwise those using it in transition patches (eg. kvm) can't compile
with CONFIG_SMP=n:

arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function 'make_all_cpus_request':
arch/x86/kvm/../../../virt/kvm/kvm_main.c:380: error: implicit declaration of function 'smp_call_function_many'

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 3f9a60043a97..6e7ba16ff454 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -146,6 +146,8 @@ static inline void smp_send_reschedule(int cpu) { }
 })
 #define smp_call_function_mask(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
+#define smp_call_function_many(mask, func, info, wait) \
+			(up_smp_call_function(func, info))
 static inline void init_call_single_data(void)
 {
 }
-- 
cgit v1.2.3


From b53c7583e26746ef6f66c866841e10450150ed8e Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 4 Dec 2008 10:01:52 -0800
Subject: rapidio: struct device - replace bus_id with dev_name(),
 dev_set_name()

Cc: Matt Porter <mporter@kernel.crashing.org>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/rapidio/rio-scan.c | 8 ++++----
 include/linux/rio_drv.h    | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index 643a6b98462b..5c13f61bfb1b 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -365,15 +365,15 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 				rdid++)
 			rswitch->route_table[rdid] = RIO_INVALID_ROUTE;
 		rdev->rswitch = rswitch;
-		sprintf(rio_name(rdev), "%02x:s:%04x", rdev->net->id,
-			rdev->rswitch->switchid);
+		dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id,
+			     rdev->rswitch->switchid);
 		rio_route_set_ops(rdev);
 
 		list_add_tail(&rswitch->node, &rio_switches);
 
 	} else
-		sprintf(rio_name(rdev), "%02x:e:%04x", rdev->net->id,
-			rdev->destid);
+		dev_set_name(&rdev->dev, "%02x:e:%04x", rdev->net->id,
+			     rdev->destid);
 
 	rdev->dev.bus = &rio_bus_type;
 
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 90987b7bcc1b..32c0547ffafc 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -427,9 +427,9 @@ void rio_dev_put(struct rio_dev *);
  * Get the unique RIO device identifier. Returns the device
  * identifier string.
  */
-static inline char *rio_name(struct rio_dev *rdev)
+static inline const char *rio_name(struct rio_dev *rdev)
 {
-	return rdev->dev.bus_id;
+	return dev_name(&rdev->dev);
 }
 
 /**
-- 
cgit v1.2.3


From 1a881f27c50b4fbd6858a8696a189263621136b0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Dec 2008 23:27:47 -0800
Subject: net: Add frag_list support to GSO

This patch allows GSO to handle frag_list in a limited way for the
purposes of allowing packets merged by GRO to be refragmented on
output.

Most hardware won't (and aren't expected to) support handling GRO
frag_list packets directly.  Therefore we will perform GSO in
software for those cases.

However, for drivers that can support it (such as virtual NICs) we
may not have to segment the packets at all.

Whether the added overhead of GRO/GSO is worthwhile for bridges
and routers when weighed against the benefit of potentially
increasing the MTU within the host is still an open question.
However, for the case of host nodes this is undoubtedly a win.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b60c26b7d31c..bdf5465deb91 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1858,6 +1858,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	return skb_is_gso(skb) &&
 	       (!skb_gso_ok(skb, dev->features) ||
+	        (skb_shinfo(skb)->frag_list &&
+	         !(dev->features & NETIF_F_FRAGLIST)) ||
 		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f54cac76438a..e415f0b0d0d0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1533,8 +1533,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	__be16 type = skb->protocol;
 	int err;
 
-	BUG_ON(skb_shinfo(skb)->frag_list);
-
 	skb_reset_mac_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
-- 
cgit v1.2.3


From d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Dec 2008 23:38:52 -0800
Subject: net: Add Generic Receive Offload infrastructure

This patch adds the top-level GRO (Generic Receive Offload) infrastructure.
This is pretty similar to LRO except that this is protocol-independent.
Instead of holding packets in an lro_mgr structure, they're now held in
napi_struct.

For drivers that intend to use this, they can set the NETIF_F_GRO bit and
call napi_gro_receive instead of netif_receive_skb or just call netif_rx.
The latter will call napi_receive_skb automatically.  When napi_gro_receive
is used, the driver must either call napi_complete/napi_rx_complete, or
call napi_gro_flush in softirq context if the driver uses the primitives
__napi_complete/__napi_rx_complete.

Protocols will set the gro_receive and gro_complete function pointers in
order to participate in this scheme.

In addition to the packet, gro_receive will get a list of currently held
packets.  Each packet in the list has a same_flow field which is non-zero
if it is a potential match for the new packet.  For each packet that may
match, they also have a flush field which is non-zero if the held packet
must not be merged with the new packet.

Once gro_receive has determined that the new skb matches a held packet,
the held packet may be processed immediately if the new skb cannot be
merged with it.  In this case gro_receive should return the pointer to
the existing skb in gro_list.  Otherwise the new skb should be merged into
the existing packet and NULL should be returned, unless the new skb makes
it impossible for any further merges to be made (e.g., FIN packet) where
the merged skb should be returned.

Whenever the skb is merged into an existing entry, the gro_receive
function should set NAPI_GRO_CB(skb)->same_flow.  Note that if an skb
merely matches an existing entry but can't be merged with it, then
this shouldn't be set.

If gro_receive finds it pointless to hold the new skb for future merging,
it should set NAPI_GRO_CB(skb)->flush.

Held packets will be flushed by napi_gro_flush which is called by
napi_complete and napi_rx_complete.

Currently held packets are stored in a singly liked list just like LRO.
The list is limited to a maximum of 8 entries.  In future, this may be
expanded to use a hash table to allow more flows to be held for merging.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  80 +++++++------------
 include/linux/netpoll.h   |   5 --
 net/core/dev.c            | 193 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 219 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bdf5465deb91..58856b6737fb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -314,8 +314,9 @@ struct napi_struct {
 	spinlock_t		poll_lock;
 	int			poll_owner;
 	struct net_device	*dev;
-	struct list_head	dev_list;
 #endif
+	struct list_head	dev_list;
+	struct sk_buff		*gro_list;
 };
 
 enum
@@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi)
  *
  * Mark NAPI processing as complete.
  */
-static inline void __napi_complete(struct napi_struct *n)
-{
-	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-	list_del(&n->poll_list);
-	smp_mb__before_clear_bit();
-	clear_bit(NAPI_STATE_SCHED, &n->state);
-}
-
-static inline void napi_complete(struct napi_struct *n)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__napi_complete(n);
-	local_irq_restore(flags);
-}
+extern void __napi_complete(struct napi_struct *n);
+extern void napi_complete(struct napi_struct *n);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -640,9 +627,7 @@ struct net_device
 	unsigned long		state;
 
 	struct list_head	dev_list;
-#ifdef CONFIG_NETPOLL
 	struct list_head	napi_list;
-#endif
 
 	/* Net device features */
 	unsigned long		features;
@@ -661,6 +646,7 @@ struct net_device
 #define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
 					/* do not use LLTX in new drivers */
 #define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
+#define NETIF_F_GRO		16384	/* Generic receive offload */
 #define NETIF_F_LRO		32768	/* large receive offload */
 
 	/* Segmentation offload features */
@@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev)
  * netif_napi_add() must be used to initialize a napi context prior to calling
  * *any* of the other napi related functions.
  */
-static inline void netif_napi_add(struct net_device *dev,
-				  struct napi_struct *napi,
-				  int (*poll)(struct napi_struct *, int),
-				  int weight)
-{
-	INIT_LIST_HEAD(&napi->poll_list);
-	napi->poll = poll;
-	napi->weight = weight;
-#ifdef CONFIG_NETPOLL
-	napi->dev = dev;
-	list_add(&napi->dev_list, &dev->napi_list);
-	spin_lock_init(&napi->poll_lock);
-	napi->poll_owner = -1;
-#endif
-	set_bit(NAPI_STATE_SCHED, &napi->state);
-}
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+		    int (*poll)(struct napi_struct *, int), int weight);
 
 /**
  *  netif_napi_del - remove a napi context
@@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev,
  *
  *  netif_napi_del() removes a napi context from the network device napi list
  */
-static inline void netif_napi_del(struct napi_struct *napi)
-{
-#ifdef CONFIG_NETPOLL
-	list_del(&napi->dev_list);
-#endif
-}
+void netif_napi_del(struct napi_struct *napi);
+
+struct napi_gro_cb {
+	/* This is non-zero if the packet may be of the same flow. */
+	int same_flow;
+
+	/* This is non-zero if the packet cannot be merged with the new skb. */
+	int flush;
+
+	/* Number of segments aggregated. */
+	int count;
+};
+
+#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
 
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
@@ -1024,6 +1004,9 @@ struct packet_type {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						int features);
 	int			(*gso_send_check)(struct sk_buff *skb);
+	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
+					       struct sk_buff *skb);
+	int			(*gro_complete)(struct sk_buff *skb);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
@@ -1377,6 +1360,9 @@ extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
+extern void		napi_gro_flush(struct napi_struct *napi);
+extern int		napi_gro_receive(struct napi_struct *napi,
+					 struct sk_buff *skb);
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
@@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev,
 static inline void netif_rx_complete(struct net_device *dev,
 				     struct napi_struct *napi)
 {
-	unsigned long flags;
-
-	/*
-	 * don't let napi dequeue from the cpu poll list
-	 * just in case its running on a different cpu
-	 */
-	if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state)))
-		return;
-	local_irq_save(flags);
-	__netif_rx_complete(dev, napi);
-	local_irq_restore(flags);
+	napi_complete(napi);
 }
 
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e3d79593fb3a..e38d3c9dccda 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have)
 	rcu_read_unlock();
 }
 
-static inline void netpoll_netdev_init(struct net_device *dev)
-{
-	INIT_LIST_HEAD(&dev->napi_list);
-}
-
 #else
 static inline int netpoll_rx(struct sk_buff *skb)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index e415f0b0d0d0..d8d7d1fccde4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,9 @@
 
 #include "net-sysfs.h"
 
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg)
 		}
 }
 
+static int napi_gro_complete(struct sk_buff *skb)
+{
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	int err = -ENOENT;
+
+	if (!skb_shinfo(skb)->frag_list)
+		goto out;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, head, list) {
+		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+			continue;
+
+		err = ptype->gro_complete(skb);
+		break;
+	}
+	rcu_read_unlock();
+
+	if (err) {
+		WARN_ON(&ptype->list == head);
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+out:
+	__skb_push(skb, -skb_network_offset(skb));
+	return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	for (skb = napi->gro_list; skb; skb = next) {
+		next = skb->next;
+		skb->next = NULL;
+		napi_gro_complete(skb);
+	}
+
+	napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct packet_type *ptype;
+	__be16 type = skb->protocol;
+	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+	int count = 0;
+	int mac_len;
+
+	if (!(skb->dev->features & NETIF_F_GRO))
+		goto normal;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, head, list) {
+		struct sk_buff *p;
+
+		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+			continue;
+
+		skb_reset_network_header(skb);
+		mac_len = skb->network_header - skb->mac_header;
+		skb->mac_len = mac_len;
+		NAPI_GRO_CB(skb)->same_flow = 0;
+		NAPI_GRO_CB(skb)->flush = 0;
+
+		for (p = napi->gro_list; p; p = p->next) {
+			count++;
+			NAPI_GRO_CB(p)->same_flow =
+				p->mac_len == mac_len &&
+				!memcmp(skb_mac_header(p), skb_mac_header(skb),
+					mac_len);
+			NAPI_GRO_CB(p)->flush = 0;
+		}
+
+		pp = ptype->gro_receive(&napi->gro_list, skb);
+		break;
+	}
+	rcu_read_unlock();
+
+	if (&ptype->list == head)
+		goto normal;
+
+	if (pp) {
+		struct sk_buff *nskb = *pp;
+
+		*pp = nskb->next;
+		nskb->next = NULL;
+		napi_gro_complete(nskb);
+		count--;
+	}
+
+	if (NAPI_GRO_CB(skb)->same_flow)
+		goto ok;
+
+	if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+		__skb_push(skb, -skb_network_offset(skb));
+		goto normal;
+	}
+
+	NAPI_GRO_CB(skb)->count = 1;
+	skb->next = napi->gro_list;
+	napi->gro_list = skb;
+
+ok:
+	return NET_RX_SUCCESS;
+
+normal:
+	return netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
@@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		}
 		local_irq_enable();
 
-		netif_receive_skb(skb);
+		napi_gro_receive(napi, skb);
 	} while (++work < quota && jiffies == start_time);
 
+	napi_gro_flush(napi);
+
 	return work;
 }
 
@@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+void __napi_complete(struct napi_struct *n)
+{
+	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+	BUG_ON(n->gro_list);
+
+	list_del(&n->poll_list);
+	smp_mb__before_clear_bit();
+	clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+	unsigned long flags;
+
+	/*
+	 * don't let napi dequeue from the cpu poll list
+	 * just in case its running on a different cpu
+	 */
+	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+		return;
+
+	napi_gro_flush(n);
+	local_irq_save(flags);
+	__napi_complete(n);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+		    int (*poll)(struct napi_struct *, int), int weight)
+{
+	INIT_LIST_HEAD(&napi->poll_list);
+	napi->gro_list = NULL;
+	napi->poll = poll;
+	napi->weight = weight;
+	list_add(&napi->dev_list, &dev->napi_list);
+#ifdef CONFIG_NETPOLL
+	napi->dev = dev;
+	spin_lock_init(&napi->poll_lock);
+	napi->poll_owner = -1;
+#endif
+	set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	list_del(&napi->dev_list);
+
+	for (skb = napi->gro_list; skb; skb = next) {
+		next = skb->next;
+		skb->next = NULL;
+		kfree_skb(skb);
+	}
+
+	napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	netdev_init_queues(dev);
 
-	netpoll_netdev_init(dev);
+	INIT_LIST_HEAD(&dev->napi_list);
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
@@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
  */
 void free_netdev(struct net_device *dev)
 {
+	struct napi_struct *p, *n;
+
 	release_net(dev_net(dev));
 
 	kfree(dev->_tx);
 
+	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+		netif_napi_del(p);
+
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
@@ -4949,6 +5137,7 @@ static int __init net_dev_init(void)
 
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
+		queue->backlog.gro_list = NULL;
 	}
 
 	dev_boot_phase = 0;
-- 
cgit v1.2.3


From 73cc19f1556b95976934de236fd9043f7208844f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Dec 2008 23:41:09 -0800
Subject: ipv4: Add GRO infrastructure

This patch adds GRO support for IPv4.

The criteria for merging is more stringent than LRO, in particular,
we require all fields in the IP header to be identical except for
the length, ID and checksum.  In addition, the ID must form an
arithmetic sequence with a difference of one.

The ID requirement might seem overly strict, however, most hardware
TSO solutions already obey this rule.  Linux itself also obeys this
whether GSO is in use or not.

In future we could relax this rule by storing the IDs (or rather
making sure that we don't drop them when pulling the aggregate
skb's tail).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/protocol.h |  3 ++
 net/ipv4/af_inet.c     | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

(limited to 'include')

diff --git a/include/net/protocol.h b/include/net/protocol.h
index 8d024d7cb741..cb2965aa1b62 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -39,6 +39,9 @@ struct net_protocol {
 	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff	       *(*gso_segment)(struct sk_buff *skb,
 					       int features);
+	struct sk_buff	      **(*gro_receive)(struct sk_buff **head,
+					       struct sk_buff *skb);
+	int			(*gro_complete)(struct sk_buff *skb);
 	unsigned int		no_policy:1,
 				netns_ok:1;
 };
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fe03048c130d..a85595307fa7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -94,6 +94,7 @@
 #include <linux/igmp.h>
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
+#include <net/checksum.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/arp.h>
@@ -1241,6 +1242,100 @@ out:
 	return segs;
 }
 
+static struct sk_buff **inet_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	struct net_protocol *ops;
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	struct iphdr *iph;
+	int flush = 1;
+	int proto;
+	int id;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+		goto out;
+
+	iph = ip_hdr(skb);
+	proto = iph->protocol & (MAX_INET_PROTOS - 1);
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_protos[proto]);
+	if (!ops || !ops->gro_receive)
+		goto out_unlock;
+
+	if (iph->version != 4 || iph->ihl != 5)
+		goto out_unlock;
+
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+		goto out_unlock;
+
+	flush = ntohs(iph->tot_len) != skb->len ||
+		iph->frag_off != htons(IP_DF);
+	id = ntohs(iph->id);
+
+	for (p = *head; p; p = p->next) {
+		struct iphdr *iph2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		iph2 = ip_hdr(p);
+
+		if (iph->protocol != iph2->protocol ||
+		    iph->tos != iph2->tos ||
+		    memcmp(&iph->saddr, &iph2->saddr, 8)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* All fields must match except length and checksum. */
+		NAPI_GRO_CB(p)->flush |=
+			memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
+			(u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;
+
+		NAPI_GRO_CB(p)->flush |= flush;
+	}
+
+	NAPI_GRO_CB(skb)->flush |= flush;
+	__skb_pull(skb, sizeof(*iph));
+	skb_reset_transport_header(skb);
+
+	pp = ops->gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int inet_gro_complete(struct sk_buff *skb)
+{
+	struct net_protocol *ops;
+	struct iphdr *iph = ip_hdr(skb);
+	int proto = iph->protocol & (MAX_INET_PROTOS - 1);
+	int err = -ENOSYS;
+	__be16 newlen = htons(skb->len - skb_network_offset(skb));
+
+	csum_replace2(&iph->check, iph->tot_len, newlen);
+	iph->tot_len = newlen;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_protos[proto]);
+	if (WARN_ON(!ops || !ops->gro_complete))
+		goto out_unlock;
+
+	err = ops->gro_complete(skb);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return err;
+}
+
 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 			 unsigned short type, unsigned char protocol,
 			 struct net *net)
@@ -1407,6 +1502,8 @@ static struct packet_type ip_packet_type = {
 	.func = ip_rcv,
 	.gso_send_check = inet_gso_send_check,
 	.gso_segment = inet_gso_segment,
+	.gro_receive = inet_gro_receive,
+	.gro_complete = inet_gro_complete,
 };
 
 static int __init inet_init(void)
-- 
cgit v1.2.3


From 71d93b39e52e92aea35f1058d957cf12250d0b75 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Dec 2008 23:42:33 -0800
Subject: net: Add skb_gro_receive

This patch adds the helper skb_gro_receive to merge packets for
GRO.  The current method is to allocate a new header skb and then
chain the original packets to its frag_list.  This is done to
make it easier to integrate into the existing GSO framework.

In future as GSO is moved into the drivers, we can undo this and
simply chain the original packets together.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/skbuff.c      | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index acf17af45af9..cf2cb50f77d1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1687,6 +1687,8 @@ extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
 				 int shiftlen);
 
 extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
+extern int	       skb_gro_receive(struct sk_buff **head,
+				       struct sk_buff *skb);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 18e224af05a6..b8d0abb26433 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2582,6 +2582,65 @@ err:
 
 EXPORT_SYMBOL_GPL(skb_segment);
 
+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct sk_buff *p = *head;
+	struct sk_buff *nskb;
+	unsigned int headroom;
+	unsigned int hlen = p->data - skb_mac_header(p);
+
+	if (hlen + p->len + skb->len >= 65536)
+		return -E2BIG;
+
+	if (skb_shinfo(p)->frag_list)
+		goto merge;
+
+	headroom = skb_headroom(p);
+	nskb = netdev_alloc_skb(p->dev, headroom);
+	if (unlikely(!nskb))
+		return -ENOMEM;
+
+	__copy_skb_header(nskb, p);
+	nskb->mac_len = p->mac_len;
+
+	skb_reserve(nskb, headroom);
+
+	skb_set_mac_header(nskb, -hlen);
+	skb_set_network_header(nskb, skb_network_offset(p));
+	skb_set_transport_header(nskb, skb_transport_offset(p));
+
+	memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+
+	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
+	skb_shinfo(nskb)->frag_list = p;
+	skb_header_release(p);
+	nskb->prev = p;
+
+	nskb->data_len += p->len;
+	nskb->truesize += p->len;
+	nskb->len += p->len;
+
+	*head = nskb;
+	nskb->next = p->next;
+	p->next = NULL;
+
+	p = nskb;
+
+merge:
+	NAPI_GRO_CB(p)->count++;
+	p->prev->next = skb;
+	p->prev = skb;
+	skb_header_release(skb);
+
+	p->data_len += skb->len;
+	p->truesize += skb->len;
+	p->len += skb->len;
+
+	NAPI_GRO_CB(skb)->same_flow = 1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive);
+
 void __init skb_init(void)
 {
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
-- 
cgit v1.2.3


From bf296b125b21b8d558ceb6ec30bb4eba2730cd6b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Dec 2008 23:43:36 -0800
Subject: tcp: Add GRO support

This patch adds the TCP-specific portion of GRO.  The criterion for
merging is extremely strict (the TCP header must match exactly apart
from the checksum) so as to allow refragmentation.  Otherwise this
is pretty much identical to LRO, except that we support the merging
of ECN packets.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   |   6 ++++
 net/ipv4/af_inet.c  |   2 ++
 net/ipv4/tcp.c      | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c |  35 ++++++++++++++++++
 4 files changed, 143 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index de1e91d959b8..218235de8963 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1358,6 +1358,12 @@ extern void tcp_v4_destroy_sock(struct sock *sk);
 
 extern int tcp_v4_gso_send_check(struct sk_buff *skb);
 extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
+extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
+					struct sk_buff *skb);
+extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb);
+extern int tcp_gro_complete(struct sk_buff *skb);
+extern int tcp4_gro_complete(struct sk_buff *skb);
 
 #ifdef CONFIG_PROC_FS
 extern int  tcp4_proc_init(void);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a85595307fa7..664ff0ee1c83 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1410,6 +1410,8 @@ static struct net_protocol tcp_protocol = {
 	.err_handler =	tcp_v4_err,
 	.gso_send_check = tcp_v4_gso_send_check,
 	.gso_segment =	tcp_tso_segment,
+	.gro_receive =	tcp4_gro_receive,
+	.gro_complete =	tcp4_gro_complete,
 	.no_policy =	1,
 	.netns_ok =	1,
 };
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 019243408623..1f3d52946b3b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2465,6 +2465,106 @@ out:
 }
 EXPORT_SYMBOL(tcp_tso_segment);
 
+struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	struct tcphdr *th;
+	struct tcphdr *th2;
+	unsigned int thlen;
+	unsigned int flags;
+	unsigned int total;
+	unsigned int mss = 1;
+	int flush = 1;
+
+	if (!pskb_may_pull(skb, sizeof(*th)))
+		goto out;
+
+	th = tcp_hdr(skb);
+	thlen = th->doff * 4;
+	if (thlen < sizeof(*th))
+		goto out;
+
+	if (!pskb_may_pull(skb, thlen))
+		goto out;
+
+	th = tcp_hdr(skb);
+	__skb_pull(skb, thlen);
+
+	flags = tcp_flag_word(th);
+
+	for (; (p = *head); head = &p->next) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		th2 = tcp_hdr(p);
+
+		if (th->source != th2->source || th->dest != th2->dest) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		goto found;
+	}
+
+	goto out_check_final;
+
+found:
+	flush = NAPI_GRO_CB(p)->flush;
+	flush |= flags & TCP_FLAG_CWR;
+	flush |= (flags ^ tcp_flag_word(th2)) &
+		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
+	flush |= th->ack_seq != th2->ack_seq || th->window != th2->window;
+	flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
+
+	total = p->len;
+	mss = total;
+	if (skb_shinfo(p)->frag_list)
+		mss = skb_shinfo(p)->frag_list->len;
+
+	flush |= skb->len > mss || skb->len <= 0;
+	flush |= ntohl(th2->seq) + total != ntohl(th->seq);
+
+	if (flush || skb_gro_receive(head, skb)) {
+		mss = 1;
+		goto out_check_final;
+	}
+
+	p = *head;
+	th2 = tcp_hdr(p);
+	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
+
+out_check_final:
+	flush = skb->len < mss;
+	flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
+			  TCP_FLAG_SYN | TCP_FLAG_FIN);
+
+	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
+		pp = head;
+
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+int tcp_gro_complete(struct sk_buff *skb)
+{
+	struct tcphdr *th = tcp_hdr(skb);
+
+	skb->csum_start = skb_transport_header(skb) - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len;
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+	if (th->cwr)
+		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+	return 0;
+}
+
 #ifdef CONFIG_TCP_MD5SIG
 static unsigned long tcp_md5sig_users;
 static struct tcp_md5sig_pool **tcp_md5sig_pool;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 26b9030747cc..10172487921b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2346,6 +2346,41 @@ void tcp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr,
+				  skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+
+		/* fall through */
+	case CHECKSUM_NONE:
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	return tcp_gro_receive(head, skb);
+}
+EXPORT_SYMBOL(tcp4_gro_receive);
+
+int tcp4_gro_complete(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct tcphdr *th = tcp_hdr(skb);
+
+	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
+				  iph->saddr, iph->daddr, 0);
+	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+	return tcp_gro_complete(skb);
+}
+EXPORT_SYMBOL(tcp4_gro_complete);
+
 struct proto tcp_prot = {
 	.name			= "TCP",
 	.owner			= THIS_MODULE,
-- 
cgit v1.2.3


From b240a0e5644eb817c4a397098a40e1ad42a615bc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 15 Dec 2008 23:44:31 -0800
Subject: ethtool: Add GGRO and SGRO ops

This patch adds the ethtool ops to enable and disable GRO.  It also
makes GRO depend on RX checksum offload much the same as how TSO
depends on SG support.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  2 ++
 net/core/ethtool.c      | 53 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b4b038b89ee6..27c67a542235 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -467,6 +467,8 @@ struct ethtool_ops {
 
 #define	ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
 #define	ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
+#define ETHTOOL_GGRO		0x0000002b /* Get GRO enable (ethtool_value) */
+#define ETHTOOL_SGRO		0x0000002c /* Set GRO enable (ethtool_value) */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 14ada537f895..947710a36ced 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -528,6 +528,22 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
 	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
 }
 
+static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_rx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (!edata.data && dev->ethtool_ops->set_sg)
+		dev->features &= ~NETIF_F_GRO;
+
+	return dev->ethtool_ops->set_rx_csum(dev, edata.data);
+}
+
 static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_value edata;
@@ -599,6 +615,34 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
 	return 0;
 }
 
+static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GGRO };
+
+	edata.data = dev->features & NETIF_F_GRO;
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		 return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (edata.data) {
+		if (!dev->ethtool_ops->get_rx_csum ||
+		    !dev->ethtool_ops->get_rx_csum(dev))
+			return -EINVAL;
+		dev->features |= NETIF_F_GRO;
+	} else
+		dev->features &= ~NETIF_F_GRO;
+
+	return 0;
+}
+
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_test test;
@@ -932,8 +976,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 				       dev->ethtool_ops->get_rx_csum);
 		break;
 	case ETHTOOL_SRXCSUM:
-		rc = ethtool_set_value(dev, useraddr,
-				       dev->ethtool_ops->set_rx_csum);
+		rc = ethtool_set_rx_csum(dev, useraddr);
 		break;
 	case ETHTOOL_GTXCSUM:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1014,6 +1057,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXFH:
 		rc = ethtool_set_rxhash(dev, useraddr);
 		break;
+	case ETHTOOL_GGRO:
+		rc = ethtool_get_gro(dev, useraddr);
+		break;
+	case ETHTOOL_SGRO:
+		rc = ethtool_set_gro(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 092cab7e2cd868cb0b30209a0337689c3ffd6133 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 16 Dec 2008 01:19:41 -0800
Subject: netfilter: ctnetlink: fix missing CTA_NAT_SEQ_UNSPEC

This patch fixes an inconsistency in nfnetlink_conntrack.h that
I introduced myself. The problem is that CTA_NAT_SEQ_UNSPEC is
missing from enum ctattr_natseq. This inconsistency may lead to
problems in the message parsing in userspace (if the message
contains the CTA_NAT_SEQ_* attributes, of course).

This patch breaks backward compatibility, however, the only known
client of this code is libnetfilter_conntrack which indeed crashes
because it assumes the existence of CTA_NAT_SEQ_UNSPEC to do
the parsing.

The CTA_NAT_SEQ_* attributes were introduced in 2.6.25.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index c19595c89304..29fe9ea1d346 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -141,6 +141,7 @@ enum ctattr_protonat {
 #define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1)
 
 enum ctattr_natseq {
+	CTA_NAT_SEQ_UNSPEC,
 	CTA_NAT_SEQ_CORRECTION_POS,
 	CTA_NAT_SEQ_OFFSET_BEFORE,
 	CTA_NAT_SEQ_OFFSET_AFTER,
-- 
cgit v1.2.3


From e18ce3465477502108187c6c08b6423fb784a313 Mon Sep 17 00:00:00 2001
From: Steve Glendinning <steve.glendinning@smsc.com>
Date: Tue, 16 Dec 2008 02:00:00 -0800
Subject: net: Move flow control definitions to mii.h

flags used within drivers for indicating tx and rx flow control are
defined in 4 drivers (and probably more), move these constants to mii.h.

The 3 SMSC drivers use the same constants (FLOW_CTRL_TX), but TG3 uses
TG3_FLOW_CTRL_TX, so this patch also renames the constants within TG3.

Signed-off-by: Steve Glendinning <steve.glendinning@smsc.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.h     |  3 ---
 drivers/net/smsc9420.h     |  3 ---
 drivers/net/tg3.c          | 50 +++++++++++++++++++++++-----------------------
 drivers/net/tg3.h          |  2 --
 drivers/net/usb/smsc95xx.c |  2 --
 include/linux/mii.h        |  4 ++++
 6 files changed, 29 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h
index f818cf0415f7..2b76654bb958 100644
--- a/drivers/net/smsc911x.h
+++ b/drivers/net/smsc911x.h
@@ -61,9 +61,6 @@
 #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0)
 #endif				/* CONFIG_DEBUG_SPINLOCK */
 
-#define FLOW_CTRL_TX		(1)
-#define FLOW_CTRL_RX		(2)
-
 /* SMSC911x registers and bitfields */
 #define RX_DATA_FIFO			0x00
 
diff --git a/drivers/net/smsc9420.h b/drivers/net/smsc9420.h
index 80c426dc4325..69c351f93f86 100644
--- a/drivers/net/smsc9420.h
+++ b/drivers/net/smsc9420.h
@@ -45,9 +45,6 @@
 
 #define SMSC9420_EEPROM_SIZE		((u32)11)
 
-#define FLOW_CTRL_TX			(1)
-#define FLOW_CTRL_RX			(2)
-
 #define PKT_BUF_SZ			(VLAN_ETH_FRAME_LEN + NET_IP_ALIGN + 4)
 
 /***********************************************/
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 6e40d52107a4..f353f69caeb8 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1187,9 +1187,9 @@ static void tg3_link_report(struct tg3 *tp)
 		printk(KERN_INFO PFX
 		       "%s: Flow control is %s for TX and %s for RX.\n",
 		       tp->dev->name,
-		       (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_TX) ?
+		       (tp->link_config.active_flowctrl & FLOW_CTRL_TX) ?
 		       "on" : "off",
-		       (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX) ?
+		       (tp->link_config.active_flowctrl & FLOW_CTRL_RX) ?
 		       "on" : "off");
 		tg3_ump_link_report(tp);
 	}
@@ -1199,11 +1199,11 @@ static u16 tg3_advert_flowctrl_1000T(u8 flow_ctrl)
 {
 	u16 miireg;
 
-	if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX))
+	if ((flow_ctrl & FLOW_CTRL_TX) && (flow_ctrl & FLOW_CTRL_RX))
 		miireg = ADVERTISE_PAUSE_CAP;
-	else if (flow_ctrl & TG3_FLOW_CTRL_TX)
+	else if (flow_ctrl & FLOW_CTRL_TX)
 		miireg = ADVERTISE_PAUSE_ASYM;
-	else if (flow_ctrl & TG3_FLOW_CTRL_RX)
+	else if (flow_ctrl & FLOW_CTRL_RX)
 		miireg = ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 	else
 		miireg = 0;
@@ -1215,11 +1215,11 @@ static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl)
 {
 	u16 miireg;
 
-	if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX))
+	if ((flow_ctrl & FLOW_CTRL_TX) && (flow_ctrl & FLOW_CTRL_RX))
 		miireg = ADVERTISE_1000XPAUSE;
-	else if (flow_ctrl & TG3_FLOW_CTRL_TX)
+	else if (flow_ctrl & FLOW_CTRL_TX)
 		miireg = ADVERTISE_1000XPSE_ASYM;
-	else if (flow_ctrl & TG3_FLOW_CTRL_RX)
+	else if (flow_ctrl & FLOW_CTRL_RX)
 		miireg = ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM;
 	else
 		miireg = 0;
@@ -1256,16 +1256,16 @@ static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv)
 	if (lcladv & ADVERTISE_1000XPAUSE) {
 		if (lcladv & ADVERTISE_1000XPSE_ASYM) {
 			if (rmtadv & LPA_1000XPAUSE)
-				cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX;
+				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
 			else if (rmtadv & LPA_1000XPAUSE_ASYM)
-				cap = TG3_FLOW_CTRL_RX;
+				cap = FLOW_CTRL_RX;
 		} else {
 			if (rmtadv & LPA_1000XPAUSE)
-				cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX;
+				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
 		}
 	} else if (lcladv & ADVERTISE_1000XPSE_ASYM) {
 		if ((rmtadv & LPA_1000XPAUSE) && (rmtadv & LPA_1000XPAUSE_ASYM))
-			cap = TG3_FLOW_CTRL_TX;
+			cap = FLOW_CTRL_TX;
 	}
 
 	return cap;
@@ -1294,7 +1294,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv)
 
 	tp->link_config.active_flowctrl = flowctrl;
 
-	if (flowctrl & TG3_FLOW_CTRL_RX)
+	if (flowctrl & FLOW_CTRL_RX)
 		tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE;
 	else
 		tp->rx_mode &= ~RX_MODE_FLOW_CTRL_ENABLE;
@@ -1302,7 +1302,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv)
 	if (old_rx_mode != tp->rx_mode)
 		tw32_f(MAC_RX_MODE, tp->rx_mode);
 
-	if (flowctrl & TG3_FLOW_CTRL_TX)
+	if (flowctrl & FLOW_CTRL_TX)
 		tp->tx_mode |= TX_MODE_FLOW_CTRL_ENABLE;
 	else
 		tp->tx_mode &= ~TX_MODE_FLOW_CTRL_ENABLE;
@@ -9419,12 +9419,12 @@ static void tg3_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 
 	epause->autoneg = (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG) != 0;
 
-	if (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX)
+	if (tp->link_config.active_flowctrl & FLOW_CTRL_RX)
 		epause->rx_pause = 1;
 	else
 		epause->rx_pause = 0;
 
-	if (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_TX)
+	if (tp->link_config.active_flowctrl & FLOW_CTRL_TX)
 		epause->tx_pause = 1;
 	else
 		epause->tx_pause = 0;
@@ -9475,14 +9475,14 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 			}
 		} else {
 			if (epause->rx_pause)
-				tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX;
+				tp->link_config.flowctrl |= FLOW_CTRL_RX;
 			else
-				tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX;
+				tp->link_config.flowctrl &= ~FLOW_CTRL_RX;
 
 			if (epause->tx_pause)
-				tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX;
+				tp->link_config.flowctrl |= FLOW_CTRL_TX;
 			else
-				tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX;
+				tp->link_config.flowctrl &= ~FLOW_CTRL_TX;
 
 			if (netif_running(dev))
 				tg3_setup_flow_control(tp, 0, 0);
@@ -9502,13 +9502,13 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 		else
 			tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG;
 		if (epause->rx_pause)
-			tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX;
+			tp->link_config.flowctrl |= FLOW_CTRL_RX;
 		else
-			tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX;
+			tp->link_config.flowctrl &= ~FLOW_CTRL_RX;
 		if (epause->tx_pause)
-			tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX;
+			tp->link_config.flowctrl |= FLOW_CTRL_TX;
 		else
-			tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX;
+			tp->link_config.flowctrl &= ~FLOW_CTRL_TX;
 
 		if (netif_running(dev)) {
 			tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
@@ -13849,7 +13849,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 
 	/* flow control autonegotiation is default behavior */
 	tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
-	tp->link_config.flowctrl = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX;
+	tp->link_config.flowctrl = FLOW_CTRL_TX | FLOW_CTRL_RX;
 
 	tg3_init_coal(tp);
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 61556764a505..0880cfacdcba 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2338,8 +2338,6 @@ struct tg3_link_config {
 	u8				duplex;
 	u8				autoneg;
 	u8				flowctrl;
-#define TG3_FLOW_CTRL_TX		0x01
-#define TG3_FLOW_CTRL_RX		0x02
 
 	/* Describes what we actually have. */
 	u8				active_flowctrl;
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 4638a7bb471e..ee2eac3047b3 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -45,8 +45,6 @@
 #define SMSC95XX_INTERNAL_PHY_ID	(1)
 #define SMSC95XX_TX_OVERHEAD		(8)
 #define SMSC95XX_TX_OVERHEAD_CSUM	(12)
-#define FLOW_CTRL_TX			(1)
-#define FLOW_CTRL_RX			(2)
 
 struct smsc95xx_priv {
 	u32 mac_cr;
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 151b7e0182c7..4a376e0816fd 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -135,6 +135,10 @@
 #define LPA_1000FULL            0x0800  /* Link partner 1000BASE-T full duplex */
 #define LPA_1000HALF            0x0400  /* Link partner 1000BASE-T half duplex */
 
+/* Flow control flags */
+#define FLOW_CTRL_TX		0x01
+#define FLOW_CTRL_RX		0x02
+
 /* This structure is used in all SIOCxMIIxxx ioctl calls */
 struct mii_ioctl_data {
 	__u16		phy_id;
-- 
cgit v1.2.3


From bc02ff95fe4ebd3e5ee7455c0aa6f76ebe39ebca Mon Sep 17 00:00:00 2001
From: Steve Glendinning <steve.glendinning@smsc.com>
Date: Tue, 16 Dec 2008 02:00:48 -0800
Subject: net: Refactor full duplex flow control resolution

These 4 drivers have identical full duplex flow control resolution
functions.  This patch changes them all to use one common function.

The function in question decides whether a device should enable TX and
RX flow control in a standard way (IEEE 802.3-2005 table 28B-3), so this
should also be useful for other drivers.

Signed-off-by: Steve Glendinning <steve.glendinning@smsc.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.c     | 24 +-----------------------
 drivers/net/smsc9420.c     | 24 +-----------------------
 drivers/net/tg3.c          | 24 +-----------------------
 drivers/net/usb/smsc95xx.c | 24 +-----------------------
 include/linux/mii.h        | 29 +++++++++++++++++++++++++++++
 5 files changed, 33 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index ae327166f978..fa28542b47d5 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -642,28 +642,6 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev)
 }
 #endif				/* USE_PHY_WORK_AROUND */
 
-static u8 smsc95xx_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv)
-{
-	u8 cap = 0;
-
-	if (lcladv & ADVERTISE_PAUSE_CAP) {
-		if (lcladv & ADVERTISE_PAUSE_ASYM) {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-			else if (rmtadv & LPA_PAUSE_ASYM)
-				cap = FLOW_CTRL_RX;
-		} else {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-		}
-	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
-		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
-			cap = FLOW_CTRL_TX;
-	}
-
-	return cap;
-}
-
 static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata)
 {
 	struct phy_device *phy_dev = pdata->phy_dev;
@@ -674,7 +652,7 @@ static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata)
 	if (phy_dev->duplex == DUPLEX_FULL) {
 		u16 lcladv = phy_read(phy_dev, MII_ADVERTISE);
 		u16 rmtadv = phy_read(phy_dev, MII_LPA);
-		u8 cap = smsc95xx_resolve_flowctrl_fulldplx(lcladv, rmtadv);
+		u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv);
 
 		if (cap & FLOW_CTRL_RX)
 			flow = 0xFFFF0002;
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index bc9879d5f281..940220f60921 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -1080,28 +1080,6 @@ static void smsc9420_set_multicast_list(struct net_device *dev)
 	smsc9420_pci_flush_write(pd);
 }
 
-static u8 smsc9420_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv)
-{
-	u8 cap = 0;
-
-	if (lcladv & ADVERTISE_PAUSE_CAP) {
-		if (lcladv & ADVERTISE_PAUSE_ASYM) {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-			else if (rmtadv & LPA_PAUSE_ASYM)
-				cap = FLOW_CTRL_RX;
-		} else {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-		}
-	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
-		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
-			cap = FLOW_CTRL_TX;
-	}
-
-	return cap;
-}
-
 static void smsc9420_phy_update_flowcontrol(struct smsc9420_pdata *pd)
 {
 	struct phy_device *phy_dev = pd->phy_dev;
@@ -1110,7 +1088,7 @@ static void smsc9420_phy_update_flowcontrol(struct smsc9420_pdata *pd)
 	if (phy_dev->duplex == DUPLEX_FULL) {
 		u16 lcladv = phy_read(phy_dev, MII_ADVERTISE);
 		u16 rmtadv = phy_read(phy_dev, MII_LPA);
-		u8 cap = smsc9420_resolve_flowctrl_fulldplx(lcladv, rmtadv);
+		u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv);
 
 		if (cap & FLOW_CTRL_RX)
 			flow = 0xFFFF0002;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index f353f69caeb8..06bd2f4eee6c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1227,28 +1227,6 @@ static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl)
 	return miireg;
 }
 
-static u8 tg3_resolve_flowctrl_1000T(u16 lcladv, u16 rmtadv)
-{
-	u8 cap = 0;
-
-	if (lcladv & ADVERTISE_PAUSE_CAP) {
-		if (lcladv & ADVERTISE_PAUSE_ASYM) {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX;
-			else if (rmtadv & LPA_PAUSE_ASYM)
-				cap = TG3_FLOW_CTRL_RX;
-		} else {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX;
-		}
-	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
-		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
-			cap = TG3_FLOW_CTRL_TX;
-	}
-
-	return cap;
-}
-
 static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv)
 {
 	u8 cap = 0;
@@ -1288,7 +1266,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv)
 		if (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES)
 			flowctrl = tg3_resolve_flowctrl_1000X(lcladv, rmtadv);
 		else
-			flowctrl = tg3_resolve_flowctrl_1000T(lcladv, rmtadv);
+			flowctrl = mii_resolve_flowctrl_fdx(lcladv, rmtadv);
 	} else
 		flowctrl = tp->link_config.flowctrl;
 
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index ee2eac3047b3..fed22ffedd57 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -435,28 +435,6 @@ static void smsc95xx_set_multicast(struct net_device *netdev)
 	smsc95xx_write_reg_async(dev, MAC_CR, &pdata->mac_cr);
 }
 
-static u8 smsc95xx_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv)
-{
-	u8 cap = 0;
-
-	if (lcladv & ADVERTISE_PAUSE_CAP) {
-		if (lcladv & ADVERTISE_PAUSE_ASYM) {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-			else if (rmtadv & LPA_PAUSE_ASYM)
-				cap = FLOW_CTRL_RX;
-		} else {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-		}
-	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
-		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
-			cap = FLOW_CTRL_TX;
-	}
-
-	return cap;
-}
-
 static void smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
 					    u16 lcladv, u16 rmtadv)
 {
@@ -469,7 +447,7 @@ static void smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
 	}
 
 	if (duplex == DUPLEX_FULL) {
-		u8 cap = smsc95xx_resolve_flowctrl_fulldplx(lcladv, rmtadv);
+		u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv);
 
 		if (cap & FLOW_CTRL_RX)
 			flow = 0xFFFF0002;
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 4a376e0816fd..ad748588faf1 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -239,5 +239,34 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock,
 	return 0;
 }
 
+/**
+ * mii_resolve_flowctrl_fdx
+ * @lcladv: value of MII ADVERTISE register
+ * @rmtadv: value of MII LPA register
+ *
+ * Resolve full duplex flow control as per IEEE 802.3-2005 table 28B-3
+ */
+static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv)
+{
+	u8 cap = 0;
+
+	if (lcladv & ADVERTISE_PAUSE_CAP) {
+		if (lcladv & ADVERTISE_PAUSE_ASYM) {
+			if (rmtadv & LPA_PAUSE_CAP)
+				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
+			else if (rmtadv & LPA_PAUSE_ASYM)
+				cap = FLOW_CTRL_RX;
+		} else {
+			if (rmtadv & LPA_PAUSE_CAP)
+				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
+		}
+	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
+		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
+			cap = FLOW_CTRL_TX;
+	}
+
+	return cap;
+}
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_MII_H__ */
-- 
cgit v1.2.3


From b24a2516d10751d7ed5afb58420df25370c9dffb Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Tue, 16 Dec 2008 02:06:23 -0800
Subject: ipv6: Add IPV6_PKTINFO sticky option support to setsockopt()

There are three reasons for me to add this support:
1.When no interface is specified in an IPV6_PKTINFO ancillary data
  item, the interface specified in an IPV6_PKTINFO sticky optionis
  is used.

RFC3542:
6.7.  Summary of Outgoing Interface Selection

   This document and [RFC-3493] specify various methods that affect the
   selection of the packet's outgoing interface.  This subsection
   summarizes the ordering among those in order to ensure deterministic
   behavior.

   For a given outgoing packet on a given socket, the outgoing interface
   is determined in the following order:

   1. if an interface is specified in an IPV6_PKTINFO ancillary data
      item, the interface is used.

   2. otherwise, if an interface is specified in an IPV6_PKTINFO sticky
      option, the interface is used.

2.When no IPV6_PKTINFO ancillary data is received,getsockopt() should
  return the sticky option value which set with setsockopt().

RFC 3542:
   Issuing getsockopt() for the above options will return the sticky
   option value i.e., the value set with setsockopt().  If no sticky
   option value has been set getsockopt() will return the following
   values:

3.Make the setsockopt implementation POSIX compliant.

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h     |  1 +
 net/ipv6/ipv6_sockglue.c | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 641e026eee8f..0b816cae533e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -278,6 +278,7 @@ struct ipv6_pinfo {
 	struct in6_addr 	saddr;
 	struct in6_addr 	rcv_saddr;
 	struct in6_addr		daddr;
+	struct in6_pktinfo	sticky_pktinfo;
 	struct in6_addr		*daddr_cache;
 #ifdef CONFIG_IPV6_SUBTREES
 	struct in6_addr		*saddr_cache;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2aa294be0c79..0feaee38bc37 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -395,6 +395,28 @@ sticky_done:
 		break;
 	}
 
+	case IPV6_PKTINFO:
+	{
+		struct in6_pktinfo pkt;
+
+		if (optlen == 0)
+			goto e_inval;
+		else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL)
+			goto e_inval;
+
+		if (copy_from_user(&pkt, optval, optlen)) {
+				retv = -EFAULT;
+				break;
+		}
+		if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
+			goto e_inval;
+
+		np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
+		ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
+		retv = 0;
+		break;
+	}
+
 	case IPV6_2292PKTOPTIONS:
 	{
 		struct ipv6_txoptions *opt = NULL;
-- 
cgit v1.2.3


From b93a531e315e97ef00367099e6b5f19651936e20 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Tue, 16 Dec 2008 11:40:27 +0000
Subject: allow bug table entries to use relative pointers (and use it on
 x86-64)

Impact: reduce bug table size

This allows reducing the bug table size by half. Perhaps there are
other 64-bit architectures that could also make use of this.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig           |  4 ++++
 arch/x86/include/asm/bug.h |  2 +-
 include/asm-generic/bug.h  |  8 ++++++++
 lib/bug.c                  | 19 +++++++++++++++++--
 4 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..ab98cca84e1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -87,6 +87,10 @@ config GENERIC_IOMAP
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
+	select GENERIC_BUG_RELATIVE_POINTERS if X86_64
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	bool
 
 config GENERIC_HWEIGHT
 	def_bool y
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 3def2065fcea..d9cf1cd156d2 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -9,7 +9,7 @@
 #ifdef CONFIG_X86_32
 # define __BUG_C0	"2:\t.long 1b, %c0\n"
 #else
-# define __BUG_C0	"2:\t.quad 1b, %c0\n"
+# define __BUG_C0	"2:\t.long 1b - 2b, %c0 - 2b\n"
 #endif
 
 #define BUG()							\
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12c07c1866b2..4c794d73fb84 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -8,9 +8,17 @@
 #ifdef CONFIG_GENERIC_BUG
 #ifndef __ASSEMBLY__
 struct bug_entry {
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 	unsigned long	bug_addr;
+#else
+	signed int	bug_addr_disp;
+#endif
 #ifdef CONFIG_DEBUG_BUGVERBOSE
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 	const char	*file;
+#else
+	signed int	file_disp;
+#endif
 	unsigned short	line;
 #endif
 	unsigned short	flags;
diff --git a/lib/bug.c b/lib/bug.c
index bfeafd60ee9f..300e41afbf97 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -5,6 +5,8 @@
 
   CONFIG_BUG - emit BUG traps.  Nothing happens without this.
   CONFIG_GENERIC_BUG - enable this code.
+  CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to
+	the containing struct bug_entry for bug_addr and file.
   CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG
 
   CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable
@@ -43,6 +45,15 @@
 
 extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
 
+static inline unsigned long bug_addr(const struct bug_entry *bug)
+{
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+	return bug->bug_addr;
+#else
+	return (unsigned long)bug + bug->bug_addr_disp;
+#endif
+}
+
 #ifdef CONFIG_MODULES
 static LIST_HEAD(module_bug_list);
 
@@ -55,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
 		unsigned i;
 
 		for (i = 0; i < mod->num_bugs; ++i, ++bug)
-			if (bugaddr == bug->bug_addr)
+			if (bugaddr == bug_addr(bug))
 				return bug;
 	}
 	return NULL;
@@ -108,7 +119,7 @@ const struct bug_entry *find_bug(unsigned long bugaddr)
 	const struct bug_entry *bug;
 
 	for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
-		if (bugaddr == bug->bug_addr)
+		if (bugaddr == bug_addr(bug))
 			return bug;
 
 	return module_find_bug(bugaddr);
@@ -133,7 +144,11 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 
 	if (bug) {
 #ifdef CONFIG_DEBUG_BUGVERBOSE
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
 		file = bug->file;
+#else
+		file = (const char *)bug + bug->file_disp;
+#endif
 		line = bug->line;
 #endif
 		warning = (bug->flags & BUGFLAG_WARNING) != 0;
-- 
cgit v1.2.3


From 8c5df16bec8a60bb8589fc232b9e26cac0ed4b2c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:17:26 -0800
Subject: swiotlb: allow architectures to override swiotlb pool allocation

Impact: generalize swiotlb allocation code

Architectures may need to allocate memory specially for use with
the swiotlb.  Create the weak function swiotlb_alloc_boot() and
swiotlb_alloc() defaulting to the current behaviour.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  3 +++
 lib/swiotlb.c           | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b18ec5533e8c..b8c5fc766a56 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -10,6 +10,9 @@ struct scatterlist;
 extern void
 swiotlb_init(void);
 
+extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
+extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 5f6c629a924d..abecb2857556 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -21,6 +21,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/swiotlb.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
@@ -126,6 +127,16 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
+void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+{
+	return alloc_bootmem_low_pages(size);
+}
+
+void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
+{
+	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -145,7 +156,7 @@ swiotlb_init_with_default_size(size_t default_size)
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_low_pages(bytes);
+	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 	io_tlb_end = io_tlb_start + bytes;
@@ -202,8 +213,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-		                                        order);
+		io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs);
 		if (io_tlb_start)
 			break;
 		order--;
-- 
cgit v1.2.3


From 0016fdee927f7aa0f428494bcf11ae60c7470a02 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:27 -0800
Subject: swiotlb: move some definitions to header

Impact: cleanup

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h | 14 ++++++++++++++
 lib/swiotlb.c           | 14 +-------------
 2 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b8c5fc766a56..58b996a642f9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,6 +7,20 @@ struct device;
 struct dma_attrs;
 struct scatterlist;
 
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2.  What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE	128
+
+
+/*
+ * log of the size of each IO TLB slab.  The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
 extern void
 swiotlb_init(void);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index abecb2857556..db724ba7ebf6 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/swiotlb.h>
 #include <linux/string.h>
+#include <linux/swiotlb.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
 
@@ -40,19 +41,6 @@
 #define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
 #define SG_ENT_PHYS_ADDRESS(sg)	virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
 
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2.  What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE	128
-
-/*
- * log of the size of each IO TLB slab.  The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 
 /*
-- 
cgit v1.2.3


From ecbf29cdb3990c83d90d0c4187c89fb2ce423367 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:37:07 -0800
Subject: xen: clean up asm/xen/hypervisor.h

Impact: cleanup

hypervisor.h had accumulated a lot of crud, including lots of spurious
#includes.  Clean it all up, and go around fixing up everything else
accordingly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/xen/hypercall.h  |  6 ++++++
 arch/x86/include/asm/xen/hypervisor.h | 39 +++++++----------------------------
 arch/x86/include/asm/xen/page.h       |  5 +++++
 arch/x86/xen/enlighten.c              |  1 +
 drivers/xen/balloon.c                 |  4 +++-
 drivers/xen/features.c                |  6 +++++-
 drivers/xen/grant-table.c             |  1 +
 include/xen/interface/event_channel.h |  2 ++
 8 files changed, 31 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3f6000d95fe2..5e79ca694326 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -33,8 +33,14 @@
 #ifndef _ASM_X86_XEN_HYPERCALL_H
 #define _ASM_X86_XEN_HYPERCALL_H
 
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index a38d25ac87d2..81fbd735aec4 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -33,39 +33,10 @@
 #ifndef _ASM_X86_XEN_HYPERVISOR_H
 #define _ASM_X86_XEN_HYPERVISOR_H
 
-#include <linux/types.h>
-#include <linux/kernel.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/version.h>
-
-#include <asm/ptrace.h>
-#include <asm/page.h>
-#include <asm/desc.h>
-#if defined(__i386__)
-#  ifdef CONFIG_X86_PAE
-#   include <asm-generic/pgtable-nopud.h>
-#  else
-#   include <asm-generic/pgtable-nopmd.h>
-#  endif
-#endif
-#include <asm/xen/hypercall.h>
-
 /* arch/i386/kernel/setup.c */
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
 
-/* arch/i386/mach-xen/evtchn.c */
-/* Force a proper event-channel callback from Xen. */
-extern void force_evtchn_callback(void);
-
-/* Turn jiffies into Xen system time. */
-u64 jiffies_to_st(unsigned long jiffies);
-
-
-#define MULTI_UVMFLAGS_INDEX 3
-#define MULTI_UVMDOMID_INDEX 4
-
 enum xen_domain_type {
 	XEN_NATIVE,
 	XEN_PV_DOMAIN,
@@ -74,9 +45,15 @@ enum xen_domain_type {
 
 extern enum xen_domain_type xen_domain_type;
 
+#ifdef CONFIG_XEN
 #define xen_domain()		(xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
+#else
+#define xen_domain()		(0)
+#endif
+
+#define xen_pv_domain()		(xen_domain() && xen_domain_type == XEN_PV_DOMAIN)
+#define xen_hvm_domain()	(xen_domain() && xen_domain_type == XEN_HVM_DOMAIN)
+
 #define xen_initial_domain()	(xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
-#define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
 
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index bc628998a1b9..7ef617ef1df3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -1,11 +1,16 @@
 #ifndef _ASM_X86_XEN_PAGE_H
 #define _ASM_X86_XEN_PAGE_H
 
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
 #include <linux/pfn.h>
 
 #include <asm/uaccess.h>
+#include <asm/page.h>
 #include <asm/pgtable.h>
 
+#include <xen/interface/xen.h>
 #include <xen/features.h>
 
 /* Xen machine address */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 86cd2f829683..bea215230b20 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -28,6 +28,7 @@
 #include <linux/console.h>
 
 #include <xen/interface/xen.h>
+#include <xen/interface/version.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/features.h>
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 526c191e84ea..8dc7109d61b7 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -44,13 +44,15 @@
 #include <linux/list.h>
 #include <linux/sysdev.h>
 
-#include <asm/xen/hypervisor.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include <xen/interface/xen.h>
 #include <xen/interface/memory.h>
 #include <xen/xenbus.h>
 #include <xen/features.h>
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 0707714e40d6..99eda169c779 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -8,7 +8,11 @@
 #include <linux/types.h>
 #include <linux/cache.h>
 #include <linux/module.h>
-#include <asm/xen/hypervisor.h>
+
+#include <asm/xen/hypercall.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
 #include <xen/features.h>
 
 u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 06592b9da83c..7d8f531fb8e8 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -40,6 +40,7 @@
 #include <xen/interface/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
+#include <asm/xen/hypercall.h>
 
 #include <asm/pgtable.h>
 #include <asm/sync_bitops.h>
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index 919b5bdcb2bd..2090881c3650 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -9,6 +9,8 @@
 #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
 #define __XEN_PUBLIC_EVENT_CHANNEL_H__
 
+#include <xen/interface/xen.h>
+
 typedef uint32_t evtchn_port_t;
 DEFINE_GUEST_HANDLE(evtchn_port_t);
 
-- 
cgit v1.2.3


From 48a1b10aff588833b73994704c47bbd0deb73e9c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 11 Dec 2008 00:15:01 -0800
Subject: x86, sparseirq: move irq_desc according to smp_affinity, v7

Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes

if CONFIG_NUMA_MIGRATE_IRQ_DESC is set:

-  make irq_desc to go with affinity aka irq_desc moving etc
-  call move_irq_desc in irq_complete_move()
-  legacy irq_desc is not moved, because they are allocated via static array

for logical apic mode, need to add move_desc_in_progress_in_same_domain,
otherwise it will not be moved ==> also could need two phases to get
irq_desc moved.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig          |   9 +++
 arch/x86/kernel/io_apic.c | 142 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/irq.h       |  10 ++++
 kernel/irq/Makefile       |   1 +
 kernel/irq/chip.c         |  12 +++-
 kernel/irq/handle.c       |  15 +++--
 kernel/irq/internals.h    |   5 ++
 kernel/irq/numa_migrate.c | 127 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 313 insertions(+), 8 deletions(-)
 create mode 100644 kernel/irq/numa_migrate.c

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8943c13502c6..29073532f94c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -248,6 +248,15 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say Y.
 
+config NUMA_MIGRATE_IRQ_DESC
+	bool "Move irq desc when changing irq smp_affinity"
+	depends on SPARSE_IRQ && SMP
+	default n
+	help
+	  This enables moving irq_desc to cpu/node that irq will use handled.
+
+	  If you don't know what to do here, say N.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a1a2e070f31a..bfe1245b1a3e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -141,6 +141,9 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	u8 move_desc_pending : 1;
+#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 	}
 }
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+	struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+	cfg->irq_2_pin = NULL;
+	old_entry = old_cfg->irq_2_pin;
+	if (!old_entry)
+		return;
+
+	entry = get_one_free_irq_2_pin(cpu);
+	if (!entry)
+		return;
+
+	entry->apic	= old_entry->apic;
+	entry->pin	= old_entry->pin;
+	head		= entry;
+	tail		= entry;
+	old_entry	= old_entry->next;
+	while (old_entry) {
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			entry = head;
+			while (entry) {
+				head = entry->next;
+				kfree(entry);
+				entry = head;
+			}
+			/* still use the old one */
+			return;
+		}
+		entry->apic	= old_entry->apic;
+		entry->pin	= old_entry->pin;
+		tail->next	= entry;
+		tail		= entry;
+		old_entry	= old_entry->next;
+	}
+
+	tail->next = NULL;
+	cfg->irq_2_pin = head;
+}
+
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry, *next;
+
+	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+		return;
+
+	entry = old_cfg->irq_2_pin;
+
+	while (entry) {
+		next = entry->next;
+		kfree(entry);
+		entry = next;
+	}
+	old_cfg->irq_2_pin = NULL;
+}
+
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+				 struct irq_desc *desc, int cpu)
+{
+	struct irq_cfg *cfg;
+	struct irq_cfg *old_cfg;
+
+	cfg = get_one_free_irq_cfg(cpu);
+
+	if (!cfg)
+		return;
+
+	desc->chip_data = cfg;
+
+	old_cfg = old_desc->chip_data;
+
+	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+	kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	struct irq_cfg *old_cfg, *cfg;
+
+	old_cfg = old_desc->chip_data;
+	cfg = desc->chip_data;
+
+	if (old_cfg == cfg)
+		return;
+
+	if (old_cfg) {
+		free_irq_2_pin(old_cfg, cfg);
+		free_irq_cfg(old_cfg);
+		old_desc->chip_data = NULL;
+	}
+}
+
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+	struct irq_cfg *cfg = desc->chip_data;
+
+	if (!cfg->move_in_progress) {
+		/* it means that domain is not changed */
+		if (!cpus_intersects(desc->affinity, mask))
+			cfg->move_desc_pending = 1;
+	}
+}
+#endif
+
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
@@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
 static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
 {
 }
+#endif
 
 struct io_apic {
 	unsigned int index;
@@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp)
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress))
+	if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		if (likely(!cfg->move_desc_pending))
+			return;
+
+		/* domain is not change, but affinity is changed */
+		me = smp_processor_id();
+		if (cpu_isset(me, desc->affinity)) {
+			*descp = desc = move_irq_desc(desc, me);
+			/* get the new one */
+			cfg = desc->chip_data;
+			cfg->move_desc_pending = 0;
+		}
+#endif
 		return;
+	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
 		cpumask_t cleanup_mask;
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		*descp = desc = move_irq_desc(desc, me);
+		/* get the new one */
+		cfg = desc->chip_data;
+#endif
+
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b5749db3e5a1..36a015746788 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
 #endif
 
+static inline struct irq_desc *
+irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
+{
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	return irq_to_desc(irq);
+#else
+	return desc;
+#endif
+}
+
 /*
  * Migration helpers for obsolete names, they will go away:
  */
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 681c52dbfe22..4dd5b1edac98 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
+obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 8e4fce4a1b1f..de210f4b7a92 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	spin_lock(&desc->lock);
 	mask_ack_irq(desc, irq);
+	desc = irq_remap_to_desc(irq, desc);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	desc->status &= ~IRQ_INPROGRESS;
 out:
 	desc->chip->eoi(irq);
+	desc = irq_remap_to_desc(irq, desc);
 
 	spin_unlock(&desc->lock);
 }
@@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		    !desc->action)) {
 		desc->status |= (IRQ_PENDING | IRQ_MASKED);
 		mask_ack_irq(desc, irq);
+		desc = irq_remap_to_desc(irq, desc);
 		goto out_unlock;
 	}
 	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
+	desc = irq_remap_to_desc(irq, desc);
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	if (desc->chip->eoi)
+	if (desc->chip->eoi) {
 		desc->chip->eoi(irq);
+		desc = irq_remap_to_desc(irq, desc);
+	}
 }
 
 void
@@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
-		if (desc->chip != &no_irq_chip)
+		if (desc->chip != &no_irq_chip) {
 			mask_ack_irq(desc, irq);
+			desc = irq_remap_to_desc(irq, desc);
+		}
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 8aa09547f5ef..f1a23069c20a 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -23,7 +23,7 @@
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
-static struct lock_class_key irq_desc_lock_class;
+struct lock_class_key irq_desc_lock_class;
 
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
@@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = {
 #endif
 };
 
-static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 {
 	unsigned long bytes;
 	char *ptr;
@@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
 /*
  * Protect the sparse_irqs:
  */
-static DEFINE_SPINLOCK(sparse_irq_lock);
+DEFINE_SPINLOCK(sparse_irq_lock);
 
 struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
 
@@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq)
 		/*
 		 * No locking required for CPU-local interrupts:
 		 */
-		if (desc->chip->ack)
+		if (desc->chip->ack) {
 			desc->chip->ack(irq);
+			/* get new one */
+			desc = irq_remap_to_desc(irq, desc);
+		}
 		if (likely(!(desc->status & IRQ_DISABLED))) {
 			action_ret = handle_IRQ_event(irq, desc->action);
 			if (!noirqdebug)
@@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq)
 	}
 
 	spin_lock(&desc->lock);
-	if (desc->chip->ack)
+	if (desc->chip->ack) {
 		desc->chip->ack(irq);
+		desc = irq_remap_to_desc(irq, desc);
+	}
 	/*
 	 * REPLAY is when Linux resends an IRQ that was dropped earlier
 	 * WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 64c1c7253dae..e6d0a43cc125 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
 
+extern struct lock_class_key irq_desc_lock_class;
+extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern spinlock_t sparse_irq_lock;
+extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
+
 #ifdef CONFIG_PROC_FS
 extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
new file mode 100644
index 000000000000..0178e2296990
--- /dev/null
+++ b/kernel/irq/numa_migrate.c
@@ -0,0 +1,127 @@
+/*
+ * linux/kernel/irq/handle.c
+ *
+ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
+ *
+ * This file contains the core interrupt handling code.
+ *
+ * Detailed information is available in Documentation/DocBook/genericirq
+ *
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include "internals.h"
+
+static void init_copy_kstat_irqs(struct irq_desc *old_desc,
+				 struct irq_desc *desc,
+				 int cpu, int nr)
+{
+	unsigned long bytes;
+
+	init_kstat_irqs(desc, cpu, nr);
+
+	if (desc->kstat_irqs != old_desc->kstat_irqs) {
+		/* Compute how many bytes we need per irq and allocate them */
+		bytes = nr * sizeof(unsigned int);
+
+		memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes);
+	}
+}
+
+static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	if (old_desc->kstat_irqs == desc->kstat_irqs)
+		return;
+
+	kfree(old_desc->kstat_irqs);
+	old_desc->kstat_irqs = NULL;
+}
+
+static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
+		 struct irq_desc *desc, int cpu)
+{
+	memcpy(desc, old_desc, sizeof(struct irq_desc));
+	desc->cpu = cpu;
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+	arch_init_copy_chip_data(old_desc, desc, cpu);
+}
+
+static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	free_kstat_irqs(old_desc, desc);
+	arch_free_chip_data(old_desc, desc);
+}
+
+static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
+						int cpu)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+	unsigned long flags;
+	int node;
+
+	irq = old_desc->irq;
+
+	spin_lock_irqsave(&sparse_irq_lock, flags);
+
+	/* We have to check it to avoid races with another CPU */
+	desc = irq_desc_ptrs[irq];
+
+	if (desc && old_desc != desc)
+			goto out_unlock;
+
+	node = cpu_to_node(cpu);
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  move irq_desc for %d to cpu %d node %d\n",
+		 irq, cpu, node);
+	if (!desc) {
+		printk(KERN_ERR "can not get new irq_desc for moving\n");
+		/* still use old one */
+		desc = old_desc;
+		goto out_unlock;
+	}
+	init_copy_one_irq_desc(irq, old_desc, desc, cpu);
+
+	irq_desc_ptrs[irq] = desc;
+
+	/* free the old one */
+	free_one_irq_desc(old_desc, desc);
+	kfree(old_desc);
+
+out_unlock:
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+	return desc;
+}
+
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+{
+	int old_cpu;
+	int node, old_node;
+
+	/* those all static, do move them */
+	if (desc->irq < NR_IRQS_LEGACY)
+		return desc;
+
+	old_cpu = desc->cpu;
+	printk(KERN_DEBUG
+		 "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
+	if (old_cpu != cpu) {
+		node = cpu_to_node(cpu);
+		old_node = cpu_to_node(old_cpu);
+		if (old_node != node)
+			desc = __real_move_irq_desc(desc, cpu);
+		else
+			desc->cpu = cpu;
+	}
+
+	return desc;
+}
+
-- 
cgit v1.2.3


From b31a1d8b41513b96e9c7ec2f68c5734cef0b26a4 Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Tue, 16 Dec 2008 15:29:15 -0800
Subject: gianfar: Convert gianfar to an of_platform_driver

Does the same for the accompanying MDIO driver, and then modifies the TBI
configuration method.  The old way used fields in einfo, which no longer
exists.  The new way is to create an MDIO device-tree node for each instance
of gianfar, and create a tbi-handle property to associate ethernet controllers
with the TBI PHYs they are connected to.

Signed-off-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/powerpc/dts-bindings/fsl/tsec.txt |  12 +-
 arch/powerpc/boot/dts/asp834x-redboot.dts       |  20 ++
 arch/powerpc/boot/dts/ksi8560.dts               |  20 ++
 arch/powerpc/boot/dts/mpc8313erdb.dts           |  20 ++
 arch/powerpc/boot/dts/mpc8315erdb.dts           |  19 ++
 arch/powerpc/boot/dts/mpc8349emitx.dts          |  18 ++
 arch/powerpc/boot/dts/mpc8349emitxgp.dts        |   5 +
 arch/powerpc/boot/dts/mpc834x_mds.dts           |  19 ++
 arch/powerpc/boot/dts/mpc8377_mds.dts           |  19 ++
 arch/powerpc/boot/dts/mpc8377_rdb.dts           |  19 ++
 arch/powerpc/boot/dts/mpc8378_mds.dts           |  19 ++
 arch/powerpc/boot/dts/mpc8378_rdb.dts           |  17 ++
 arch/powerpc/boot/dts/mpc8379_mds.dts           |  18 ++
 arch/powerpc/boot/dts/mpc8379_rdb.dts           |  18 ++
 arch/powerpc/boot/dts/mpc8536ds.dts             |  18 ++
 arch/powerpc/boot/dts/mpc8540ads.dts            |  31 +++
 arch/powerpc/boot/dts/mpc8541cds.dts            |  18 ++
 arch/powerpc/boot/dts/mpc8544ds.dts             |  20 ++
 arch/powerpc/boot/dts/mpc8548cds.dts            |  44 ++++
 arch/powerpc/boot/dts/mpc8555cds.dts            |  18 ++
 arch/powerpc/boot/dts/mpc8560ads.dts            |  18 ++
 arch/powerpc/boot/dts/mpc8568mds.dts            |  18 ++
 arch/powerpc/boot/dts/mpc8572ds.dts             |  45 ++++
 arch/powerpc/boot/dts/mpc8641_hpcn.dts          |  45 ++++
 arch/powerpc/boot/dts/sbc8349.dts               |  18 ++
 arch/powerpc/boot/dts/sbc8548.dts               |  18 ++
 arch/powerpc/boot/dts/sbc8560.dts               |  18 ++
 arch/powerpc/boot/dts/sbc8641d.dts              |  44 ++++
 arch/powerpc/boot/dts/stx_gp3_8560.dts          |  18 ++
 arch/powerpc/boot/dts/tqm8540.dts               |  28 +++
 arch/powerpc/boot/dts/tqm8541.dts               |  18 ++
 arch/powerpc/boot/dts/tqm8548-bigflash.dts      |  44 ++++
 arch/powerpc/boot/dts/tqm8548.dts               |  44 ++++
 arch/powerpc/boot/dts/tqm8555.dts               |  18 ++
 arch/powerpc/boot/dts/tqm8560.dts               |  18 ++
 arch/powerpc/sysdev/fsl_soc.c                   | 241 +++---------------
 drivers/net/gianfar.c                           | 321 ++++++++++++++++--------
 drivers/net/gianfar.h                           |  21 +-
 drivers/net/gianfar_ethtool.c                   |  22 +-
 drivers/net/gianfar_mii.c                       | 212 +++++++++++-----
 drivers/net/gianfar_mii.h                       |   2 +
 include/linux/fsl_devices.h                     |  18 +-
 42 files changed, 1217 insertions(+), 424 deletions(-)

(limited to 'include')

diff --git a/Documentation/powerpc/dts-bindings/fsl/tsec.txt b/Documentation/powerpc/dts-bindings/fsl/tsec.txt
index cf55fa4112d2..7fa4b27574b5 100644
--- a/Documentation/powerpc/dts-bindings/fsl/tsec.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/tsec.txt
@@ -2,8 +2,8 @@
 
 The MDIO is a bus to which the PHY devices are connected.  For each
 device that exists on this bus, a child node should be created.  See
-the definition of the PHY node below for an example of how to define
-a PHY.
+the definition of the PHY node in booting-without-of.txt for an example
+of how to define a PHY.
 
 Required properties:
   - reg : Offset and length of the register set for the device
@@ -21,6 +21,14 @@ Example:
 		};
 	};
 
+* TBI Internal MDIO bus
+
+As of this writing, every tsec is associated with an internal TBI PHY.
+This PHY is accessed through the local MDIO bus.  These buses are defined
+similarly to the mdio buses, except they are compatible with "fsl,gianfar-tbi".
+The TBI PHYs underneath them are similar to normal PHYs, but the reg property
+is considered instructive, rather than descriptive.  The reg property should
+be chosen so it doesn't interfere with other PHYs on the bus.
 
 * Gianfar-compatible ethernet nodes
 
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
index 6235fca445de..524af7ef9f26 100644
--- a/arch/powerpc/boot/dts/asp834x-redboot.dts
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -199,8 +199,26 @@
 				reg = <0x2>;
 				device_type = "ethernet-phy";
 			};
+
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -210,6 +228,7 @@
 			local-mac-address = [ 00 08 e5 11 32 33 ];
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			linux,network-index = <0>;
 		};
@@ -223,6 +242,7 @@
 			local-mac-address = [ 00 08 e5 11 32 34 ];
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			linux,network-index = <1>;
 		};
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index 49737589ffc8..3bfff47418db 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -141,8 +141,26 @@
 				reg = <0x2>;
 				device_type = "ethernet-phy";
 			};
+
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+
 		enet0: ethernet@24000 {
 			device_type = "network";
 			model = "TSEC";
@@ -152,6 +170,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&PHY1>;
 		};
 
@@ -164,6 +183,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&PHY2>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts
index 503031766825..d4df8b6857a4 100644
--- a/arch/powerpc/boot/dts/mpc8313erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8313erdb.dts
@@ -190,6 +190,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 0x8 36 0x8 35 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = < &tbi0 >;
 			phy-handle = < &phy1 >;
 			fsl,magic-packet;
 
@@ -210,6 +211,10 @@
 					reg = <0x4>;
 					device_type = "ethernet-phy";
 				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
 			};
 		};
 
@@ -222,9 +227,24 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <34 0x8 33 0x8 32 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = < &tbi1 >;
 			phy-handle = < &phy4 >;
 			sleep = <&pmc 0x10000000>;
 			fsl,magic-packet;
+
+			mdio@25520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x25520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+
+
 		};
 
 		serial0: serial@4500 {
diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts
index 6b850670de1d..d2cdd47a246d 100644
--- a/arch/powerpc/boot/dts/mpc8315erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8315erdb.dts
@@ -206,8 +206,25 @@
 				reg = <0x1>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -217,6 +234,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = < &phy0 >;
 		};
 
@@ -229,6 +247,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = < &phy1 >;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts
index 4bdbaf4993a1..712783d0707e 100644
--- a/arch/powerpc/boot/dts/mpc8349emitx.dts
+++ b/arch/powerpc/boot/dts/mpc8349emitx.dts
@@ -184,6 +184,22 @@
 				reg = <0x1c>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -195,6 +211,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy1c>;
 			linux,network-index = <0>;
 		};
@@ -211,6 +228,7 @@
 			/* Vitesse 7385 isn't on the MDIO bus */
 			fixed-link = <1 1 1000 0 0>;
 			linux,network-index = <1>;
+			tbi-handle = <&tbi1>;
 		};
 
 		serial0: serial@4500 {
diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
index fa40647ee62e..3e918af41fb1 100644
--- a/arch/powerpc/boot/dts/mpc8349emitxgp.dts
+++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
@@ -163,6 +163,10 @@
 				reg = <0x1c>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -174,6 +178,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy1c>;
 			linux,network-index = <0>;
 		};
diff --git a/arch/powerpc/boot/dts/mpc834x_mds.dts b/arch/powerpc/boot/dts/mpc834x_mds.dts
index c986c541e9bb..d9adba01c09c 100644
--- a/arch/powerpc/boot/dts/mpc834x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc834x_mds.dts
@@ -185,8 +185,25 @@
 				reg = <0x1>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -196,6 +213,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			linux,network-index = <0>;
 		};
@@ -209,6 +227,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			linux,network-index = <1>;
 		};
diff --git a/arch/powerpc/boot/dts/mpc8377_mds.dts b/arch/powerpc/boot/dts/mpc8377_mds.dts
index 0484561bd2c0..1d14d7052e6d 100644
--- a/arch/powerpc/boot/dts/mpc8377_mds.dts
+++ b/arch/powerpc/boot/dts/mpc8377_mds.dts
@@ -193,8 +193,25 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -205,6 +222,7 @@
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -218,6 +236,7 @@
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy3>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
index 435ef3dd022d..31f348fdfe14 100644
--- a/arch/powerpc/boot/dts/mpc8377_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -211,8 +211,25 @@
 				reg = <0x2>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -223,6 +240,7 @@
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -237,6 +255,7 @@
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
 			fixed-link = <1 1 1000 0 0>;
+			tbi-handle = <&tbi1>;
 		};
 
 		serial0: serial@4500 {
diff --git a/arch/powerpc/boot/dts/mpc8378_mds.dts b/arch/powerpc/boot/dts/mpc8378_mds.dts
index 67a08d2e2ff2..b85fc02682d2 100644
--- a/arch/powerpc/boot/dts/mpc8378_mds.dts
+++ b/arch/powerpc/boot/dts/mpc8378_mds.dts
@@ -232,8 +232,25 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -244,6 +261,7 @@
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -257,6 +275,7 @@
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy3>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
index b11e68f56a06..7a2bad038bd6 100644
--- a/arch/powerpc/boot/dts/mpc8378_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -211,8 +211,25 @@
 				reg = <0x2>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
diff --git a/arch/powerpc/boot/dts/mpc8379_mds.dts b/arch/powerpc/boot/dts/mpc8379_mds.dts
index 323370a2b5ff..acf06c438dbf 100644
--- a/arch/powerpc/boot/dts/mpc8379_mds.dts
+++ b/arch/powerpc/boot/dts/mpc8379_mds.dts
@@ -232,6 +232,22 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -244,6 +260,7 @@
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -257,6 +274,7 @@
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy3>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
index 337af6ea26d3..e067616f3f42 100644
--- a/arch/powerpc/boot/dts/mpc8379_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -211,6 +211,22 @@
 				reg = <0x2>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -223,6 +239,7 @@
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -237,6 +254,7 @@
 			phy-connection-type = "mii";
 			interrupt-parent = <&ipic>;
 			fixed-link = <1 1 1000 0 0>;
+			tbi-handle = <&tbi1>;
 		};
 
 		serial0: serial@4500 {
diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/mpc8536ds.dts
index 35db1e5440c7..3c905df1812c 100644
--- a/arch/powerpc/boot/dts/mpc8536ds.dts
+++ b/arch/powerpc/boot/dts/mpc8536ds.dts
@@ -155,6 +155,22 @@
 				reg = <1>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		usb@22000 {
@@ -186,6 +202,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -199,6 +216,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
 		};
diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts
index 9568bfaff8f7..79570ffe41b9 100644
--- a/arch/powerpc/boot/dts/mpc8540ads.dts
+++ b/arch/powerpc/boot/dts/mpc8540ads.dts
@@ -150,6 +150,34 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -161,6 +189,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -173,6 +202,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
@@ -185,6 +215,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <41 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy3>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8541cds.dts b/arch/powerpc/boot/dts/mpc8541cds.dts
index 6480f4fd96e0..221036a8ce23 100644
--- a/arch/powerpc/boot/dts/mpc8541cds.dts
+++ b/arch/powerpc/boot/dts/mpc8541cds.dts
@@ -144,6 +144,22 @@
 				reg = <0x1>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -155,6 +171,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -167,6 +184,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8544ds.dts b/arch/powerpc/boot/dts/mpc8544ds.dts
index f1fb20737e3e..b9da42105066 100644
--- a/arch/powerpc/boot/dts/mpc8544ds.dts
+++ b/arch/powerpc/boot/dts/mpc8544ds.dts
@@ -116,8 +116,26 @@
 				reg = <0x1>;
 				device_type = "ethernet-phy";
 			};
+
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+
 		dma@21300 {
 			#address-cells = <1>;
 			#size-cells = <1>;
@@ -169,6 +187,7 @@
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
 			phy-handle = <&phy0>;
+			tbi-handle = <&tbi0>;
 			phy-connection-type = "rgmii-id";
 		};
 
@@ -182,6 +201,7 @@
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
+			tbi-handle = <&tbi1>;
 			phy-connection-type = "rgmii-id";
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8548cds.dts b/arch/powerpc/boot/dts/mpc8548cds.dts
index 431b496270dc..df774a7088ff 100644
--- a/arch/powerpc/boot/dts/mpc8548cds.dts
+++ b/arch/powerpc/boot/dts/mpc8548cds.dts
@@ -172,6 +172,46 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@27520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x27520 0x20>;
+
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -183,6 +223,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -195,6 +236,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
@@ -208,6 +250,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 		};
 
@@ -220,6 +263,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 2 38 2 39 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
 			phy-handle = <&phy3>;
 		};
  */
diff --git a/arch/powerpc/boot/dts/mpc8555cds.dts b/arch/powerpc/boot/dts/mpc8555cds.dts
index d833a5c4f476..053b01e1c93b 100644
--- a/arch/powerpc/boot/dts/mpc8555cds.dts
+++ b/arch/powerpc/boot/dts/mpc8555cds.dts
@@ -144,6 +144,22 @@
 				reg = <0x1>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -155,6 +171,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -167,6 +184,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8560ads.dts b/arch/powerpc/boot/dts/mpc8560ads.dts
index 4d1f2f284094..11b1bcbe14ce 100644
--- a/arch/powerpc/boot/dts/mpc8560ads.dts
+++ b/arch/powerpc/boot/dts/mpc8560ads.dts
@@ -145,6 +145,22 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -156,6 +172,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -168,6 +185,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8568mds.dts b/arch/powerpc/boot/dts/mpc8568mds.dts
index c80158f7741d..1955bd9e113d 100644
--- a/arch/powerpc/boot/dts/mpc8568mds.dts
+++ b/arch/powerpc/boot/dts/mpc8568mds.dts
@@ -179,6 +179,22 @@
 				reg = <0x3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -190,6 +206,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
  			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -202,6 +219,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
  			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy3>;
 		};
 
diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/mpc8572ds.dts
index 5c69b2fafd32..05f67253b49f 100644
--- a/arch/powerpc/boot/dts/mpc8572ds.dts
+++ b/arch/powerpc/boot/dts/mpc8572ds.dts
@@ -225,6 +225,47 @@
 				interrupts = <10 1>;
 				reg = <0x3>;
 			};
+
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@27520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x27520 0x20>;
+
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -236,6 +277,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -249,6 +291,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -262,6 +305,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -275,6 +319,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 2 38 2 39 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
 			phy-handle = <&phy3>;
 			phy-connection-type = "rgmii-id";
 		};
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
index d665e767822a..35d5e248ccd7 100644
--- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts
+++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
@@ -205,8 +205,49 @@
 				reg = <3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@27520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x27520 0x20>;
+
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
+
 		enet0: ethernet@24000 {
 			cell-index = <0>;
 			device_type = "network";
@@ -216,6 +257,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30  2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -229,6 +271,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -242,6 +285,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -255,6 +299,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 2 38 2 39 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
 			phy-handle = <&phy3>;
 			phy-connection-type = "rgmii-id";
 		};
diff --git a/arch/powerpc/boot/dts/sbc8349.dts b/arch/powerpc/boot/dts/sbc8349.dts
index 0f941f310e44..8d365a57ebc1 100644
--- a/arch/powerpc/boot/dts/sbc8349.dts
+++ b/arch/powerpc/boot/dts/sbc8349.dts
@@ -177,6 +177,22 @@
 				reg = <0x1a>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -188,6 +204,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <32 0x8 33 0x8 34 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			linux,network-index = <0>;
 		};
@@ -201,6 +218,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 0x8 36 0x8 37 0x8>;
 			interrupt-parent = <&ipic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			linux,network-index = <1>;
 		};
diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts
index 333552b4e90d..2baf4a51f224 100644
--- a/arch/powerpc/boot/dts/sbc8548.dts
+++ b/arch/powerpc/boot/dts/sbc8548.dts
@@ -252,6 +252,22 @@
 				reg = <0x1a>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -263,6 +279,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -275,6 +292,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/sbc8560.dts b/arch/powerpc/boot/dts/sbc8560.dts
index db3632ef9888..01542f7062ab 100644
--- a/arch/powerpc/boot/dts/sbc8560.dts
+++ b/arch/powerpc/boot/dts/sbc8560.dts
@@ -168,6 +168,22 @@
 				reg = <0x1c>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -179,6 +195,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 		};
 
@@ -191,6 +208,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/sbc8641d.dts b/arch/powerpc/boot/dts/sbc8641d.dts
index 9652456158fb..36db981548e4 100644
--- a/arch/powerpc/boot/dts/sbc8641d.dts
+++ b/arch/powerpc/boot/dts/sbc8641d.dts
@@ -222,6 +222,46 @@
 				reg = <2>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@27520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x27520 0x20>;
+
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -233,6 +273,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30  2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -246,6 +287,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -259,6 +301,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 			phy-connection-type = "rgmii-id";
 		};
@@ -272,6 +315,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 2 38 2 39 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
 			phy-handle = <&phy3>;
 			phy-connection-type = "rgmii-id";
 		};
diff --git a/arch/powerpc/boot/dts/stx_gp3_8560.dts b/arch/powerpc/boot/dts/stx_gp3_8560.dts
index fcd1db6ca0a8..fff33fe6efc6 100644
--- a/arch/powerpc/boot/dts/stx_gp3_8560.dts
+++ b/arch/powerpc/boot/dts/stx_gp3_8560.dts
@@ -142,6 +142,22 @@
 				reg = <4>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -153,6 +169,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -165,6 +182,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy4>;
 		};
 
diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts
index e1d260b9085e..a693f01c21aa 100644
--- a/arch/powerpc/boot/dts/tqm8540.dts
+++ b/arch/powerpc/boot/dts/tqm8540.dts
@@ -155,6 +155,34 @@
 				reg = <3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts
index d76441ec5dc7..9e3f5f0dde20 100644
--- a/arch/powerpc/boot/dts/tqm8541.dts
+++ b/arch/powerpc/boot/dts/tqm8541.dts
@@ -154,6 +154,22 @@
 				reg = <3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -165,6 +181,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -177,6 +194,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/tqm8548-bigflash.dts b/arch/powerpc/boot/dts/tqm8548-bigflash.dts
index 4199e89b4e50..15086eb65c50 100644
--- a/arch/powerpc/boot/dts/tqm8548-bigflash.dts
+++ b/arch/powerpc/boot/dts/tqm8548-bigflash.dts
@@ -179,6 +179,46 @@
 				reg = <5>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@27520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x27520 0x20>;
+
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -190,6 +230,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -202,6 +243,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
@@ -214,6 +256,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy3>;
 		};
 
@@ -226,6 +269,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 2 38 2 39 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
 			phy-handle = <&phy4>;
 		};
 
diff --git a/arch/powerpc/boot/dts/tqm8548.dts b/arch/powerpc/boot/dts/tqm8548.dts
index 58ee4185454b..b7b65f5e79b6 100644
--- a/arch/powerpc/boot/dts/tqm8548.dts
+++ b/arch/powerpc/boot/dts/tqm8548.dts
@@ -179,6 +179,46 @@
 				reg = <5>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@27520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x27520 0x20>;
+
+			tbi3: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -190,6 +230,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -202,6 +243,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
@@ -214,6 +256,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <31 2 32 2 33 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
 			phy-handle = <&phy3>;
 		};
 
@@ -226,6 +269,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <37 2 38 2 39 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi3>;
 			phy-handle = <&phy4>;
 		};
 
diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts
index 6f7ea59c4846..cf92b4e7945e 100644
--- a/arch/powerpc/boot/dts/tqm8555.dts
+++ b/arch/powerpc/boot/dts/tqm8555.dts
@@ -154,6 +154,22 @@
 				reg = <3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -165,6 +181,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -177,6 +194,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts
index 3fe35208907b..9e1ab2d2f669 100644
--- a/arch/powerpc/boot/dts/tqm8560.dts
+++ b/arch/powerpc/boot/dts/tqm8560.dts
@@ -156,6 +156,22 @@
 				reg = <3>;
 				device_type = "ethernet-phy";
 			};
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x25520 0x20>;
+
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
 		enet0: ethernet@24000 {
@@ -167,6 +183,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <29 2 30 2 34 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
 		};
 
@@ -179,6 +196,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <35 2 36 2 40 2>;
 			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 		};
 
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 26ecb96f9731..115cb16351fd 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -207,236 +207,51 @@ static int __init of_add_fixed_phys(void)
 arch_initcall(of_add_fixed_phys);
 #endif /* CONFIG_FIXED_PHY */
 
-static int gfar_mdio_of_init_one(struct device_node *np)
-{
-	int k;
-	struct device_node *child = NULL;
-	struct gianfar_mdio_data mdio_data;
-	struct platform_device *mdio_dev;
-	struct resource res;
-	int ret;
-
-	memset(&res, 0, sizeof(res));
-	memset(&mdio_data, 0, sizeof(mdio_data));
-
-	ret = of_address_to_resource(np, 0, &res);
-	if (ret)
-		return ret;
-
-	/* The gianfar device will try to use the same ID created below to find
-	 * this bus, to coordinate register access (since they share).  */
-	mdio_dev = platform_device_register_simple("fsl-gianfar_mdio",
-			res.start&0xfffff, &res, 1);
-	if (IS_ERR(mdio_dev))
-		return PTR_ERR(mdio_dev);
-
-	for (k = 0; k < 32; k++)
-		mdio_data.irq[k] = PHY_POLL;
-
-	while ((child = of_get_next_child(np, child)) != NULL) {
-		int irq = irq_of_parse_and_map(child, 0);
-		if (irq != NO_IRQ) {
-			const u32 *id = of_get_property(child, "reg", NULL);
-			mdio_data.irq[*id] = irq;
-		}
-	}
-
-	ret = platform_device_add_data(mdio_dev, &mdio_data,
-				sizeof(struct gianfar_mdio_data));
-	if (ret)
-		platform_device_unregister(mdio_dev);
-
-	return ret;
-}
-
-static int __init gfar_mdio_of_init(void)
-{
-	struct device_node *np = NULL;
-
-	for_each_compatible_node(np, NULL, "fsl,gianfar-mdio")
-		gfar_mdio_of_init_one(np);
-
-	/* try the deprecated version */
-	for_each_compatible_node(np, "mdio", "gianfar");
-		gfar_mdio_of_init_one(np);
-
-	return 0;
-}
-
-arch_initcall(gfar_mdio_of_init);
-
-static const char *gfar_tx_intr = "tx";
-static const char *gfar_rx_intr = "rx";
-static const char *gfar_err_intr = "error";
-
-static int __init gfar_of_init(void)
+#ifdef CONFIG_PPC_83xx
+static int __init mpc83xx_wdt_init(void)
 {
+	struct resource r;
 	struct device_node *np;
-	unsigned int i;
-	struct platform_device *gfar_dev;
-	struct resource res;
+	struct platform_device *dev;
+	u32 freq = fsl_get_sys_freq();
 	int ret;
 
-	for (np = NULL, i = 0;
-	     (np = of_find_compatible_node(np, "network", "gianfar")) != NULL;
-	     i++) {
-		struct resource r[4];
-		struct device_node *phy, *mdio;
-		struct gianfar_platform_data gfar_data;
-		const unsigned int *id;
-		const char *model;
-		const char *ctype;
-		const void *mac_addr;
-		const phandle *ph;
-		int n_res = 2;
-
-		if (!of_device_is_available(np))
-			continue;
-
-		memset(r, 0, sizeof(r));
-		memset(&gfar_data, 0, sizeof(gfar_data));
-
-		ret = of_address_to_resource(np, 0, &r[0]);
-		if (ret)
-			goto err;
-
-		of_irq_to_resource(np, 0, &r[1]);
-
-		model = of_get_property(np, "model", NULL);
-
-		/* If we aren't the FEC we have multiple interrupts */
-		if (model && strcasecmp(model, "FEC")) {
-			r[1].name = gfar_tx_intr;
-
-			r[2].name = gfar_rx_intr;
-			of_irq_to_resource(np, 1, &r[2]);
+	np = of_find_compatible_node(NULL, "watchdog", "mpc83xx_wdt");
 
-			r[3].name = gfar_err_intr;
-			of_irq_to_resource(np, 2, &r[3]);
-
-			n_res += 2;
-		}
-
-		gfar_dev =
-		    platform_device_register_simple("fsl-gianfar", i, &r[0],
-						    n_res);
-
-		if (IS_ERR(gfar_dev)) {
-			ret = PTR_ERR(gfar_dev);
-			goto err;
-		}
-
-		mac_addr = of_get_mac_address(np);
-		if (mac_addr)
-			memcpy(gfar_data.mac_addr, mac_addr, 6);
-
-		if (model && !strcasecmp(model, "TSEC"))
-			gfar_data.device_flags =
-			    FSL_GIANFAR_DEV_HAS_GIGABIT |
-			    FSL_GIANFAR_DEV_HAS_COALESCE |
-			    FSL_GIANFAR_DEV_HAS_RMON |
-			    FSL_GIANFAR_DEV_HAS_MULTI_INTR;
-		if (model && !strcasecmp(model, "eTSEC"))
-			gfar_data.device_flags =
-			    FSL_GIANFAR_DEV_HAS_GIGABIT |
-			    FSL_GIANFAR_DEV_HAS_COALESCE |
-			    FSL_GIANFAR_DEV_HAS_RMON |
-			    FSL_GIANFAR_DEV_HAS_MULTI_INTR |
-			    FSL_GIANFAR_DEV_HAS_CSUM |
-			    FSL_GIANFAR_DEV_HAS_VLAN |
-			    FSL_GIANFAR_DEV_HAS_EXTENDED_HASH;
-
-		ctype = of_get_property(np, "phy-connection-type", NULL);
-
-		/* We only care about rgmii-id.  The rest are autodetected */
-		if (ctype && !strcmp(ctype, "rgmii-id"))
-			gfar_data.interface = PHY_INTERFACE_MODE_RGMII_ID;
-		else
-			gfar_data.interface = PHY_INTERFACE_MODE_MII;
-
-		if (of_get_property(np, "fsl,magic-packet", NULL))
-			gfar_data.device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
-
-		ph = of_get_property(np, "phy-handle", NULL);
-		if (ph == NULL) {
-			u32 *fixed_link;
-
-			fixed_link = (u32 *)of_get_property(np, "fixed-link",
-							   NULL);
-			if (!fixed_link) {
-				ret = -ENODEV;
-				goto unreg;
-			}
-
-			snprintf(gfar_data.bus_id, MII_BUS_ID_SIZE, "0");
-			gfar_data.phy_id = fixed_link[0];
-		} else {
-			phy = of_find_node_by_phandle(*ph);
-
-			if (phy == NULL) {
-				ret = -ENODEV;
-				goto unreg;
-			}
-
-			mdio = of_get_parent(phy);
-
-			id = of_get_property(phy, "reg", NULL);
-			ret = of_address_to_resource(mdio, 0, &res);
-			if (ret) {
-				of_node_put(phy);
-				of_node_put(mdio);
-				goto unreg;
-			}
-
-			gfar_data.phy_id = *id;
-			snprintf(gfar_data.bus_id, MII_BUS_ID_SIZE, "%llx",
-				 (unsigned long long)res.start&0xfffff);
+	if (!np) {
+		ret = -ENODEV;
+		goto nodev;
+	}
 
-			of_node_put(phy);
-			of_node_put(mdio);
-		}
+	memset(&r, 0, sizeof(r));
 
-		/* Get MDIO bus controlled by this eTSEC, if any.  Normally only
-		 * eTSEC 1 will control an MDIO bus, not necessarily the same
-		 * bus that its PHY is on ('mdio' above), so we can't just use
-		 * that.  What we do is look for a gianfar mdio device that has
-		 * overlapping registers with this device.  That's really the
-		 * whole point, to find the device sharing our registers to
-		 * coordinate access with it.
-		 */
-		for_each_compatible_node(mdio, NULL, "fsl,gianfar-mdio") {
-			if (of_address_to_resource(mdio, 0, &res))
-				continue;
-
-			if (res.start >= r[0].start && res.end <= r[0].end) {
-				/* Get the ID the mdio bus platform device was
-				 * registered with.  gfar_data.bus_id is
-				 * different because it's for finding a PHY,
-				 * while this is for finding a MII bus.
-				 */
-				gfar_data.mdio_bus = res.start&0xfffff;
-				of_node_put(mdio);
-				break;
-			}
-		}
+	ret = of_address_to_resource(np, 0, &r);
+	if (ret)
+		goto err;
 
-		ret =
-		    platform_device_add_data(gfar_dev, &gfar_data,
-					     sizeof(struct
-						    gianfar_platform_data));
-		if (ret)
-			goto unreg;
+	dev = platform_device_register_simple("mpc83xx_wdt", 0, &r, 1);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto err;
 	}
 
+	ret = platform_device_add_data(dev, &freq, sizeof(freq));
+	if (ret)
+		goto unreg;
+
+	of_node_put(np);
 	return 0;
 
 unreg:
-	platform_device_unregister(gfar_dev);
+	platform_device_unregister(dev);
 err:
+	of_node_put(np);
+nodev:
 	return ret;
 }
 
-arch_initcall(gfar_of_init);
+arch_initcall(mpc83xx_wdt_init);
+#endif
 
 static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type)
 {
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 55e319fa7fe6..7398704c4b55 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -25,11 +25,8 @@
  *
  *  Theory of operation
  *
- *  The driver is initialized through platform_device.  Structures which
- *  define the configuration needed by the board are defined in a
- *  board structure in arch/ppc/platforms (though I do not
- *  discount the possibility that other architectures could one
- *  day be supported.
+ *  The driver is initialized through of_device. Configuration information
+ *  is therefore conveyed through an OF-style device tree.
  *
  *  The Gianfar Ethernet Controller uses a ring of buffer
  *  descriptors.  The beginning is indicated by a register
@@ -78,7 +75,7 @@
 #include <linux/if_vlan.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
-#include <linux/platform_device.h>
+#include <linux/of_platform.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
@@ -92,6 +89,8 @@
 #include <linux/crc32.h>
 #include <linux/mii.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <linux/of.h>
 
 #include "gianfar.h"
 #include "gianfar_mii.h"
@@ -119,8 +118,9 @@ static irqreturn_t gfar_interrupt(int irq, void *dev_id);
 static void adjust_link(struct net_device *dev);
 static void init_registers(struct net_device *dev);
 static int init_phy(struct net_device *dev);
-static int gfar_probe(struct platform_device *pdev);
-static int gfar_remove(struct platform_device *pdev);
+static int gfar_probe(struct of_device *ofdev,
+		const struct of_device_id *match);
+static int gfar_remove(struct of_device *ofdev);
 static void free_skb_resources(struct gfar_private *priv);
 static void gfar_set_multi(struct net_device *dev);
 static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
@@ -152,25 +152,158 @@ static inline int gfar_uses_fcb(struct gfar_private *priv)
 	return (priv->vlan_enable || priv->rx_csum_enable);
 }
 
+static int gfar_of_init(struct net_device *dev)
+{
+	struct device_node *phy, *mdio;
+	const unsigned int *id;
+	const char *model;
+	const char *ctype;
+	const void *mac_addr;
+	const phandle *ph;
+	u64 addr, size;
+	int err = 0;
+	struct gfar_private *priv = netdev_priv(dev);
+	struct device_node *np = priv->node;
+	char bus_name[MII_BUS_ID_SIZE];
+
+	if (!np || !of_device_is_available(np))
+		return -ENODEV;
+
+	/* get a pointer to the register memory */
+	addr = of_translate_address(np, of_get_address(np, 0, &size, NULL));
+	priv->regs = ioremap(addr, size);
+
+	if (priv->regs == NULL)
+		return -ENOMEM;
+
+	priv->interruptTransmit = irq_of_parse_and_map(np, 0);
+
+	model = of_get_property(np, "model", NULL);
+
+	/* If we aren't the FEC we have multiple interrupts */
+	if (model && strcasecmp(model, "FEC")) {
+		priv->interruptReceive = irq_of_parse_and_map(np, 1);
+
+		priv->interruptError = irq_of_parse_and_map(np, 2);
+
+		if (priv->interruptTransmit < 0 ||
+				priv->interruptReceive < 0 ||
+				priv->interruptError < 0) {
+			err = -EINVAL;
+			goto err_out;
+		}
+	}
+
+	mac_addr = of_get_mac_address(np);
+	if (mac_addr)
+		memcpy(dev->dev_addr, mac_addr, MAC_ADDR_LEN);
+
+	if (model && !strcasecmp(model, "TSEC"))
+		priv->device_flags =
+			FSL_GIANFAR_DEV_HAS_GIGABIT |
+			FSL_GIANFAR_DEV_HAS_COALESCE |
+			FSL_GIANFAR_DEV_HAS_RMON |
+			FSL_GIANFAR_DEV_HAS_MULTI_INTR;
+	if (model && !strcasecmp(model, "eTSEC"))
+		priv->device_flags =
+			FSL_GIANFAR_DEV_HAS_GIGABIT |
+			FSL_GIANFAR_DEV_HAS_COALESCE |
+			FSL_GIANFAR_DEV_HAS_RMON |
+			FSL_GIANFAR_DEV_HAS_MULTI_INTR |
+			FSL_GIANFAR_DEV_HAS_CSUM |
+			FSL_GIANFAR_DEV_HAS_VLAN |
+			FSL_GIANFAR_DEV_HAS_MAGIC_PACKET |
+			FSL_GIANFAR_DEV_HAS_EXTENDED_HASH;
+
+	ctype = of_get_property(np, "phy-connection-type", NULL);
+
+	/* We only care about rgmii-id.  The rest are autodetected */
+	if (ctype && !strcmp(ctype, "rgmii-id"))
+		priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
+	else
+		priv->interface = PHY_INTERFACE_MODE_MII;
+
+	if (of_get_property(np, "fsl,magic-packet", NULL))
+		priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
+
+	ph = of_get_property(np, "phy-handle", NULL);
+	if (ph == NULL) {
+		u32 *fixed_link;
+
+		fixed_link = (u32 *)of_get_property(np, "fixed-link", NULL);
+		if (!fixed_link) {
+			err = -ENODEV;
+			goto err_out;
+		}
+
+		snprintf(priv->phy_bus_id, BUS_ID_SIZE, PHY_ID_FMT, "0",
+				fixed_link[0]);
+	} else {
+		phy = of_find_node_by_phandle(*ph);
+
+		if (phy == NULL) {
+			err = -ENODEV;
+			goto err_out;
+		}
+
+		mdio = of_get_parent(phy);
+
+		id = of_get_property(phy, "reg", NULL);
+
+		of_node_put(phy);
+		of_node_put(mdio);
+
+		gfar_mdio_bus_name(bus_name, mdio);
+		snprintf(priv->phy_bus_id, BUS_ID_SIZE, "%s:%02x",
+				bus_name, *id);
+	}
+
+	/* Find the TBI PHY.  If it's not there, we don't support SGMII */
+	ph = of_get_property(np, "tbi-handle", NULL);
+	if (ph) {
+		struct device_node *tbi = of_find_node_by_phandle(*ph);
+		struct of_device *ofdev;
+		struct mii_bus *bus;
+
+		if (!tbi)
+			return 0;
+
+		mdio = of_get_parent(tbi);
+		if (!mdio)
+			return 0;
+
+		ofdev = of_find_device_by_node(mdio);
+
+		of_node_put(mdio);
+
+		id = of_get_property(tbi, "reg", NULL);
+		if (!id)
+			return 0;
+
+		of_node_put(tbi);
+
+		bus = dev_get_drvdata(&ofdev->dev);
+
+		priv->tbiphy = bus->phy_map[*id];
+	}
+
+	return 0;
+
+err_out:
+	iounmap(priv->regs);
+	return err;
+}
+
 /* Set up the ethernet device structure, private data,
  * and anything else we need before we start */
-static int gfar_probe(struct platform_device *pdev)
+static int gfar_probe(struct of_device *ofdev,
+		const struct of_device_id *match)
 {
 	u32 tempval;
 	struct net_device *dev = NULL;
 	struct gfar_private *priv = NULL;
-	struct gianfar_platform_data *einfo;
-	struct resource *r;
-	int err = 0, irq;
-
-	einfo = (struct gianfar_platform_data *) pdev->dev.platform_data;
-
-	if (NULL == einfo) {
-		printk(KERN_ERR "gfar %d: Missing additional data!\n",
-		       pdev->id);
-
-		return -ENODEV;
-	}
+	int err = 0;
+	DECLARE_MAC_BUF(mac);
 
 	/* Create an ethernet device instance */
 	dev = alloc_etherdev(sizeof (*priv));
@@ -180,48 +313,19 @@ static int gfar_probe(struct platform_device *pdev)
 
 	priv = netdev_priv(dev);
 	priv->dev = dev;
+	priv->node = ofdev->node;
 
-	/* Set the info in the priv to the current info */
-	priv->einfo = einfo;
-
-	/* fill out IRQ fields */
-	if (einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-		irq = platform_get_irq_byname(pdev, "tx");
-		if (irq < 0)
-			goto regs_fail;
-		priv->interruptTransmit = irq;
-
-		irq = platform_get_irq_byname(pdev, "rx");
-		if (irq < 0)
-			goto regs_fail;
-		priv->interruptReceive = irq;
-
-		irq = platform_get_irq_byname(pdev, "error");
-		if (irq < 0)
-			goto regs_fail;
-		priv->interruptError = irq;
-	} else {
-		irq = platform_get_irq(pdev, 0);
-		if (irq < 0)
-			goto regs_fail;
-		priv->interruptTransmit = irq;
-	}
-
-	/* get a pointer to the register memory */
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->regs = ioremap(r->start, sizeof (struct gfar));
+	err = gfar_of_init(dev);
 
-	if (NULL == priv->regs) {
-		err = -ENOMEM;
+	if (err)
 		goto regs_fail;
-	}
 
 	spin_lock_init(&priv->txlock);
 	spin_lock_init(&priv->rxlock);
 	spin_lock_init(&priv->bflock);
 	INIT_WORK(&priv->reset_task, gfar_reset_task);
 
-	platform_set_drvdata(pdev, dev);
+	dev_set_drvdata(&ofdev->dev, priv);
 
 	/* Stop the DMA engine now, in case it was running before */
 	/* (The firmware could have used it, and left it running). */
@@ -239,13 +343,10 @@ static int gfar_probe(struct platform_device *pdev)
 	/* Initialize ECNTRL */
 	gfar_write(&priv->regs->ecntrl, ECNTRL_INIT_SETTINGS);
 
-	/* Copy the station address into the dev structure, */
-	memcpy(dev->dev_addr, einfo->mac_addr, MAC_ADDR_LEN);
-
 	/* Set the dev->base_addr to the gfar reg region */
 	dev->base_addr = (unsigned long) (priv->regs);
 
-	SET_NETDEV_DEV(dev, &pdev->dev);
+	SET_NETDEV_DEV(dev, &ofdev->dev);
 
 	/* Fill in the dev structure */
 	dev->open = gfar_enet_open;
@@ -263,7 +364,7 @@ static int gfar_probe(struct platform_device *pdev)
 
 	dev->ethtool_ops = &gfar_ethtool_ops;
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
 		priv->rx_csum_enable = 1;
 		dev->features |= NETIF_F_IP_CSUM;
 	} else
@@ -271,7 +372,7 @@ static int gfar_probe(struct platform_device *pdev)
 
 	priv->vlgrp = NULL;
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
 		dev->vlan_rx_register = gfar_vlan_rx_register;
 
 		dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
@@ -279,7 +380,7 @@ static int gfar_probe(struct platform_device *pdev)
 		priv->vlan_enable = 1;
 	}
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) {
 		priv->extended_hash = 1;
 		priv->hash_width = 9;
 
@@ -314,7 +415,7 @@ static int gfar_probe(struct platform_device *pdev)
 		priv->hash_regs[7] = &priv->regs->gaddr7;
 	}
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_PADDING)
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_PADDING)
 		priv->padding = DEFAULT_PADDING;
 	else
 		priv->padding = 0;
@@ -368,29 +469,28 @@ regs_fail:
 	return err;
 }
 
-static int gfar_remove(struct platform_device *pdev)
+static int gfar_remove(struct of_device *ofdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar_private *priv = dev_get_drvdata(&ofdev->dev);
 
-	platform_set_drvdata(pdev, NULL);
+	dev_set_drvdata(&ofdev->dev, NULL);
 
 	iounmap(priv->regs);
-	free_netdev(dev);
+	free_netdev(priv->dev);
 
 	return 0;
 }
 
 #ifdef CONFIG_PM
-static int gfar_suspend(struct platform_device *pdev, pm_message_t state)
+static int gfar_suspend(struct of_device *ofdev, pm_message_t state)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar_private *priv = dev_get_drvdata(&ofdev->dev);
+	struct net_device *dev = priv->dev;
 	unsigned long flags;
 	u32 tempval;
 
 	int magic_packet = priv->wol_en &&
-		(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+		(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
 
 	netif_device_detach(dev);
 
@@ -431,14 +531,14 @@ static int gfar_suspend(struct platform_device *pdev, pm_message_t state)
 	return 0;
 }
 
-static int gfar_resume(struct platform_device *pdev)
+static int gfar_resume(struct of_device *ofdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct gfar_private *priv = netdev_priv(dev);
+	struct gfar_private *priv = dev_get_drvdata(&ofdev->dev);
+	struct net_device *dev = priv->dev;
 	unsigned long flags;
 	u32 tempval;
 	int magic_packet = priv->wol_en &&
-		(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
+		(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
 
 	if (!netif_running(dev)) {
 		netif_device_attach(dev);
@@ -497,7 +597,7 @@ static phy_interface_t gfar_get_interface(struct net_device *dev)
 		if (ecntrl & ECNTRL_REDUCED_MII_MODE)
 			return PHY_INTERFACE_MODE_RMII;
 		else {
-			phy_interface_t interface = priv->einfo->interface;
+			phy_interface_t interface = priv->interface;
 
 			/*
 			 * This isn't autodetected right now, so it must
@@ -510,7 +610,7 @@ static phy_interface_t gfar_get_interface(struct net_device *dev)
 		}
 	}
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)
 		return PHY_INTERFACE_MODE_GMII;
 
 	return PHY_INTERFACE_MODE_MII;
@@ -524,21 +624,18 @@ static int init_phy(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	uint gigabit_support =
-		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
+		priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
 		SUPPORTED_1000baseT_Full : 0;
 	struct phy_device *phydev;
-	char phy_id[BUS_ID_SIZE];
 	phy_interface_t interface;
 
 	priv->oldlink = 0;
 	priv->oldspeed = 0;
 	priv->oldduplex = -1;
 
-	snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id);
-
 	interface = gfar_get_interface(dev);
 
-	phydev = phy_connect(dev, phy_id, &adjust_link, 0, interface);
+	phydev = phy_connect(dev, priv->phy_bus_id, &adjust_link, 0, interface);
 
 	if (interface == PHY_INTERFACE_MODE_SGMII)
 		gfar_configure_serdes(dev);
@@ -569,35 +666,31 @@ static int init_phy(struct net_device *dev)
 static void gfar_configure_serdes(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar_mii __iomem *regs =
-			(void __iomem *)&priv->regs->gfar_mii_regs;
-	int tbipa = gfar_read(&priv->regs->tbipa);
-	struct mii_bus *bus = gfar_get_miibus(priv);
 
-	if (bus)
-		mutex_lock(&bus->mdio_lock);
+	if (!priv->tbiphy) {
+		printk(KERN_WARNING "SGMII mode requires that the device "
+				"tree specify a tbi-handle\n");
+		return;
+	}
 
-	/* If the link is already up, we must already be ok, and don't need to
+	/*
+	 * If the link is already up, we must already be ok, and don't need to
 	 * configure and reset the TBI<->SerDes link.  Maybe U-Boot configured
 	 * everything for us?  Resetting it takes the link down and requires
 	 * several seconds for it to come back.
 	 */
-	if (gfar_local_mdio_read(regs, tbipa, MII_BMSR) & BMSR_LSTATUS)
-		goto done;
+	if (phy_read(priv->tbiphy, MII_BMSR) & BMSR_LSTATUS)
+		return;
 
 	/* Single clk mode, mii mode off(for serdes communication) */
-	gfar_local_mdio_write(regs, tbipa, MII_TBICON, TBICON_CLK_SELECT);
+	phy_write(priv->tbiphy, MII_TBICON, TBICON_CLK_SELECT);
 
-	gfar_local_mdio_write(regs, tbipa, MII_ADVERTISE,
+	phy_write(priv->tbiphy, MII_ADVERTISE,
 			ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE |
 			ADVERTISE_1000XPSE_ASYM);
 
-	gfar_local_mdio_write(regs, tbipa, MII_BMCR, BMCR_ANENABLE |
+	phy_write(priv->tbiphy, MII_BMCR, BMCR_ANENABLE |
 			BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000);
-
-	done:
-	if (bus)
-		mutex_unlock(&bus->mdio_lock);
 }
 
 static void init_registers(struct net_device *dev)
@@ -630,7 +723,7 @@ static void init_registers(struct net_device *dev)
 	gfar_write(&priv->regs->gaddr7, 0);
 
 	/* Zero out the rmon mib registers if it has them */
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
 		memset_io(&(priv->regs->rmon), 0, sizeof (struct rmon_mib));
 
 		/* Mask off the CAM interrupts */
@@ -705,7 +798,7 @@ void stop_gfar(struct net_device *dev)
 	spin_unlock_irqrestore(&priv->txlock, flags);
 
 	/* Free the IRQs */
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
 		free_irq(priv->interruptError, dev);
 		free_irq(priv->interruptTransmit, dev);
 		free_irq(priv->interruptReceive, dev);
@@ -919,7 +1012,7 @@ int startup_gfar(struct net_device *dev)
 
 	/* If the device has multiple interrupts, register for
 	 * them.  Otherwise, only register for the one */
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
 		/* Install our interrupt handlers for Error,
 		 * Transmit, and Receive */
 		if (request_irq(priv->interruptError, gfar_error,
@@ -1751,7 +1844,7 @@ static void gfar_netpoll(struct net_device *dev)
 	struct gfar_private *priv = netdev_priv(dev);
 
 	/* If the device has multiple interrupts, run tx/rx */
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
 		disable_irq(priv->interruptTransmit);
 		disable_irq(priv->interruptReceive);
 		disable_irq(priv->interruptError);
@@ -2045,7 +2138,7 @@ static irqreturn_t gfar_error(int irq, void *dev_id)
 	gfar_write(&priv->regs->ievent, events & IEVENT_ERR_MASK);
 
 	/* Magic Packet is not an error. */
-	if ((priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
+	if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
 	    (events & IEVENT_MAG))
 		events &= ~IEVENT_MAG;
 
@@ -2111,16 +2204,24 @@ static irqreturn_t gfar_error(int irq, void *dev_id)
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:fsl-gianfar");
 
+static struct of_device_id gfar_match[] =
+{
+	{
+		.type = "network",
+		.compatible = "gianfar",
+	},
+	{},
+};
+
 /* Structure for a device driver */
-static struct platform_driver gfar_driver = {
+static struct of_platform_driver gfar_driver = {
+	.name = "fsl-gianfar",
+	.match_table = gfar_match,
+
 	.probe = gfar_probe,
 	.remove = gfar_remove,
 	.suspend = gfar_suspend,
 	.resume = gfar_resume,
-	.driver	= {
-		.name = "fsl-gianfar",
-		.owner = THIS_MODULE,
-	},
 };
 
 static int __init gfar_init(void)
@@ -2130,7 +2231,7 @@ static int __init gfar_init(void)
 	if (err)
 		return err;
 
-	err = platform_driver_register(&gfar_driver);
+	err = of_register_platform_driver(&gfar_driver);
 
 	if (err)
 		gfar_mdio_exit();
@@ -2140,7 +2241,7 @@ static int __init gfar_init(void)
 
 static void __exit gfar_exit(void)
 {
-	platform_driver_unregister(&gfar_driver);
+	of_unregister_platform_driver(&gfar_driver);
 	gfar_mdio_exit();
 }
 
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index f46e9b63af13..ca7f0a6a68c5 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -657,6 +657,19 @@ struct gfar {
 
 };
 
+/* Flags related to gianfar device features */
+#define FSL_GIANFAR_DEV_HAS_GIGABIT		0x00000001
+#define FSL_GIANFAR_DEV_HAS_COALESCE		0x00000002
+#define FSL_GIANFAR_DEV_HAS_RMON		0x00000004
+#define FSL_GIANFAR_DEV_HAS_MULTI_INTR		0x00000008
+#define FSL_GIANFAR_DEV_HAS_CSUM		0x00000010
+#define FSL_GIANFAR_DEV_HAS_VLAN		0x00000020
+#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH	0x00000040
+#define FSL_GIANFAR_DEV_HAS_PADDING		0x00000080
+#define FSL_GIANFAR_DEV_HAS_MAGIC_PACKET	0x00000100
+#define FSL_GIANFAR_DEV_HAS_BD_STASHING		0x00000200
+#define FSL_GIANFAR_DEV_HAS_BUF_STASHING	0x00000400
+
 /* Struct stolen almost completely (and shamelessly) from the FCC enet source
  * (Ok, that's not so true anymore, but there is a family resemblence)
  * The GFAR buffer descriptors track the ring buffers.  The rx_bd_base
@@ -694,6 +707,7 @@ struct gfar_private {
 	/* RX Locked fields */
 	spinlock_t rxlock;
 
+	struct device_node *node;
 	struct net_device *dev;
 	struct napi_struct napi;
 
@@ -733,6 +747,9 @@ struct gfar_private {
 	/* Bitfield update lock */
 	spinlock_t bflock;
 
+	phy_interface_t interface;
+	char	phy_bus_id[BUS_ID_SIZE];
+	u32 device_flags;
 	unsigned char vlan_enable:1,
 		rx_csum_enable:1,
 		extended_hash:1,
@@ -744,11 +761,9 @@ struct gfar_private {
 	unsigned int interruptReceive;
 	unsigned int interruptError;
 
-	/* info structure initialized by platform code */
-	struct gianfar_platform_data *einfo;
-
 	/* PHY stuff */
 	struct phy_device *phydev;
+	struct phy_device *tbiphy;
 	struct mii_bus *mii_bus;
 	int oldspeed;
 	int oldduplex;
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index fb7d3ccc0fdc..53944b120a3d 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -121,7 +121,7 @@ static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON)
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON)
 		memcpy(buf, stat_gstrings, GFAR_STATS_LEN * ETH_GSTRING_LEN);
 	else
 		memcpy(buf, stat_gstrings,
@@ -138,7 +138,7 @@ static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy,
 	struct gfar_private *priv = netdev_priv(dev);
 	u64 *extra = (u64 *) & priv->extra_stats;
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
 		u32 __iomem *rmon = (u32 __iomem *) & priv->regs->rmon;
 		struct gfar_stats *stats = (struct gfar_stats *) buf;
 
@@ -158,7 +158,7 @@ static int gfar_sset_count(struct net_device *dev, int sset)
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON)
+		if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON)
 			return GFAR_STATS_LEN;
 		else
 			return GFAR_EXTRA_STATS_LEN;
@@ -280,7 +280,7 @@ static int gfar_gcoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE))
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE))
 		return -EOPNOTSUPP;
 
 	if (NULL == priv->phydev)
@@ -332,7 +332,7 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE))
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE))
 		return -EOPNOTSUPP;
 
 	/* Set up rx coalescing */
@@ -482,7 +482,7 @@ static int gfar_set_rx_csum(struct net_device *dev, uint32_t data)
 	unsigned long flags;
 	int err = 0;
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
 		return -EOPNOTSUPP;
 
 	if (dev->flags & IFF_UP) {
@@ -515,7 +515,7 @@ static uint32_t gfar_get_rx_csum(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
 		return 0;
 
 	return priv->rx_csum_enable;
@@ -526,7 +526,7 @@ static int gfar_set_tx_csum(struct net_device *dev, uint32_t data)
 	unsigned long flags;
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
 		return -EOPNOTSUPP;
 
 	spin_lock_irqsave(&priv->txlock, flags);
@@ -547,7 +547,7 @@ static uint32_t gfar_get_tx_csum(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM))
 		return 0;
 
 	return (dev->features & NETIF_F_IP_CSUM) != 0;
@@ -570,7 +570,7 @@ static void gfar_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 
-	if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) {
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) {
 		wol->supported = WAKE_MAGIC;
 		wol->wolopts = priv->wol_en ? WAKE_MAGIC : 0;
 	} else {
@@ -583,7 +583,7 @@ static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	struct gfar_private *priv = netdev_priv(dev);
 	unsigned long flags;
 
-	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
+	if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) &&
 	    wol->wolopts != 0)
 		return -EINVAL;
 
diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c
index 0e2595d24933..f3706e415b45 100644
--- a/drivers/net/gianfar_mii.c
+++ b/drivers/net/gianfar_mii.c
@@ -34,6 +34,8 @@
 #include <linux/crc32.h>
 #include <linux/mii.h>
 #include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -150,19 +152,83 @@ static int gfar_mdio_reset(struct mii_bus *bus)
 	return 0;
 }
 
+/* Allocate an array which provides irq #s for each PHY on the given bus */
+static int *create_irq_map(struct device_node *np)
+{
+	int *irqs;
+	int i;
+	struct device_node *child = NULL;
+
+	irqs = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL);
+
+	if (!irqs)
+		return NULL;
+
+	for (i = 0; i < PHY_MAX_ADDR; i++)
+		irqs[i] = PHY_POLL;
+
+	while ((child = of_get_next_child(np, child)) != NULL) {
+		int irq = irq_of_parse_and_map(child, 0);
+		const u32 *id;
+
+		if (irq == NO_IRQ)
+			continue;
+
+		id = of_get_property(child, "reg", NULL);
+
+		if (!id)
+			continue;
+
+		if (*id < PHY_MAX_ADDR && *id >= 0)
+			irqs[*id] = irq;
+		else
+			printk(KERN_WARNING "%s: "
+					"%d is not a valid PHY address\n",
+					np->full_name, *id);
+	}
+
+	return irqs;
+}
+
+
+void gfar_mdio_bus_name(char *name, struct device_node *np)
+{
+	const u32 *reg;
+
+	reg = of_get_property(np, "reg", NULL);
 
-static int gfar_mdio_probe(struct device *dev)
+	snprintf(name, MII_BUS_ID_SIZE, "%s@%x", np->name, reg ? *reg : 0);
+}
+
+/* Scan the bus in reverse, looking for an empty spot */
+static int gfar_mdio_find_free(struct mii_bus *new_bus)
+{
+	int i;
+
+	for (i = PHY_MAX_ADDR; i > 0; i--) {
+		u32 phy_id;
+
+		if (get_phy_id(new_bus, i, &phy_id))
+			return -1;
+
+		if (phy_id == 0xffffffff)
+			break;
+	}
+
+	return i;
+}
+
+static int gfar_mdio_probe(struct of_device *ofdev,
+		const struct of_device_id *match)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct gianfar_mdio_data *pdata;
 	struct gfar_mii __iomem *regs;
 	struct gfar __iomem *enet_regs;
 	struct mii_bus *new_bus;
-	struct resource *r;
-	int i, err = 0;
-
-	if (NULL == dev)
-		return -EINVAL;
+	int err = 0;
+	u64 addr, size;
+	struct device_node *np = ofdev->node;
+	struct device_node *tbi;
+	int tbiaddr = -1;
 
 	new_bus = mdiobus_alloc();
 	if (NULL == new_bus)
@@ -172,31 +238,28 @@ static int gfar_mdio_probe(struct device *dev)
 	new_bus->read = &gfar_mdio_read,
 	new_bus->write = &gfar_mdio_write,
 	new_bus->reset = &gfar_mdio_reset,
-	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id);
-
-	pdata = (struct gianfar_mdio_data *)pdev->dev.platform_data;
-
-	if (NULL == pdata) {
-		printk(KERN_ERR "gfar mdio %d: Missing platform data!\n", pdev->id);
-		return -ENODEV;
-	}
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	gfar_mdio_bus_name(new_bus->id, np);
 
 	/* Set the PHY base address */
-	regs = ioremap(r->start, sizeof (struct gfar_mii));
+	addr = of_translate_address(np, of_get_address(np, 0, &size, NULL));
+	regs = ioremap(addr, size);
 
 	if (NULL == regs) {
 		err = -ENOMEM;
-		goto reg_map_fail;
+		goto err_free_bus;
 	}
 
 	new_bus->priv = (void __force *)regs;
 
-	new_bus->irq = pdata->irq;
+	new_bus->irq = create_irq_map(np);
+
+	if (new_bus->irq == NULL) {
+		err = -ENOMEM;
+		goto err_unmap_regs;
+	}
 
-	new_bus->parent = dev;
-	dev_set_drvdata(dev, new_bus);
+	new_bus->parent = &ofdev->dev;
+	dev_set_drvdata(&ofdev->dev, new_bus);
 
 	/*
 	 * This is mildly evil, but so is our hardware for doing this.
@@ -206,96 +269,109 @@ static int gfar_mdio_probe(struct device *dev)
 	enet_regs = (struct gfar __iomem *)
 		((char *)regs - offsetof(struct gfar, gfar_mii_regs));
 
-	/* Scan the bus, looking for an empty spot for TBIPA */
-	gfar_write(&enet_regs->tbipa, 0);
-	for (i = PHY_MAX_ADDR; i > 0; i--) {
-		u32 phy_id;
+	for_each_child_of_node(np, tbi) {
+		if (!strncmp(tbi->type, "tbi-phy", 8))
+			break;
+	}
 
-		err = get_phy_id(new_bus, i, &phy_id);
-		if (err)
-			goto bus_register_fail;
+	if (tbi) {
+		const u32 *prop = of_get_property(tbi, "reg", NULL);
 
-		if (phy_id == 0xffffffff)
-			break;
+		if (prop)
+			tbiaddr = *prop;
 	}
 
-	/* The bus is full.  We don't support using 31 PHYs, sorry */
-	if (i == 0) {
+	if (tbiaddr == -1) {
+		gfar_write(&enet_regs->tbipa, 0);
+
+		tbiaddr = gfar_mdio_find_free(new_bus);
+	}
+
+	/*
+	 * We define TBIPA at 0 to be illegal, opting to fail for boards that
+	 * have PHYs at 1-31, rather than change tbipa and rescan.
+	 */
+	if (tbiaddr == 0) {
 		err = -EBUSY;
 
-		goto bus_register_fail;
+		goto err_free_irqs;
 	}
 
-	gfar_write(&enet_regs->tbipa, i);
+	gfar_write(&enet_regs->tbipa, tbiaddr);
+
+	/*
+	 * The TBIPHY-only buses will find PHYs at every address,
+	 * so we mask them all but the TBI
+	 */
+	if (!of_device_is_compatible(np, "fsl,gianfar-mdio"))
+		new_bus->phy_mask = ~(1 << tbiaddr);
 
 	err = mdiobus_register(new_bus);
 
-	if (0 != err) {
+	if (err != 0) {
 		printk (KERN_ERR "%s: Cannot register as MDIO bus\n",
 				new_bus->name);
-		goto bus_register_fail;
+		goto err_free_irqs;
 	}
 
 	return 0;
 
-bus_register_fail:
+err_free_irqs:
+	kfree(new_bus->irq);
+err_unmap_regs:
 	iounmap(regs);
-reg_map_fail:
+err_free_bus:
 	mdiobus_free(new_bus);
 
 	return err;
 }
 
 
-static int gfar_mdio_remove(struct device *dev)
+static int gfar_mdio_remove(struct of_device *ofdev)
 {
-	struct mii_bus *bus = dev_get_drvdata(dev);
+	struct mii_bus *bus = dev_get_drvdata(&ofdev->dev);
 
 	mdiobus_unregister(bus);
 
-	dev_set_drvdata(dev, NULL);
+	dev_set_drvdata(&ofdev->dev, NULL);
 
 	iounmap((void __iomem *)bus->priv);
 	bus->priv = NULL;
+	kfree(bus->irq);
 	mdiobus_free(bus);
 
 	return 0;
 }
 
-static struct device_driver gianfar_mdio_driver = {
+static struct of_device_id gfar_mdio_match[] =
+{
+	{
+		.compatible = "fsl,gianfar-mdio",
+	},
+	{
+		.compatible = "fsl,gianfar-tbi",
+	},
+	{
+		.type = "mdio",
+		.compatible = "gianfar",
+	},
+	{},
+};
+
+static struct of_platform_driver gianfar_mdio_driver = {
 	.name = "fsl-gianfar_mdio",
-	.bus = &platform_bus_type,
+	.match_table = gfar_mdio_match,
+
 	.probe = gfar_mdio_probe,
 	.remove = gfar_mdio_remove,
 };
 
-static int match_mdio_bus(struct device *dev, void *data)
-{
-	const struct gfar_private *priv = data;
-	const struct platform_device *pdev = to_platform_device(dev);
-
-	return !strcmp(pdev->name, gianfar_mdio_driver.name) &&
-		pdev->id == priv->einfo->mdio_bus;
-}
-
-/* Given a gfar_priv structure, find the mii_bus controlled by this device (not
- * necessarily the same as the bus the gfar's PHY is on), if one exists.
- * Normally only the first gianfar controls a mii_bus.  */
-struct mii_bus *gfar_get_miibus(const struct gfar_private *priv)
-{
-	/*const*/ struct device *d;
-
-	d = bus_find_device(gianfar_mdio_driver.bus, NULL, (void *)priv,
-			    match_mdio_bus);
-	return d ? dev_get_drvdata(d) : NULL;
-}
-
 int __init gfar_mdio_init(void)
 {
-	return driver_register(&gianfar_mdio_driver);
+	return of_register_platform_driver(&gianfar_mdio_driver);
 }
 
 void gfar_mdio_exit(void)
 {
-	driver_unregister(&gianfar_mdio_driver);
+	of_unregister_platform_driver(&gianfar_mdio_driver);
 }
diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h
index 02dc970ca1ff..65c242cd468a 100644
--- a/drivers/net/gianfar_mii.h
+++ b/drivers/net/gianfar_mii.h
@@ -49,4 +49,6 @@ int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum);
 struct mii_bus *gfar_get_miibus(const struct gfar_private *priv);
 int __init gfar_mdio_init(void);
 void gfar_mdio_exit(void);
+
+void gfar_mdio_bus_name(char *name, struct device_node *np);
 #endif /* GIANFAR_PHY_H */
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 708bab58d8d0..d9051d717d27 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -47,12 +47,7 @@
 struct gianfar_platform_data {
 	/* device specific information */
 	u32	device_flags;
-	/* board specific information */
-	u32	board_flags;
-	int	mdio_bus;			/* Bus controlled by us */
-	char	bus_id[MII_BUS_ID_SIZE];	/* Bus PHY is on */
-	u32	phy_id;
-	u8	mac_addr[6];
+	char	bus_id[BUS_ID_SIZE];
 	phy_interface_t interface;
 };
 
@@ -61,17 +56,6 @@ struct gianfar_mdio_data {
 	int	irq[32];
 };
 
-/* Flags related to gianfar device features */
-#define FSL_GIANFAR_DEV_HAS_GIGABIT		0x00000001
-#define FSL_GIANFAR_DEV_HAS_COALESCE		0x00000002
-#define FSL_GIANFAR_DEV_HAS_RMON		0x00000004
-#define FSL_GIANFAR_DEV_HAS_MULTI_INTR		0x00000008
-#define FSL_GIANFAR_DEV_HAS_CSUM		0x00000010
-#define FSL_GIANFAR_DEV_HAS_VLAN		0x00000020
-#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH	0x00000040
-#define FSL_GIANFAR_DEV_HAS_PADDING		0x00000080
-#define FSL_GIANFAR_DEV_HAS_MAGIC_PACKET	0x00000100
-
 /* Flags in gianfar_platform_data */
 #define FSL_GIANFAR_BRD_HAS_PHY_INTR	0x00000001 /* set or use a timer */
 #define FSL_GIANFAR_BRD_IS_REDUCED	0x00000002 /* Set if RGMII, RMII */
-- 
cgit v1.2.3


From e08e1f7adba522378e8d2ae941bf25443866136d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:30 -0800
Subject: swiotlb: allow architectures to override phys<->bus<->phys
 conversions

Impact: generalize phys<->bus<->phys conversions in the swiotlb code

Architectures may need to override these conversions. Implement a
__weak hook point containing the default implementation.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  3 +++
 lib/swiotlb.c           | 52 ++++++++++++++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 58b996a642f9..694f1839cbc0 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -27,6 +27,9 @@ swiotlb_init(void);
 extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
 extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
+extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1272b23e4769..3494263cdd9a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -125,6 +125,26 @@ void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
 	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
 }
 
+dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr)
+{
+	return paddr;
+}
+
+phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+{
+	return baddr;
+}
+
+static dma_addr_t swiotlb_virt_to_bus(volatile void *address)
+{
+	return swiotlb_phys_to_bus(virt_to_phys(address));
+}
+
+static void *swiotlb_bus_to_virt(dma_addr_t address)
+{
+	return phys_to_virt(swiotlb_bus_to_phys(address));
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -168,7 +188,7 @@ swiotlb_init_with_default_size(size_t default_size)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
 
 	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+	       swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
 }
 
 void __init
@@ -250,7 +270,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 	printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
 	       "0x%lx\n", bytes >> 20,
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+	       swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
 
 	return 0;
 
@@ -298,7 +318,7 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
 	unsigned long max_slots;
 
 	mask = dma_get_seg_boundary(hwdev);
-	start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+	start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
 
 	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
@@ -475,7 +495,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
+	if (ret && !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(ret), size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -496,7 +516,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	}
 
 	memset(ret, 0, size);
-	dev_addr = virt_to_bus(ret);
+	dev_addr = swiotlb_virt_to_bus(ret);
 
 	/* Confirm address can be DMA'd by device */
 	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
@@ -556,7 +576,7 @@ dma_addr_t
 swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 			 int dir, struct dma_attrs *attrs)
 {
-	dma_addr_t dev_addr = virt_to_bus(ptr);
+	dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -577,7 +597,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 		map = io_tlb_overflow_buffer;
 	}
 
-	dev_addr = virt_to_bus(map);
+	dev_addr = swiotlb_virt_to_bus(map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
@@ -607,7 +627,7 @@ void
 swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 			   size_t size, int dir, struct dma_attrs *attrs)
 {
-	char *dma_addr = bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -637,7 +657,7 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -668,7 +688,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 			  unsigned long offset, size_t size,
 			  int dir, int target)
 {
-	char *dma_addr = bus_to_virt(dev_addr) + offset;
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -724,7 +744,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
-		dev_addr = virt_to_bus(addr);
+		dev_addr = swiotlb_virt_to_bus(addr);
 		if (swiotlb_force ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
@@ -737,7 +757,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = virt_to_bus(map);
+			sg->dma_address = swiotlb_virt_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
@@ -768,7 +788,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			unmap_single(hwdev, bus_to_virt(sg->dma_address),
+			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else if (dir == DMA_FROM_DEVICE)
 			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
@@ -801,7 +821,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, bus_to_virt(sg->dma_address),
+			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir, target);
 		else if (dir == DMA_FROM_DEVICE)
 			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
@@ -825,7 +845,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+	return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer));
 }
 
 /*
@@ -837,7 +857,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return virt_to_bus(io_tlb_end - 1) <= mask;
+	return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask;
 }
 
 EXPORT_SYMBOL(swiotlb_map_single);
-- 
cgit v1.2.3


From b81ea27b2329bf44b30c427800954f845896d476 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:31 -0800
Subject: swiotlb: add arch hook to force mapping

Impact: generalize the sw-IOTLB range checks

Some architectures require special rules to determine whether a range
needs mapping or not.  This adds a weak function for architectures to
override.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  2 ++
 lib/swiotlb.c           | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 694f1839cbc0..325af1de0351 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -30,6 +30,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
 extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
 
+extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3494263cdd9a..d8b09051c455 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -145,6 +145,11 @@ static void *swiotlb_bus_to_virt(dma_addr_t address)
 	return phys_to_virt(swiotlb_bus_to_phys(address));
 }
 
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+{
+	return 0;
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -297,6 +302,11 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
+static inline int range_needs_mapping(void *ptr, size_t size)
+{
+	return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size);
+}
+
 static int is_swiotlb_buffer(char *addr)
 {
 	return addr >= io_tlb_start && addr < io_tlb_end;
@@ -585,7 +595,8 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
+	if (!address_needs_mapping(hwdev, dev_addr, size) &&
+	    !range_needs_mapping(ptr, size))
 		return dev_addr;
 
 	/*
@@ -745,7 +756,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		dev_addr = swiotlb_virt_to_bus(addr);
-		if (swiotlb_force ||
+		if (range_needs_mapping(sg_virt(sg), sg->length) ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
 			if (!map) {
-- 
cgit v1.2.3


From 9a9fafb89433c5fd1331bac0c84c4b321e358b42 Mon Sep 17 00:00:00 2001
From: Phil Endecott <usb_endian_patch@chezphil.org>
Date: Mon, 1 Dec 2008 10:22:33 -0500
Subject: USB: fix comment about endianness of descriptors

This patch fixes a comment and clarifies the documentation about the
endianness of descriptors. The current policy is that descriptors will
be little-endian at the API even on big-endian systems; however the
/proc/bus/usb API predates this policy and presents descriptors with
some multibyte fields byte-swapped.

Signed-off-by: Phil Endecott <usb_endian_patch@chezphil.org>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/proc_usb_info.txt | 6 ++++--
 include/linux/usb/ch9.h             | 8 ++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/usb/proc_usb_info.txt b/Documentation/usb/proc_usb_info.txt
index 077e9032d0cd..fafcd4723260 100644
--- a/Documentation/usb/proc_usb_info.txt
+++ b/Documentation/usb/proc_usb_info.txt
@@ -49,8 +49,10 @@ it and 002/048 sometime later.
 
 These files can be read as binary data.  The binary data consists
 of first the device descriptor, then the descriptors for each
-configuration of the device.  That information is also shown in
-text form by the /proc/bus/usb/devices file, described later.
+configuration of the device.  Multi-byte fields in the device and
+configuration descriptors, but not other descriptors, are converted
+to host endianness by the kernel.  This information is also shown
+in text form by the /proc/bus/usb/devices file, described later.
 
 These files may also be used to write user-level drivers for the USB
 devices.  You would open the /proc/bus/usb/BBB/DDD file read/write,
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 73a2f4eb1f7a..9b42baed3900 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -158,8 +158,12 @@ struct usb_ctrlrequest {
  * (rarely) accepted by SET_DESCRIPTOR.
  *
  * Note that all multi-byte values here are encoded in little endian
- * byte order "on the wire".  But when exposed through Linux-USB APIs,
- * they've been converted to cpu byte order.
+ * byte order "on the wire".  Within the kernel and when exposed
+ * through the Linux-USB APIs, they are not converted to cpu byte
+ * order; it is the responsibility of the client code to do this.
+ * The single exception is when device and configuration descriptors (but
+ * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD);
+ * in this case the fields are converted to host endianness by the kernel.
  */
 
 /*
-- 
cgit v1.2.3


From 69c30e1e7492192f882a3fc11888b320fde5206a Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Wed, 17 Dec 2008 15:44:58 -0800
Subject: irda: Add irda_skb_cb qdisc related padding

We need to pad irda_skb_cb in order to keep it safe accross dev_queue_xmit()
calls. This is some ugly and temporary hack triggered by recent qisc code
changes.
Even though it fixes bugzilla.kernel.org bug #11795, it will be replaced by a
proper fix before 2.6.29 is released.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/irda/irda_device.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h
index 3025ae17ddbe..94c852d47d0f 100644
--- a/include/net/irda/irda_device.h
+++ b/include/net/irda/irda_device.h
@@ -135,9 +135,11 @@ struct dongle_reg {
 
 /* 
  * Per-packet information we need to hide inside sk_buff 
- * (must not exceed 48 bytes, check with struct sk_buff) 
+ * (must not exceed 48 bytes, check with struct sk_buff)
+ * The default_qdisc_pad field is a temporary hack.
  */
 struct irda_skb_cb {
+	unsigned int default_qdisc_pad;
 	magic_t magic;       /* Be sure that we can trust the information */
 	__u32   next_speed;  /* The Speed to be set *after* this frame */
 	__u16   mtt;         /* Minimum turn around time */
-- 
cgit v1.2.3


From 2d91d78b68606ff7ce52ea70e187dee7831aa2f6 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Wed, 17 Dec 2008 15:47:29 -0800
Subject: Phonet: allocate a non-Ethernet ARP type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also leave some room for more 802.11 types.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 2 ++
 net/core/dev.c         | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 4d3401812e6c..11df77ab2dbb 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -87,6 +87,8 @@
 #define ARPHRD_IEEE80211_PRISM 802	/* IEEE 802.11 + Prism2 header  */
 #define ARPHRD_IEEE80211_RADIOTAP 803	/* IEEE 802.11 + radiotap header */
 
+#define ARPHRD_PHONET	820		/* PhoNet media type		*/
+
 #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
 #define ARPHRD_NONE	  0xFFFE	/* zero header length */
 
diff --git a/net/core/dev.c b/net/core/dev.c
index d8d7d1fccde4..15aab0c46d6d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -283,8 +283,8 @@ static const unsigned short netdev_lock_type[] =
 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
-	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
-	 ARPHRD_NONE};
+	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+	 ARPHRD_VOID, ARPHRD_NONE};
 
 static const char *netdev_lock_name[] =
 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -300,8 +300,8 @@ static const char *netdev_lock_name[] =
 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
-	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
-	 "_xmit_NONE"};
+	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+	 "_xmit_VOID", "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
-- 
cgit v1.2.3


From 57c81fffc863fb4c1804bc963bcbfb82d736c6df Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Wed, 17 Dec 2008 15:47:48 -0800
Subject: Phonet: allocate separate ARP type for GPRS over a Phonet pipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A separate xmit lock class supports GPRS over a Phonet pipe over a TUN
device (type ARPHRD_NONE).

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 1 +
 net/core/dev.c         | 4 ++--
 net/phonet/pep-gprs.c  | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 11df77ab2dbb..5ff89809a581 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -88,6 +88,7 @@
 #define ARPHRD_IEEE80211_RADIOTAP 803	/* IEEE 802.11 + radiotap header */
 
 #define ARPHRD_PHONET	820		/* PhoNet media type		*/
+#define ARPHRD_PHONET_PIPE 821		/* PhoNet pipe header		*/
 
 #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
 #define ARPHRD_NONE	  0xFFFE	/* zero header length */
diff --git a/net/core/dev.c b/net/core/dev.c
index 15aab0c46d6d..048cf1197872 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -284,7 +284,7 @@ static const unsigned short netdev_lock_type[] =
 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
-	 ARPHRD_VOID, ARPHRD_NONE};
+	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
 
 static const char *netdev_lock_name[] =
 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -301,7 +301,7 @@ static const char *netdev_lock_name[] =
 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
-	 "_xmit_VOID", "_xmit_NONE"};
+	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 0b640b0fce0c..a2873203dff2 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -260,7 +260,7 @@ static int gprs_set_mtu(struct net_device *dev, int new_mtu)
 static void gprs_setup(struct net_device *dev)
 {
 	dev->features		= NETIF_F_FRAGLIST;
-	dev->type		= ARPHRD_NONE;
+	dev->type		= ARPHRD_PHONET_PIPE;
 	dev->flags		= IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu		= GPRS_DEFAULT_MTU;
 	dev->hard_header_len	= 0;
-- 
cgit v1.2.3


From be677730a0ccb6bedced6f65f2ba8f57a3c607ba Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Wed, 17 Dec 2008 15:48:31 -0800
Subject: Phonet: use atomic for packet TX window
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GPRS TX flow control won't need to lock the underlying socket anymore.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pep.h |  2 +-
 net/phonet/pep.c         | 38 +++++++++++++++++++++-----------------
 net/phonet/socket.c      |  2 +-
 3 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index fcd793030e4d..4c61cdce4e5f 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -35,12 +35,12 @@ struct pep_sock {
 	struct sock		*listener;
 	struct sk_buff_head	ctrlreq_queue;
 #define PNPIPE_CTRLREQ_MAX	10
+	atomic_t		tx_credits;
 	int			ifindex;
 	u16			peer_type;	/* peer type/subtype */
 	u8			pipe_handle;
 
 	u8			rx_credits;
-	u8			tx_credits;
 	u8			rx_fc;	/* RX flow control */
 	u8			tx_fc;	/* TX flow control */
 	u8			init_enable;	/* auto-enable at creation */
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index bc6d50f83249..bb3e67849b38 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -225,6 +225,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *hdr = pnp_hdr(skb);
+	int wake = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
 		return -EINVAL;
@@ -241,16 +242,16 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 		case PN_LEGACY_FLOW_CONTROL:
 			switch (hdr->data[4]) {
 			case PEP_IND_BUSY:
-				pn->tx_credits = 0;
+				atomic_set(&pn->tx_credits, 0);
 				break;
 			case PEP_IND_READY:
-				pn->tx_credits = 1;
+				atomic_set(&pn->tx_credits, wake = 1);
 				break;
 			}
 			break;
 		case PN_ONE_CREDIT_FLOW_CONTROL:
 			if (hdr->data[4] == PEP_IND_READY)
-				pn->tx_credits = 1;
+				atomic_set(&pn->tx_credits, wake = 1);
 			break;
 		}
 		break;
@@ -258,10 +259,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 	case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
 		if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
 			break;
-		if (pn->tx_credits + hdr->data[4] > 0xff)
-			pn->tx_credits = 0xff;
-		else
-			pn->tx_credits += hdr->data[4];
+		atomic_add(wake = hdr->data[4], &pn->tx_credits);
 		break;
 
 	default:
@@ -269,7 +267,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 				(unsigned)hdr->data[1]);
 		return -EOPNOTSUPP;
 	}
-	if (pn->tx_credits)
+	if (wake)
 		sk->sk_write_space(sk);
 	return 0;
 }
@@ -343,7 +341,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		}
 		/* fall through */
 	case PNS_PEP_DISABLE_REQ:
-		pn->tx_credits = 0;
+		atomic_set(&pn->tx_credits, 0);
 		pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
 		break;
 
@@ -390,7 +388,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
 		/* fall through */
 	case PNS_PIPE_ENABLED_IND:
 		if (!pn_flow_safe(pn->tx_fc)) {
-			pn->tx_credits = 1;
+			atomic_set(&pn->tx_credits, 1);
 			sk->sk_write_space(sk);
 		}
 		if (sk->sk_state == TCP_ESTABLISHED)
@@ -504,8 +502,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
 	newpn->pn_sk.resource = pn->pn_sk.resource;
 	skb_queue_head_init(&newpn->ctrlreq_queue);
 	newpn->pipe_handle = pipe_handle;
+	atomic_set(&newpn->tx_credits, 0);
 	newpn->peer_type = peer_type;
-	newpn->rx_credits = newpn->tx_credits = 0;
+	newpn->rx_credits = 0;
 	newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL;
 	newpn->init_enable = enabled;
 
@@ -821,14 +820,18 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
 	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *ph;
 
+	if (pn_flow_safe(pn->tx_fc) &&
+	    !atomic_add_unless(&pn->tx_credits, -1, 0)) {
+		kfree_skb(skb);
+		return -ENOBUFS;
+	}
+
 	skb_push(skb, 3);
 	skb_reset_transport_header(skb);
 	ph = pnp_hdr(skb);
 	ph->utid = 0;
 	ph->message_id = PNS_PIPE_DATA;
 	ph->pipe_handle = pn->pipe_handle;
-	if (pn_flow_safe(pn->tx_fc) && pn->tx_credits)
-		pn->tx_credits--;
 
 	return pn_skb_send(sk, skb, &pipe_srv);
 }
@@ -866,7 +869,7 @@ disabled:
 	BUG_ON(sk->sk_state != TCP_ESTABLISHED);
 
 	/* Wait until flow control allows TX */
-	done = pn->tx_credits > 0;
+	done = atomic_read(&pn->tx_credits);
 	while (!done) {
 		DEFINE_WAIT(wait);
 
@@ -881,7 +884,7 @@ disabled:
 
 		prepare_to_wait(&sk->sk_socket->wait, &wait,
 				TASK_INTERRUPTIBLE);
-		done = sk_wait_event(sk, &timeo, pn->tx_credits > 0);
+		done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
 		finish_wait(&sk->sk_socket->wait, &wait);
 
 		if (sk->sk_state != TCP_ESTABLISHED)
@@ -895,7 +898,8 @@ disabled:
 			goto out;
 		skb_reserve(skb, MAX_PHONET_HEADER + 3);
 
-		if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits)
+		if (sk->sk_state != TCP_ESTABLISHED ||
+		    !atomic_read(&pn->tx_credits))
 			goto disabled; /* sock_alloc_send_skb might sleep */
 	}
 
@@ -917,7 +921,7 @@ int pep_writeable(struct sock *sk)
 {
 	struct pep_sock *pn = pep_sk(sk);
 
-	return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0;
+	return atomic_read(&pn->tx_credits);
 }
 
 int pep_write(struct sock *sk, struct sk_buff *skb)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c75aa5cdead5..ada2a35bf7a2 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -227,7 +227,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 	if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
 		return POLLHUP;
 
-	if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits)
+	if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 
 	return mask;
-- 
cgit v1.2.3


From f38f1d2aa5a3520cf05da7cd6bd12fe2b0c509b7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 16 Dec 2008 23:06:40 -0500
Subject: trace: add a way to enable or disable the stack tracer

Impact: enhancement to stack tracer

The stack tracer currently is either on when configured in or
off when it is not. It can not be disabled when it is configured on.
(besides disabling the function tracer that it uses)

This patch adds a way to enable or disable the stack tracer at
run time. It defaults off on bootup, but a kernel parameter 'stacktrace'
has been added to enable it on bootup.

A new sysctl has been added "kernel.stack_tracer_enabled" to let
the user enable or disable the stack tracer at run time.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |  4 +++
 include/linux/ftrace.h              |  8 ++++++
 kernel/sysctl.c                     | 10 +++++++
 kernel/trace/Kconfig                | 13 +++++++---
 kernel/trace/trace_stack.c          | 52 ++++++++++++++++++++++++++++++++++---
 5 files changed, 79 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2919a2e91938..edab81c13182 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -89,6 +89,7 @@ parameter is applicable:
 	SPARC	Sparc architecture is enabled.
 	SWSUSP	Software suspend (hibernation) is enabled.
 	SUSPEND	System suspend states are enabled.
+	FTRACE	Function tracing enabled.
 	TS	Appropriate touchscreen support is enabled.
 	USB	USB support is enabled.
 	USBHID	USB Human Interface Device support is enabled.
@@ -2173,6 +2174,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	st=		[HW,SCSI] SCSI tape parameters (buffers, etc.)
 			See Documentation/scsi/st.txt.
 
+	stacktrace	[FTRACE]
+			Enabled the stack tracer on boot up.
+
 	sti=		[PARISC,HW]
 			Format: <num>
 			Set the STI (builtin display/keyboard on the HP-PARISC
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 44020f31bd81..6b0db53caa7d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -86,6 +86,14 @@ static inline void ftrace_stop(void) { }
 static inline void ftrace_start(void) { }
 #endif /* CONFIG_FUNCTION_TRACER */
 
+#ifdef CONFIG_STACK_TRACER
+extern int stack_tracer_enabled;
+int
+stack_trace_sysctl(struct ctl_table *table, int write,
+		   struct file *file, void __user *buffer, size_t *lenp,
+		   loff_t *ppos);
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
 #include <asm/ftrace.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c83f566e940a..6ac501a2dcc6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -487,6 +487,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &ftrace_enable_sysctl,
 	},
 #endif
+#ifdef CONFIG_STACK_TRACER
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "stack_tracer_enabled",
+		.data		= &stack_tracer_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &stack_trace_sysctl,
+	},
+#endif
 #ifdef CONFIG_TRACING
 	{
 		.ctl_name	= CTL_UNNUMBERED,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d8bae6f4219e..e2a4ff6fc3a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -244,10 +244,15 @@ config STACK_TRACER
 
 	  This tracer works by hooking into every function call that the
 	  kernel executes, and keeping a maximum stack depth value and
-	  stack-trace saved. Because this logic has to execute in every
-	  kernel function, all the time, this option can slow down the
-	  kernel measurably and is generally intended for kernel
-	  developers only.
+	  stack-trace saved.  If this is configured with DYNAMIC_FTRACE
+	  then it will not have any overhead while the stack tracer
+	  is disabled.
+
+	  To enable the stack tracer on bootup, pass in 'stacktrace'
+	  on the kernel command line.
+
+	  The stack tracer can also be enabled or disabled via the
+	  sysctl kernel.stack_tracer_enabled
 
 	  Say N if unsure.
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 0b863f2cbc8e..4842c969c785 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -10,6 +10,7 @@
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
 #include <linux/module.h>
+#include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include "trace.h"
@@ -31,6 +32,10 @@ static raw_spinlock_t max_stack_lock =
 
 static int stack_trace_disabled __read_mostly;
 static DEFINE_PER_CPU(int, trace_active);
+static DEFINE_MUTEX(stack_sysctl_mutex);
+
+int stack_tracer_enabled;
+static int last_stack_tracer_enabled;
 
 static inline void check_stack(void)
 {
@@ -174,7 +179,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 	return count;
 }
 
-static struct file_operations stack_max_size_fops = {
+static const struct file_operations stack_max_size_fops = {
 	.open		= tracing_open_generic,
 	.read		= stack_max_size_read,
 	.write		= stack_max_size_write,
@@ -272,7 +277,7 @@ static int t_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct seq_operations stack_trace_seq_ops = {
+static const struct seq_operations stack_trace_seq_ops = {
 	.start		= t_start,
 	.next		= t_next,
 	.stop		= t_stop,
@@ -288,12 +293,48 @@ static int stack_trace_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
-static struct file_operations stack_trace_fops = {
+static const struct file_operations stack_trace_fops = {
 	.open		= stack_trace_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 };
 
+int
+stack_trace_sysctl(struct ctl_table *table, int write,
+		   struct file *file, void __user *buffer, size_t *lenp,
+		   loff_t *ppos)
+{
+	int ret;
+
+	mutex_lock(&stack_sysctl_mutex);
+
+	ret  = proc_dointvec(table, write, file, buffer, lenp, ppos);
+
+	if (ret || !write ||
+	    (last_stack_tracer_enabled == stack_tracer_enabled))
+		goto out;
+
+	last_stack_tracer_enabled = stack_tracer_enabled;
+
+	if (stack_tracer_enabled)
+		register_ftrace_function(&trace_ops);
+	else
+		unregister_ftrace_function(&trace_ops);
+
+ out:
+	mutex_unlock(&stack_sysctl_mutex);
+	return ret;
+}
+
+static int start_stack_trace __initdata;
+
+static __init int enable_stacktrace(char *str)
+{
+	start_stack_trace = 1;
+	return 1;
+}
+__setup("stacktrace", enable_stacktrace);
+
 static __init int stack_trace_init(void)
 {
 	struct dentry *d_tracer;
@@ -311,7 +352,10 @@ static __init int stack_trace_init(void)
 	if (!entry)
 		pr_warning("Could not create debugfs 'stack_trace' entry\n");
 
-	register_ftrace_function(&trace_ops);
+	if (start_stack_trace) {
+		register_ftrace_function(&trace_ops);
+		stack_tracer_enabled = 1;
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 9c2c48020ec0dd6ecd27e5a1298f73b40d85a595 Mon Sep 17 00:00:00 2001
From: Ken Chen <kenchen@google.com>
Date: Tue, 16 Dec 2008 23:41:22 -0800
Subject: schedstat: consolidate per-task cpu runtime stats

Impact: simplify code

When we turn on CONFIG_SCHEDSTATS, per-task cpu runtime is accumulated
twice. Once in task->se.sum_exec_runtime and once in sched_info.cpu_time.
These two stats are exactly the same.

Given that task->se.sum_exec_runtime is always accumulated by the core
scheduler, sched_info can reuse that data instead of duplicate the accounting.

Signed-off-by: Ken Chen <kenchen@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/base.c        | 2 +-
 include/linux/sched.h | 3 +--
 kernel/delayacct.c    | 2 +-
 kernel/sched.c        | 2 ++
 kernel/sched_stats.h  | 5 ++---
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4677603c889..4d745bac768c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -347,7 +347,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 {
 	return sprintf(buffer, "%llu %llu %lu\n",
-			task->sched_info.cpu_time,
+			task->se.sum_exec_runtime,
 			task->sched_info.run_delay,
 			task->sched_info.pcount);
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8cccd6dc5d66..2d1e840ddd35 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -670,8 +670,7 @@ struct reclaim_state;
 struct sched_info {
 	/* cumulative counters */
 	unsigned long pcount;	      /* # of times run on this cpu */
-	unsigned long long cpu_time,  /* time spent on the cpu */
-			   run_delay; /* time spent waiting on a runqueue */
+	unsigned long long run_delay; /* time spent waiting on a runqueue */
 
 	/* timestamps */
 	unsigned long long last_arrival,/* when we last ran on a cpu */
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index b3179dad71be..abb6e17505e2 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -127,7 +127,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	 */
 	t1 = tsk->sched_info.pcount;
 	t2 = tsk->sched_info.run_delay;
-	t3 = tsk->sched_info.cpu_time;
+	t3 = tsk->se.sum_exec_runtime;
 
 	d->cpu_count += t1;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index f53e2b8ef521..fd835fc320b8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -596,6 +596,8 @@ struct rq {
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
 	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
 
 	/* sys_sched_yield() stats */
 	unsigned int yld_exp_empty;
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a2b02c..3b01098164c8 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -31,7 +31,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		    rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
 		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
 		    rq->ttwu_count, rq->ttwu_local,
-		    rq->rq_sched_info.cpu_time,
+		    rq->rq_cpu_time,
 		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
 
 		seq_printf(seq, "\n");
@@ -123,7 +123,7 @@ static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {
 	if (rq)
-		rq->rq_sched_info.cpu_time += delta;
+		rq->rq_cpu_time += delta;
 }
 
 static inline void
@@ -236,7 +236,6 @@ static inline void sched_info_depart(struct task_struct *t)
 	unsigned long long delta = task_rq(t)->clock -
 					t->sched_info.last_arrival;
 
-	t->sched_info.cpu_time += delta;
 	rq_sched_info_depart(task_rq(t), delta);
 
 	if (t->state == TASK_RUNNING)
-- 
cgit v1.2.3


From 74c8a6130486bed224e960790f4aa72dd09c061e Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 17 Dec 2008 19:40:33 +0900
Subject: locking, irq: enclose irq_desc_lock_class in CONFIG_LOCKDEP

Impact: simplify code

commit "08678b0: generic: sparse irqs: use irq_desc() [...]" introduced
the irq_desc_lock_class variable.

But it is used only if CONFIG_SPARSE_IRQ=Y or CONFIG_TRACE_IRQFLAGS=Y.
Otherwise, following warnings happen:

	CC      kernel/irq/handle.o
	kernel/irq/handle.c:26: warning: 'irq_desc_lock_class' defined but not used

Actually, current early_init_irq_lock_class has a bit strange and messy ifdef.
In addition, it is not valueable.

1. this function is protected by !CONFIG_SPARSE_IRQ, but that is not necessary.
   if CONFIG_SPARSE_IRQ=Y, desc of all irq number are initialized by NULL
   at first - then this function calling is safe.

2. this function protected by CONFIG_TRACE_IRQFLAGS too. but it is not
   necessary either, because lockdep_set_class() doesn't have bad side
   effect even if CONFIG_TRACE_IRQFLAGS=n.

This patch bloat kernel size a bit on CONFIG_TRACE_IRQFLAGS=n and
CONFIG_SPARSE_IRQ=Y - but that's ok. early_init_irq_lock_class() is not
a fastpatch at all.

To avoid messy ifdefs is more important than a few bytes diet.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 2 +-
 kernel/irq/handle.c     | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 29aec6e10020..9dba554c802c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -377,7 +377,7 @@ do {								\
 
 #endif /* CONFIG_LOCK_STAT */
 
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
+#ifdef CONFIG_GENERIC_HARDIRQS
 extern void early_init_irq_lock_class(void);
 #else
 static inline void early_init_irq_lock_class(void)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f1a23069c20a..6492400cb50d 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -422,11 +422,8 @@ out:
 }
 #endif
 
-
-#ifdef CONFIG_TRACE_IRQFLAGS
 void early_init_irq_lock_class(void)
 {
-#ifndef CONFIG_SPARSE_IRQ
 	struct irq_desc *desc;
 	int i;
 
@@ -436,9 +433,7 @@ void early_init_irq_lock_class(void)
 
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	}
-#endif
 }
-#endif
 
 #ifdef CONFIG_SPARSE_IRQ
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
-- 
cgit v1.2.3


From 40aa4a30d0fd075fb934de4ee8163056827052ab Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 16 Dec 2008 10:15:12 +0000
Subject: ASoC: Add WM8350 AudioPlus codec driver

The WM8350 is an integrated audio and power management subsystem which
provides a single-chip solution for portable audio and multimedia systems.

The integrated audio CODEC provides all the necessary functions for
high-quality stereo recording and playback. Programmable on-chip
amplifiers allow for the direct connection of headphones and microphones
with a minimum of external components. A programmable low-noise bias
voltage is available to feed one or more electret microphones.
Additional audio features include programmable high-pass filter in the
ADC input path.

This driver was originally written by Liam Girdwood with further updates
from me.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8350/audio.h |   38 +-
 sound/soc/codecs/Kconfig         |    4 +
 sound/soc/codecs/Makefile        |    2 +
 sound/soc/codecs/wm8350.c        | 1583 ++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/wm8350.h        |   20 +
 5 files changed, 1643 insertions(+), 4 deletions(-)
 create mode 100644 sound/soc/codecs/wm8350.c
 create mode 100644 sound/soc/codecs/wm8350.h

(limited to 'include')

diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h
index 217bb22ebb8e..af95a1d2f3a1 100644
--- a/include/linux/mfd/wm8350/audio.h
+++ b/include/linux/mfd/wm8350/audio.h
@@ -1,7 +1,7 @@
 /*
  * audio.h  --  Audio Driver for Wolfson WM8350 PMIC
  *
- * Copyright 2007 Wolfson Microelectronics PLC
+ * Copyright 2007, 2008 Wolfson Microelectronics PLC
  *
  *  This program is free software; you can redistribute  it and/or modify it
  *  under  the terms of  the GNU General  Public License as published by the
@@ -70,9 +70,9 @@
 #define WM8350_CODEC_ISEL_0_5                   3	/* x0.5 */
 
 #define WM8350_VMID_OFF                         0
-#define WM8350_VMID_500K                        1
-#define WM8350_VMID_100K                        2
-#define WM8350_VMID_10K                         3
+#define WM8350_VMID_300K                        1
+#define WM8350_VMID_50K                         2
+#define WM8350_VMID_5K                          3
 
 /*
  * R40 (0x28) - Clock Control 1
@@ -591,8 +591,38 @@
 #define WM8350_IRQ_CODEC_MICSCD			41
 #define WM8350_IRQ_CODEC_MICD			42
 
+/*
+ * WM8350 Platform data.
+ *
+ * This must be initialised per platform for best audio performance.
+ * Please see WM8350 datasheet for information.
+ */
+struct wm8350_audio_platform_data {
+	int vmid_discharge_msecs;	/* VMID --> OFF discharge time */
+	int drain_msecs;	/* OFF drain time */
+	int cap_discharge_msecs;	/* Cap ON (from OFF) discharge time */
+	int vmid_charge_msecs;	/* vmid power up time */
+	u32 vmid_s_curve:2;	/* vmid enable s curve speed */
+	u32 dis_out4:2;		/* out4 discharge speed */
+	u32 dis_out3:2;		/* out3 discharge speed */
+	u32 dis_out2:2;		/* out2 discharge speed */
+	u32 dis_out1:2;		/* out1 discharge speed */
+	u32 vroi_out4:1;	/* out4 tie off */
+	u32 vroi_out3:1;	/* out3 tie off */
+	u32 vroi_out2:1;	/* out2 tie off */
+	u32 vroi_out1:1;	/* out1 tie off */
+	u32 vroi_enable:1;	/* enable tie off */
+	u32 codec_current_on:2;	/* current level ON */
+	u32 codec_current_standby:2;	/* current level STANDBY */
+	u32 codec_current_charge:2;	/* codec current @ vmid charge */
+};
+
+struct snd_soc_codec;
+
 struct wm8350_codec {
 	struct platform_device *pdev;
+	struct snd_soc_codec *codec;
+	struct wm8350_audio_platform_data *platform_data;
 };
 
 #endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index bf68052d6924..c41289b5f586 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -13,6 +13,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_TWL4030 if TWL4030_CORE
 	select SND_SOC_UDA134X
 	select SND_SOC_UDA1380 if I2C
+	select SND_SOC_WM8350 if MFD_WM8350
 	select SND_SOC_WM8510 if (I2C || SPI_MASTER)
 	select SND_SOC_WM8580 if I2C
 	select SND_SOC_WM8728 if (I2C || SPI_MASTER)
@@ -100,6 +101,9 @@ config SND_SOC_UDA134X
 config SND_SOC_UDA1380
         tristate
 
+config SND_SOC_WM8350
+	tristate
+
 config SND_SOC_WM8510
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 9a20fddd09c7..c4ddc9aa2bbd 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -12,6 +12,7 @@ snd-soc-tlv320aic3x-objs := tlv320aic3x.o
 snd-soc-twl4030-objs := twl4030.o
 snd-soc-uda134x-objs := uda134x.o
 snd-soc-uda1380-objs := uda1380.o
+snd-soc-wm8350-objs := wm8350.o
 snd-soc-wm8510-objs := wm8510.o
 snd-soc-wm8580-objs := wm8580.o
 snd-soc-wm8728-objs := wm8728.o
@@ -39,6 +40,7 @@ obj-$(CONFIG_SND_SOC_TLV320AIC3X)	+= snd-soc-tlv320aic3x.o
 obj-$(CONFIG_SND_SOC_TWL4030)	+= snd-soc-twl4030.o
 obj-$(CONFIG_SND_SOC_UDA134X)	+= snd-soc-uda134x.o
 obj-$(CONFIG_SND_SOC_UDA1380)	+= snd-soc-uda1380.o
+obj-$(CONFIG_SND_SOC_WM8350)	+= snd-soc-wm8350.o
 obj-$(CONFIG_SND_SOC_WM8510)	+= snd-soc-wm8510.o
 obj-$(CONFIG_SND_SOC_WM8580)	+= snd-soc-wm8580.o
 obj-$(CONFIG_SND_SOC_WM8728)	+= snd-soc-wm8728.o
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
new file mode 100644
index 000000000000..4bbfb5a5894b
--- /dev/null
+++ b/sound/soc/codecs/wm8350.c
@@ -0,0 +1,1583 @@
+/*
+ * wm8350.c -- WM8350 ALSA SoC audio driver
+ *
+ * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/wm8350/audio.h>
+#include <linux/mfd/wm8350/core.h>
+#include <linux/regulator/consumer.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "wm8350.h"
+
+#define WM8350_OUTn_0dB 0x39
+
+#define WM8350_RAMP_NONE	0
+#define WM8350_RAMP_UP		1
+#define WM8350_RAMP_DOWN	2
+
+/* We only include the analogue supplies here; the digital supplies
+ * need to be available well before this driver can be probed.
+ */
+static const char *supply_names[] = {
+	"AVDD",
+	"HPVDD",
+};
+
+struct wm8350_output {
+	u16 active;
+	u16 left_vol;
+	u16 right_vol;
+	u16 ramp;
+	u16 mute;
+};
+
+struct wm8350_data {
+	struct snd_soc_codec codec;
+	struct wm8350_output out1;
+	struct wm8350_output out2;
+	struct regulator_bulk_data supplies[ARRAY_SIZE(supply_names)];
+};
+
+static unsigned int wm8350_codec_cache_read(struct snd_soc_codec *codec,
+					    unsigned int reg)
+{
+	struct wm8350 *wm8350 = codec->control_data;
+	return wm8350->reg_cache[reg];
+}
+
+static unsigned int wm8350_codec_read(struct snd_soc_codec *codec,
+				      unsigned int reg)
+{
+	struct wm8350 *wm8350 = codec->control_data;
+	return wm8350_reg_read(wm8350, reg);
+}
+
+static int wm8350_codec_write(struct snd_soc_codec *codec, unsigned int reg,
+			      unsigned int value)
+{
+	struct wm8350 *wm8350 = codec->control_data;
+	return wm8350_reg_write(wm8350, reg, value);
+}
+
+/*
+ * Ramp OUT1 PGA volume to minimise pops at stream startup and shutdown.
+ */
+static inline int wm8350_out1_ramp_step(struct snd_soc_codec *codec)
+{
+	struct wm8350_data *wm8350_data = codec->private_data;
+	struct wm8350_output *out1 = &wm8350_data->out1;
+	struct wm8350 *wm8350 = codec->control_data;
+	int left_complete = 0, right_complete = 0;
+	u16 reg, val;
+
+	/* left channel */
+	reg = wm8350_reg_read(wm8350, WM8350_LOUT1_VOLUME);
+	val = (reg & WM8350_OUT1L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT;
+
+	if (out1->ramp == WM8350_RAMP_UP) {
+		/* ramp step up */
+		if (val < out1->left_vol) {
+			val++;
+			reg &= ~WM8350_OUT1L_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_LOUT1_VOLUME,
+					 reg | (val << WM8350_OUT1L_VOL_SHIFT));
+		} else
+			left_complete = 1;
+	} else if (out1->ramp == WM8350_RAMP_DOWN) {
+		/* ramp step down */
+		if (val > 0) {
+			val--;
+			reg &= ~WM8350_OUT1L_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_LOUT1_VOLUME,
+					 reg | (val << WM8350_OUT1L_VOL_SHIFT));
+		} else
+			left_complete = 1;
+	} else
+		return 1;
+
+	/* right channel */
+	reg = wm8350_reg_read(wm8350, WM8350_ROUT1_VOLUME);
+	val = (reg & WM8350_OUT1R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT;
+	if (out1->ramp == WM8350_RAMP_UP) {
+		/* ramp step up */
+		if (val < out1->right_vol) {
+			val++;
+			reg &= ~WM8350_OUT1R_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_ROUT1_VOLUME,
+					 reg | (val << WM8350_OUT1R_VOL_SHIFT));
+		} else
+			right_complete = 1;
+	} else if (out1->ramp == WM8350_RAMP_DOWN) {
+		/* ramp step down */
+		if (val > 0) {
+			val--;
+			reg &= ~WM8350_OUT1R_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_ROUT1_VOLUME,
+					 reg | (val << WM8350_OUT1R_VOL_SHIFT));
+		} else
+			right_complete = 1;
+	}
+
+	/* only hit the update bit if either volume has changed this step */
+	if (!left_complete || !right_complete)
+		wm8350_set_bits(wm8350, WM8350_LOUT1_VOLUME, WM8350_OUT1_VU);
+
+	return left_complete & right_complete;
+}
+
+/*
+ * Ramp OUT2 PGA volume to minimise pops at stream startup and shutdown.
+ */
+static inline int wm8350_out2_ramp_step(struct snd_soc_codec *codec)
+{
+	struct wm8350_data *wm8350_data = codec->private_data;
+	struct wm8350_output *out2 = &wm8350_data->out2;
+	struct wm8350 *wm8350 = codec->control_data;
+	int left_complete = 0, right_complete = 0;
+	u16 reg, val;
+
+	/* left channel */
+	reg = wm8350_reg_read(wm8350, WM8350_LOUT2_VOLUME);
+	val = (reg & WM8350_OUT2L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT;
+	if (out2->ramp == WM8350_RAMP_UP) {
+		/* ramp step up */
+		if (val < out2->left_vol) {
+			val++;
+			reg &= ~WM8350_OUT2L_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_LOUT2_VOLUME,
+					 reg | (val << WM8350_OUT1L_VOL_SHIFT));
+		} else
+			left_complete = 1;
+	} else if (out2->ramp == WM8350_RAMP_DOWN) {
+		/* ramp step down */
+		if (val > 0) {
+			val--;
+			reg &= ~WM8350_OUT2L_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_LOUT2_VOLUME,
+					 reg | (val << WM8350_OUT1L_VOL_SHIFT));
+		} else
+			left_complete = 1;
+	} else
+		return 1;
+
+	/* right channel */
+	reg = wm8350_reg_read(wm8350, WM8350_ROUT2_VOLUME);
+	val = (reg & WM8350_OUT2R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT;
+	if (out2->ramp == WM8350_RAMP_UP) {
+		/* ramp step up */
+		if (val < out2->right_vol) {
+			val++;
+			reg &= ~WM8350_OUT2R_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_ROUT2_VOLUME,
+					 reg | (val << WM8350_OUT1R_VOL_SHIFT));
+		} else
+			right_complete = 1;
+	} else if (out2->ramp == WM8350_RAMP_DOWN) {
+		/* ramp step down */
+		if (val > 0) {
+			val--;
+			reg &= ~WM8350_OUT2R_VOL_MASK;
+			wm8350_reg_write(wm8350, WM8350_ROUT2_VOLUME,
+					 reg | (val << WM8350_OUT1R_VOL_SHIFT));
+		} else
+			right_complete = 1;
+	}
+
+	/* only hit the update bit if either volume has changed this step */
+	if (!left_complete || !right_complete)
+		wm8350_set_bits(wm8350, WM8350_LOUT2_VOLUME, WM8350_OUT2_VU);
+
+	return left_complete & right_complete;
+}
+
+/*
+ * This work ramps both output PGAs at stream start/stop time to
+ * minimise pop associated with DAPM power switching.
+ * It's best to enable Zero Cross when ramping occurs to minimise any
+ * zipper noises.
+ */
+static void wm8350_pga_work(struct work_struct *work)
+{
+	struct snd_soc_codec *codec =
+	    container_of(work, struct snd_soc_codec, delayed_work.work);
+	struct wm8350_data *wm8350_data = codec->private_data;
+	struct wm8350_output *out1 = &wm8350_data->out1,
+	    *out2 = &wm8350_data->out2;
+	int i, out1_complete, out2_complete;
+
+	/* do we need to ramp at all ? */
+	if (out1->ramp == WM8350_RAMP_NONE && out2->ramp == WM8350_RAMP_NONE)
+		return;
+
+	/* PGA volumes have 6 bits of resolution to ramp */
+	for (i = 0; i <= 63; i++) {
+		out1_complete = 1, out2_complete = 1;
+		if (out1->ramp != WM8350_RAMP_NONE)
+			out1_complete = wm8350_out1_ramp_step(codec);
+		if (out2->ramp != WM8350_RAMP_NONE)
+			out2_complete = wm8350_out2_ramp_step(codec);
+
+		/* ramp finished ? */
+		if (out1_complete && out2_complete)
+			break;
+
+		/* we need to delay longer on the up ramp */
+		if (out1->ramp == WM8350_RAMP_UP ||
+		    out2->ramp == WM8350_RAMP_UP) {
+			/* delay is longer over 0dB as increases are larger */
+			if (i >= WM8350_OUTn_0dB)
+				schedule_timeout_interruptible(msecs_to_jiffies
+							       (2));
+			else
+				schedule_timeout_interruptible(msecs_to_jiffies
+							       (1));
+		} else
+			udelay(50);	/* doesn't matter if we delay longer */
+	}
+
+	out1->ramp = WM8350_RAMP_NONE;
+	out2->ramp = WM8350_RAMP_NONE;
+}
+
+/*
+ * WM8350 Controls
+ */
+
+static int pga_event(struct snd_soc_dapm_widget *w,
+		     struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct wm8350_data *wm8350_data = codec->private_data;
+	struct wm8350_output *out;
+
+	switch (w->shift) {
+	case 0:
+	case 1:
+		out = &wm8350_data->out1;
+		break;
+	case 2:
+	case 3:
+		out = &wm8350_data->out2;
+		break;
+
+	default:
+		BUG();
+		return -1;
+	}
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		out->ramp = WM8350_RAMP_UP;
+		out->active = 1;
+
+		if (!delayed_work_pending(&codec->delayed_work))
+			schedule_delayed_work(&codec->delayed_work,
+					      msecs_to_jiffies(1));
+		break;
+
+	case SND_SOC_DAPM_PRE_PMD:
+		out->ramp = WM8350_RAMP_DOWN;
+		out->active = 0;
+
+		if (!delayed_work_pending(&codec->delayed_work))
+			schedule_delayed_work(&codec->delayed_work,
+					      msecs_to_jiffies(1));
+		break;
+	}
+
+	return 0;
+}
+
+static int wm8350_put_volsw_2r_vu(struct snd_kcontrol *kcontrol,
+				  struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct wm8350_data *wm8350_priv = codec->private_data;
+	struct wm8350_output *out = NULL;
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	int ret;
+	unsigned int reg = mc->reg;
+	u16 val;
+
+	/* For OUT1 and OUT2 we shadow the values and only actually write
+	 * them out when active in order to ensure the amplifier comes on
+	 * as quietly as possible. */
+	switch (reg) {
+	case WM8350_LOUT1_VOLUME:
+		out = &wm8350_priv->out1;
+		break;
+	case WM8350_LOUT2_VOLUME:
+		out = &wm8350_priv->out2;
+		break;
+	default:
+		break;
+	}
+
+	if (out) {
+		out->left_vol = ucontrol->value.integer.value[0];
+		out->right_vol = ucontrol->value.integer.value[1];
+		if (!out->active)
+			return 1;
+	}
+
+	ret = snd_soc_put_volsw_2r(kcontrol, ucontrol);
+	if (ret < 0)
+		return ret;
+
+	/* now hit the volume update bits (always bit 8) */
+	val = wm8350_codec_read(codec, reg);
+	wm8350_codec_write(codec, reg, val | WM8350_OUT1_VU);
+	return 1;
+}
+
+static int wm8350_get_volsw_2r(struct snd_kcontrol *kcontrol,
+			       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct wm8350_data *wm8350_priv = codec->private_data;
+	struct wm8350_output *out1 = &wm8350_priv->out1;
+	struct wm8350_output *out2 = &wm8350_priv->out2;
+	struct soc_mixer_control *mc =
+		(struct soc_mixer_control *)kcontrol->private_value;
+	unsigned int reg = mc->reg;
+
+	/* If these are cached registers use the cache */
+	switch (reg) {
+	case WM8350_LOUT1_VOLUME:
+		ucontrol->value.integer.value[0] = out1->left_vol;
+		ucontrol->value.integer.value[1] = out1->right_vol;
+		return 0;
+
+	case WM8350_LOUT2_VOLUME:
+		ucontrol->value.integer.value[0] = out2->left_vol;
+		ucontrol->value.integer.value[1] = out2->right_vol;
+		return 0;
+
+	default:
+		break;
+	}
+
+	return snd_soc_get_volsw_2r(kcontrol, ucontrol);
+}
+
+/* double control with volume update */
+#define SOC_WM8350_DOUBLE_R_TLV(xname, reg_left, reg_right, xshift, xmax, \
+				xinvert, tlv_array) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \
+	.access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
+		SNDRV_CTL_ELEM_ACCESS_READWRITE | \
+		SNDRV_CTL_ELEM_ACCESS_VOLATILE, \
+	.tlv.p = (tlv_array), \
+	.info = snd_soc_info_volsw_2r, \
+	.get = wm8350_get_volsw_2r, .put = wm8350_put_volsw_2r_vu, \
+	.private_value = (unsigned long)&(struct soc_mixer_control) \
+		{.reg = reg_left, .rreg = reg_right, .shift = xshift, \
+		 .rshift = xshift, .max = xmax, .invert = xinvert}, }
+
+static const char *wm8350_deemp[] = { "None", "32kHz", "44.1kHz", "48kHz" };
+static const char *wm8350_pol[] = { "Normal", "Inv R", "Inv L", "Inv L & R" };
+static const char *wm8350_dacmutem[] = { "Normal", "Soft" };
+static const char *wm8350_dacmutes[] = { "Fast", "Slow" };
+static const char *wm8350_dacfilter[] = { "Normal", "Sloping" };
+static const char *wm8350_adcfilter[] = { "None", "High Pass" };
+static const char *wm8350_adchp[] = { "44.1kHz", "8kHz", "16kHz", "32kHz" };
+static const char *wm8350_lr[] = { "Left", "Right" };
+
+static const struct soc_enum wm8350_enum[] = {
+	SOC_ENUM_SINGLE(WM8350_DAC_CONTROL, 4, 4, wm8350_deemp),
+	SOC_ENUM_SINGLE(WM8350_DAC_CONTROL, 0, 4, wm8350_pol),
+	SOC_ENUM_SINGLE(WM8350_DAC_MUTE_VOLUME, 14, 2, wm8350_dacmutem),
+	SOC_ENUM_SINGLE(WM8350_DAC_MUTE_VOLUME, 13, 2, wm8350_dacmutes),
+	SOC_ENUM_SINGLE(WM8350_DAC_MUTE_VOLUME, 12, 2, wm8350_dacfilter),
+	SOC_ENUM_SINGLE(WM8350_ADC_CONTROL, 15, 2, wm8350_adcfilter),
+	SOC_ENUM_SINGLE(WM8350_ADC_CONTROL, 8, 4, wm8350_adchp),
+	SOC_ENUM_SINGLE(WM8350_ADC_CONTROL, 0, 4, wm8350_pol),
+	SOC_ENUM_SINGLE(WM8350_INPUT_MIXER_VOLUME, 15, 2, wm8350_lr),
+};
+
+static DECLARE_TLV_DB_LINEAR(pre_amp_tlv, -1200, 3525);
+static DECLARE_TLV_DB_LINEAR(out_pga_tlv, -5700, 600);
+static DECLARE_TLV_DB_SCALE(dac_pcm_tlv, -7163, 36, 1);
+static DECLARE_TLV_DB_SCALE(adc_pcm_tlv, -12700, 50, 1);
+static DECLARE_TLV_DB_SCALE(out_mix_tlv, -1500, 300, 1);
+
+static const unsigned int capture_sd_tlv[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 12, TLV_DB_SCALE_ITEM(-3600, 300, 1),
+	13, 15, TLV_DB_SCALE_ITEM(0, 0, 0),
+};
+
+static const struct snd_kcontrol_new wm8350_snd_controls[] = {
+	SOC_ENUM("Playback Deemphasis", wm8350_enum[0]),
+	SOC_ENUM("Playback DAC Inversion", wm8350_enum[1]),
+	SOC_WM8350_DOUBLE_R_TLV("Playback PCM Volume",
+				WM8350_DAC_DIGITAL_VOLUME_L,
+				WM8350_DAC_DIGITAL_VOLUME_R,
+				0, 255, 0, dac_pcm_tlv),
+	SOC_ENUM("Playback PCM Mute Function", wm8350_enum[2]),
+	SOC_ENUM("Playback PCM Mute Speed", wm8350_enum[3]),
+	SOC_ENUM("Playback PCM Filter", wm8350_enum[4]),
+	SOC_ENUM("Capture PCM Filter", wm8350_enum[5]),
+	SOC_ENUM("Capture PCM HP Filter", wm8350_enum[6]),
+	SOC_ENUM("Capture ADC Inversion", wm8350_enum[7]),
+	SOC_WM8350_DOUBLE_R_TLV("Capture PCM Volume",
+				WM8350_ADC_DIGITAL_VOLUME_L,
+				WM8350_ADC_DIGITAL_VOLUME_R,
+				0, 255, 0, adc_pcm_tlv),
+	SOC_DOUBLE_TLV("Capture Sidetone Volume",
+		       WM8350_ADC_DIVIDER,
+		       8, 4, 15, 1, capture_sd_tlv),
+	SOC_WM8350_DOUBLE_R_TLV("Capture Volume",
+				WM8350_LEFT_INPUT_VOLUME,
+				WM8350_RIGHT_INPUT_VOLUME,
+				2, 63, 0, pre_amp_tlv),
+	SOC_DOUBLE_R("Capture ZC Switch",
+		     WM8350_LEFT_INPUT_VOLUME,
+		     WM8350_RIGHT_INPUT_VOLUME, 13, 1, 0),
+	SOC_SINGLE_TLV("Left Input Left Sidetone Volume",
+		       WM8350_OUTPUT_LEFT_MIXER_VOLUME, 1, 7, 0, out_mix_tlv),
+	SOC_SINGLE_TLV("Left Input Right Sidetone Volume",
+		       WM8350_OUTPUT_LEFT_MIXER_VOLUME,
+		       5, 7, 0, out_mix_tlv),
+	SOC_SINGLE_TLV("Left Input Bypass Volume",
+		       WM8350_OUTPUT_LEFT_MIXER_VOLUME,
+		       9, 7, 0, out_mix_tlv),
+	SOC_SINGLE_TLV("Right Input Left Sidetone Volume",
+		       WM8350_OUTPUT_RIGHT_MIXER_VOLUME,
+		       1, 7, 0, out_mix_tlv),
+	SOC_SINGLE_TLV("Right Input Right Sidetone Volume",
+		       WM8350_OUTPUT_RIGHT_MIXER_VOLUME,
+		       5, 7, 0, out_mix_tlv),
+	SOC_SINGLE_TLV("Right Input Bypass Volume",
+		       WM8350_OUTPUT_RIGHT_MIXER_VOLUME,
+		       13, 7, 0, out_mix_tlv),
+	SOC_SINGLE("Left Input Mixer +20dB Switch",
+		   WM8350_INPUT_MIXER_VOLUME_L, 0, 1, 0),
+	SOC_SINGLE("Right Input Mixer +20dB Switch",
+		   WM8350_INPUT_MIXER_VOLUME_R, 0, 1, 0),
+	SOC_SINGLE_TLV("Out4 Capture Volume",
+		       WM8350_INPUT_MIXER_VOLUME,
+		       1, 7, 0, out_mix_tlv),
+	SOC_WM8350_DOUBLE_R_TLV("Out1 Playback Volume",
+				WM8350_LOUT1_VOLUME,
+				WM8350_ROUT1_VOLUME,
+				2, 63, 0, out_pga_tlv),
+	SOC_DOUBLE_R("Out1 Playback ZC Switch",
+		     WM8350_LOUT1_VOLUME,
+		     WM8350_ROUT1_VOLUME, 13, 1, 0),
+	SOC_WM8350_DOUBLE_R_TLV("Out2 Playback Volume",
+				WM8350_LOUT2_VOLUME,
+				WM8350_ROUT2_VOLUME,
+				2, 63, 0, out_pga_tlv),
+	SOC_DOUBLE_R("Out2 Playback ZC Switch", WM8350_LOUT2_VOLUME,
+		     WM8350_ROUT2_VOLUME, 13, 1, 0),
+	SOC_SINGLE("Out2 Right Invert Switch", WM8350_ROUT2_VOLUME, 10, 1, 0),
+	SOC_SINGLE_TLV("Out2 Beep Volume", WM8350_BEEP_VOLUME,
+		       5, 7, 0, out_mix_tlv),
+
+	SOC_DOUBLE_R("Out1 Playback Switch",
+		     WM8350_LOUT1_VOLUME,
+		     WM8350_ROUT1_VOLUME,
+		     14, 1, 1),
+	SOC_DOUBLE_R("Out2 Playback Switch",
+		     WM8350_LOUT2_VOLUME,
+		     WM8350_ROUT2_VOLUME,
+		     14, 1, 1),
+};
+
+/*
+ * DAPM Controls
+ */
+
+/* Left Playback Mixer */
+static const struct snd_kcontrol_new wm8350_left_play_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Playback Switch",
+			WM8350_LEFT_MIXER_CONTROL, 11, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch",
+			WM8350_LEFT_MIXER_CONTROL, 2, 1, 0),
+	SOC_DAPM_SINGLE("Right Playback Switch",
+			WM8350_LEFT_MIXER_CONTROL, 12, 1, 0),
+	SOC_DAPM_SINGLE("Left Sidetone Switch",
+			WM8350_LEFT_MIXER_CONTROL, 0, 1, 0),
+	SOC_DAPM_SINGLE("Right Sidetone Switch",
+			WM8350_LEFT_MIXER_CONTROL, 1, 1, 0),
+};
+
+/* Right Playback Mixer */
+static const struct snd_kcontrol_new wm8350_right_play_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Playback Switch",
+			WM8350_RIGHT_MIXER_CONTROL, 12, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch",
+			WM8350_RIGHT_MIXER_CONTROL, 3, 1, 0),
+	SOC_DAPM_SINGLE("Left Playback Switch",
+			WM8350_RIGHT_MIXER_CONTROL, 11, 1, 0),
+	SOC_DAPM_SINGLE("Left Sidetone Switch",
+			WM8350_RIGHT_MIXER_CONTROL, 0, 1, 0),
+	SOC_DAPM_SINGLE("Right Sidetone Switch",
+			WM8350_RIGHT_MIXER_CONTROL, 1, 1, 0),
+};
+
+/* Out4 Mixer */
+static const struct snd_kcontrol_new wm8350_out4_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Right Playback Switch",
+			WM8350_OUT4_MIXER_CONTROL, 12, 1, 0),
+	SOC_DAPM_SINGLE("Left Playback Switch",
+			WM8350_OUT4_MIXER_CONTROL, 11, 1, 0),
+	SOC_DAPM_SINGLE("Right Capture Switch",
+			WM8350_OUT4_MIXER_CONTROL, 9, 1, 0),
+	SOC_DAPM_SINGLE("Out3 Playback Switch",
+			WM8350_OUT4_MIXER_CONTROL, 2, 1, 0),
+	SOC_DAPM_SINGLE("Right Mixer Switch",
+			WM8350_OUT4_MIXER_CONTROL, 1, 1, 0),
+	SOC_DAPM_SINGLE("Left Mixer Switch",
+			WM8350_OUT4_MIXER_CONTROL, 0, 1, 0),
+};
+
+/* Out3 Mixer */
+static const struct snd_kcontrol_new wm8350_out3_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Left Playback Switch",
+			WM8350_OUT3_MIXER_CONTROL, 11, 1, 0),
+	SOC_DAPM_SINGLE("Left Capture Switch",
+			WM8350_OUT3_MIXER_CONTROL, 8, 1, 0),
+	SOC_DAPM_SINGLE("Out4 Playback Switch",
+			WM8350_OUT3_MIXER_CONTROL, 3, 1, 0),
+	SOC_DAPM_SINGLE("Left Mixer Switch",
+			WM8350_OUT3_MIXER_CONTROL, 0, 1, 0),
+};
+
+/* Left Input Mixer */
+static const struct snd_kcontrol_new wm8350_left_capt_mixer_controls[] = {
+	SOC_DAPM_SINGLE_TLV("L2 Capture Volume",
+			    WM8350_INPUT_MIXER_VOLUME_L, 1, 7, 0, out_mix_tlv),
+	SOC_DAPM_SINGLE_TLV("L3 Capture Volume",
+			    WM8350_INPUT_MIXER_VOLUME_L, 9, 7, 0, out_mix_tlv),
+	SOC_DAPM_SINGLE("PGA Capture Switch",
+			WM8350_LEFT_INPUT_VOLUME, 14, 1, 0),
+};
+
+/* Right Input Mixer */
+static const struct snd_kcontrol_new wm8350_right_capt_mixer_controls[] = {
+	SOC_DAPM_SINGLE_TLV("L2 Capture Volume",
+			    WM8350_INPUT_MIXER_VOLUME_R, 5, 7, 0, out_mix_tlv),
+	SOC_DAPM_SINGLE_TLV("L3 Capture Volume",
+			    WM8350_INPUT_MIXER_VOLUME_R, 13, 7, 0, out_mix_tlv),
+	SOC_DAPM_SINGLE("PGA Capture Switch",
+			WM8350_RIGHT_INPUT_VOLUME, 14, 1, 0),
+};
+
+/* Left Mic Mixer */
+static const struct snd_kcontrol_new wm8350_left_mic_mixer_controls[] = {
+	SOC_DAPM_SINGLE("INN Capture Switch", WM8350_INPUT_CONTROL, 1, 1, 0),
+	SOC_DAPM_SINGLE("INP Capture Switch", WM8350_INPUT_CONTROL, 0, 1, 0),
+	SOC_DAPM_SINGLE("IN2 Capture Switch", WM8350_INPUT_CONTROL, 2, 1, 0),
+};
+
+/* Right Mic Mixer */
+static const struct snd_kcontrol_new wm8350_right_mic_mixer_controls[] = {
+	SOC_DAPM_SINGLE("INN Capture Switch", WM8350_INPUT_CONTROL, 9, 1, 0),
+	SOC_DAPM_SINGLE("INP Capture Switch", WM8350_INPUT_CONTROL, 8, 1, 0),
+	SOC_DAPM_SINGLE("IN2 Capture Switch", WM8350_INPUT_CONTROL, 10, 1, 0),
+};
+
+/* Beep Switch */
+static const struct snd_kcontrol_new wm8350_beep_switch_controls =
+SOC_DAPM_SINGLE("Switch", WM8350_BEEP_VOLUME, 15, 1, 1);
+
+/* Out4 Capture Mux */
+static const struct snd_kcontrol_new wm8350_out4_capture_controls =
+SOC_DAPM_ENUM("Route", wm8350_enum[8]);
+
+static const struct snd_soc_dapm_widget wm8350_dapm_widgets[] = {
+
+	SND_SOC_DAPM_PGA("IN3R PGA", WM8350_POWER_MGMT_2, 11, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("IN3L PGA", WM8350_POWER_MGMT_2, 10, 0, NULL, 0),
+	SND_SOC_DAPM_PGA_E("Right Out2 PGA", WM8350_POWER_MGMT_3, 3, 0, NULL,
+			   0, pga_event,
+			   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_PGA_E("Left Out2 PGA", WM8350_POWER_MGMT_3, 2, 0, NULL, 0,
+			   pga_event,
+			   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_PGA_E("Right Out1 PGA", WM8350_POWER_MGMT_3, 1, 0, NULL,
+			   0, pga_event,
+			   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_PGA_E("Left Out1 PGA", WM8350_POWER_MGMT_3, 0, 0, NULL, 0,
+			   pga_event,
+			   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+	SND_SOC_DAPM_MIXER("Right Capture Mixer", WM8350_POWER_MGMT_2,
+			   7, 0, &wm8350_right_capt_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_right_capt_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Left Capture Mixer", WM8350_POWER_MGMT_2,
+			   6, 0, &wm8350_left_capt_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_left_capt_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Out4 Mixer", WM8350_POWER_MGMT_2, 5, 0,
+			   &wm8350_out4_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_out4_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Out3 Mixer", WM8350_POWER_MGMT_2, 4, 0,
+			   &wm8350_out3_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_out3_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Right Playback Mixer", WM8350_POWER_MGMT_2, 1, 0,
+			   &wm8350_right_play_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_right_play_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Left Playback Mixer", WM8350_POWER_MGMT_2, 0, 0,
+			   &wm8350_left_play_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_left_play_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Left Mic Mixer", WM8350_POWER_MGMT_2, 8, 0,
+			   &wm8350_left_mic_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_left_mic_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("Right Mic Mixer", WM8350_POWER_MGMT_2, 9, 0,
+			   &wm8350_right_mic_mixer_controls[0],
+			   ARRAY_SIZE(wm8350_right_mic_mixer_controls)),
+
+	/* virtual mixer for Beep and Out2R */
+	SND_SOC_DAPM_MIXER("Out2 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	SND_SOC_DAPM_SWITCH("Beep", WM8350_POWER_MGMT_3, 7, 0,
+			    &wm8350_beep_switch_controls),
+
+	SND_SOC_DAPM_ADC("Right ADC", "Right Capture",
+			 WM8350_POWER_MGMT_4, 3, 0),
+	SND_SOC_DAPM_ADC("Left ADC", "Left Capture",
+			 WM8350_POWER_MGMT_4, 2, 0),
+	SND_SOC_DAPM_DAC("Right DAC", "Right Playback",
+			 WM8350_POWER_MGMT_4, 5, 0),
+	SND_SOC_DAPM_DAC("Left DAC", "Left Playback",
+			 WM8350_POWER_MGMT_4, 4, 0),
+
+	SND_SOC_DAPM_MICBIAS("Mic Bias", WM8350_POWER_MGMT_1, 4, 0),
+
+	SND_SOC_DAPM_MUX("Out4 Capture Channel", SND_SOC_NOPM, 0, 0,
+			 &wm8350_out4_capture_controls),
+
+	SND_SOC_DAPM_OUTPUT("OUT1R"),
+	SND_SOC_DAPM_OUTPUT("OUT1L"),
+	SND_SOC_DAPM_OUTPUT("OUT2R"),
+	SND_SOC_DAPM_OUTPUT("OUT2L"),
+	SND_SOC_DAPM_OUTPUT("OUT3"),
+	SND_SOC_DAPM_OUTPUT("OUT4"),
+
+	SND_SOC_DAPM_INPUT("IN1RN"),
+	SND_SOC_DAPM_INPUT("IN1RP"),
+	SND_SOC_DAPM_INPUT("IN2R"),
+	SND_SOC_DAPM_INPUT("IN1LP"),
+	SND_SOC_DAPM_INPUT("IN1LN"),
+	SND_SOC_DAPM_INPUT("IN2L"),
+	SND_SOC_DAPM_INPUT("IN3R"),
+	SND_SOC_DAPM_INPUT("IN3L"),
+};
+
+static const struct snd_soc_dapm_route audio_map[] = {
+
+	/* left playback mixer */
+	{"Left Playback Mixer", "Playback Switch", "Left DAC"},
+	{"Left Playback Mixer", "Left Bypass Switch", "IN3L PGA"},
+	{"Left Playback Mixer", "Right Playback Switch", "Right DAC"},
+	{"Left Playback Mixer", "Left Sidetone Switch", "Left Mic Mixer"},
+	{"Left Playback Mixer", "Right Sidetone Switch", "Right Mic Mixer"},
+
+	/* right playback mixer */
+	{"Right Playback Mixer", "Playback Switch", "Right DAC"},
+	{"Right Playback Mixer", "Right Bypass Switch", "IN3R PGA"},
+	{"Right Playback Mixer", "Left Playback Switch", "Left DAC"},
+	{"Right Playback Mixer", "Left Sidetone Switch", "Left Mic Mixer"},
+	{"Right Playback Mixer", "Right Sidetone Switch", "Right Mic Mixer"},
+
+	/* out4 playback mixer */
+	{"Out4 Mixer", "Right Playback Switch", "Right DAC"},
+	{"Out4 Mixer", "Left Playback Switch", "Left DAC"},
+	{"Out4 Mixer", "Right Capture Switch", "Right Capture Mixer"},
+	{"Out4 Mixer", "Out3 Playback Switch", "Out3 Mixer"},
+	{"Out4 Mixer", "Right Mixer Switch", "Right Playback Mixer"},
+	{"Out4 Mixer", "Left Mixer Switch", "Left Playback Mixer"},
+	{"OUT4", NULL, "Out4 Mixer"},
+
+	/* out3 playback mixer */
+	{"Out3 Mixer", "Left Playback Switch", "Left DAC"},
+	{"Out3 Mixer", "Left Capture Switch", "Left Capture Mixer"},
+	{"Out3 Mixer", "Left Mixer Switch", "Left Playback Mixer"},
+	{"Out3 Mixer", "Out4 Playback Switch", "Out4 Mixer"},
+	{"OUT3", NULL, "Out3 Mixer"},
+
+	/* out2 */
+	{"Right Out2 PGA", NULL, "Right Playback Mixer"},
+	{"Left Out2 PGA", NULL, "Left Playback Mixer"},
+	{"OUT2L", NULL, "Left Out2 PGA"},
+	{"OUT2R", NULL, "Right Out2 PGA"},
+
+	/* out1 */
+	{"Right Out1 PGA", NULL, "Right Playback Mixer"},
+	{"Left Out1 PGA", NULL, "Left Playback Mixer"},
+	{"OUT1L", NULL, "Left Out1 PGA"},
+	{"OUT1R", NULL, "Right Out1 PGA"},
+
+	/* ADCs */
+	{"Left ADC", NULL, "Left Capture Mixer"},
+	{"Right ADC", NULL, "Right Capture Mixer"},
+
+	/* Left capture mixer */
+	{"Left Capture Mixer", "L2 Capture Volume", "IN2L"},
+	{"Left Capture Mixer", "L3 Capture Volume", "IN3L PGA"},
+	{"Left Capture Mixer", "PGA Capture Switch", "Left Mic Mixer"},
+	{"Left Capture Mixer", NULL, "Out4 Capture Channel"},
+
+	/* Right capture mixer */
+	{"Right Capture Mixer", "L2 Capture Volume", "IN2R"},
+	{"Right Capture Mixer", "L3 Capture Volume", "IN3R PGA"},
+	{"Right Capture Mixer", "PGA Capture Switch", "Right Mic Mixer"},
+	{"Right Capture Mixer", NULL, "Out4 Capture Channel"},
+
+	/* L3 Inputs */
+	{"IN3L PGA", NULL, "IN3L"},
+	{"IN3R PGA", NULL, "IN3R"},
+
+	/* Left Mic mixer */
+	{"Left Mic Mixer", "INN Capture Switch", "IN1LN"},
+	{"Left Mic Mixer", "INP Capture Switch", "IN1LP"},
+	{"Left Mic Mixer", "IN2 Capture Switch", "IN2L"},
+
+	/* Right Mic mixer */
+	{"Right Mic Mixer", "INN Capture Switch", "IN1RN"},
+	{"Right Mic Mixer", "INP Capture Switch", "IN1RP"},
+	{"Right Mic Mixer", "IN2 Capture Switch", "IN2R"},
+
+	/* out 4 capture */
+	{"Out4 Capture Channel", NULL, "Out4 Mixer"},
+
+	/* Beep */
+	{"Beep", NULL, "IN3R PGA"},
+};
+
+static int wm8350_add_controls(struct snd_soc_codec *codec)
+{
+	int err, i;
+
+	for (i = 0; i < ARRAY_SIZE(wm8350_snd_controls); i++) {
+		err = snd_ctl_add(codec->card,
+				  snd_soc_cnew(&wm8350_snd_controls[i],
+					       codec, NULL));
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int wm8350_add_widgets(struct snd_soc_codec *codec)
+{
+	int ret;
+
+	ret = snd_soc_dapm_new_controls(codec,
+					wm8350_dapm_widgets,
+					ARRAY_SIZE(wm8350_dapm_widgets));
+	if (ret != 0) {
+		dev_err(codec->dev, "dapm control register failed\n");
+		return ret;
+	}
+
+	/* set up audio paths */
+	ret = snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map));
+	if (ret != 0) {
+		dev_err(codec->dev, "DAPM route register failed\n");
+		return ret;
+	}
+
+	return snd_soc_dapm_new_widgets(codec);
+}
+
+static int wm8350_set_dai_sysclk(struct snd_soc_dai *codec_dai,
+				 int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm8350 *wm8350 = codec->control_data;
+	u16 fll_4;
+
+	switch (clk_id) {
+	case WM8350_MCLK_SEL_MCLK:
+		wm8350_clear_bits(wm8350, WM8350_CLOCK_CONTROL_1,
+				  WM8350_MCLK_SEL);
+		break;
+	case WM8350_MCLK_SEL_PLL_MCLK:
+	case WM8350_MCLK_SEL_PLL_DAC:
+	case WM8350_MCLK_SEL_PLL_ADC:
+	case WM8350_MCLK_SEL_PLL_32K:
+		wm8350_set_bits(wm8350, WM8350_CLOCK_CONTROL_1,
+				WM8350_MCLK_SEL);
+		fll_4 = wm8350_codec_read(codec, WM8350_FLL_CONTROL_4) &
+		    ~WM8350_FLL_CLK_SRC_MASK;
+		wm8350_codec_write(codec, WM8350_FLL_CONTROL_4, fll_4 | clk_id);
+		break;
+	}
+
+	/* MCLK direction */
+	if (dir == WM8350_MCLK_DIR_OUT)
+		wm8350_set_bits(wm8350, WM8350_CLOCK_CONTROL_2,
+				WM8350_MCLK_DIR);
+	else
+		wm8350_clear_bits(wm8350, WM8350_CLOCK_CONTROL_2,
+				  WM8350_MCLK_DIR);
+
+	return 0;
+}
+
+static int wm8350_set_clkdiv(struct snd_soc_dai *codec_dai, int div_id, int div)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 val;
+
+	switch (div_id) {
+	case WM8350_ADC_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_ADC_DIVIDER) &
+		    ~WM8350_ADC_CLKDIV_MASK;
+		wm8350_codec_write(codec, WM8350_ADC_DIVIDER, val | div);
+		break;
+	case WM8350_DAC_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_DAC_CLOCK_CONTROL) &
+		    ~WM8350_DAC_CLKDIV_MASK;
+		wm8350_codec_write(codec, WM8350_DAC_CLOCK_CONTROL, val | div);
+		break;
+	case WM8350_BCLK_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_CLOCK_CONTROL_1) &
+		    ~WM8350_BCLK_DIV_MASK;
+		wm8350_codec_write(codec, WM8350_CLOCK_CONTROL_1, val | div);
+		break;
+	case WM8350_OPCLK_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_CLOCK_CONTROL_1) &
+		    ~WM8350_OPCLK_DIV_MASK;
+		wm8350_codec_write(codec, WM8350_CLOCK_CONTROL_1, val | div);
+		break;
+	case WM8350_SYS_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_CLOCK_CONTROL_1) &
+		    ~WM8350_MCLK_DIV_MASK;
+		wm8350_codec_write(codec, WM8350_CLOCK_CONTROL_1, val | div);
+		break;
+	case WM8350_DACLR_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_DAC_LR_RATE) &
+		    ~WM8350_DACLRC_RATE_MASK;
+		wm8350_codec_write(codec, WM8350_DAC_LR_RATE, val | div);
+		break;
+	case WM8350_ADCLR_CLKDIV:
+		val = wm8350_codec_read(codec, WM8350_ADC_LR_RATE) &
+		    ~WM8350_ADCLRC_RATE_MASK;
+		wm8350_codec_write(codec, WM8350_ADC_LR_RATE, val | div);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int wm8350_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = wm8350_codec_read(codec, WM8350_AI_FORMATING) &
+	    ~(WM8350_AIF_BCLK_INV | WM8350_AIF_LRCLK_INV | WM8350_AIF_FMT_MASK);
+	u16 master = wm8350_codec_read(codec, WM8350_AI_DAC_CONTROL) &
+	    ~WM8350_BCLK_MSTR;
+	u16 dac_lrc = wm8350_codec_read(codec, WM8350_DAC_LR_RATE) &
+	    ~WM8350_DACLRC_ENA;
+	u16 adc_lrc = wm8350_codec_read(codec, WM8350_ADC_LR_RATE) &
+	    ~WM8350_ADCLRC_ENA;
+
+	/* set master/slave audio interface */
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		master |= WM8350_BCLK_MSTR;
+		dac_lrc |= WM8350_DACLRC_ENA;
+		adc_lrc |= WM8350_ADCLRC_ENA;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* interface format */
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		iface |= 0x2 << 8;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		iface |= 0x1 << 8;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		iface |= 0x3 << 8;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		iface |= 0x3 << 8;	/* lg not sure which mode */
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* clock inversion */
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		iface |= WM8350_AIF_LRCLK_INV | WM8350_AIF_BCLK_INV;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		iface |= WM8350_AIF_BCLK_INV;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		iface |= WM8350_AIF_LRCLK_INV;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm8350_codec_write(codec, WM8350_AI_FORMATING, iface);
+	wm8350_codec_write(codec, WM8350_AI_DAC_CONTROL, master);
+	wm8350_codec_write(codec, WM8350_DAC_LR_RATE, dac_lrc);
+	wm8350_codec_write(codec, WM8350_ADC_LR_RATE, adc_lrc);
+	return 0;
+}
+
+static int wm8350_pcm_trigger(struct snd_pcm_substream *substream,
+			      int cmd, struct snd_soc_dai *codec_dai)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	int master = wm8350_codec_cache_read(codec, WM8350_AI_DAC_CONTROL) &
+	    WM8350_BCLK_MSTR;
+	int enabled = 0;
+
+	/* Check that the DACs or ADCs are enabled since they are
+	 * required for LRC in master mode. The DACs or ADCs need a
+	 * valid audio path i.e. pin -> ADC or DAC -> pin before
+	 * the LRC will be enabled in master mode. */
+	if (!master && cmd != SNDRV_PCM_TRIGGER_START)
+		return 0;
+
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
+		enabled = wm8350_codec_cache_read(codec, WM8350_POWER_MGMT_4) &
+		    (WM8350_ADCR_ENA | WM8350_ADCL_ENA);
+	} else {
+		enabled = wm8350_codec_cache_read(codec, WM8350_POWER_MGMT_4) &
+		    (WM8350_DACR_ENA | WM8350_DACL_ENA);
+	}
+
+	if (!enabled) {
+		dev_err(codec->dev,
+		       "%s: invalid audio path - no clocks available\n",
+		       __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int wm8350_pcm_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *codec_dai)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	u16 iface = wm8350_codec_read(codec, WM8350_AI_FORMATING) &
+	    ~WM8350_AIF_WL_MASK;
+
+	/* bit size */
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		iface |= 0x1 << 10;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		iface |= 0x2 << 10;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		iface |= 0x3 << 10;
+		break;
+	}
+
+	wm8350_codec_write(codec, WM8350_AI_FORMATING, iface);
+	return 0;
+}
+
+static int wm8350_mute(struct snd_soc_dai *dai, int mute)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm8350 *wm8350 = codec->control_data;
+
+	if (mute)
+		wm8350_set_bits(wm8350, WM8350_DAC_MUTE, WM8350_DAC_MUTE_ENA);
+	else
+		wm8350_clear_bits(wm8350, WM8350_DAC_MUTE, WM8350_DAC_MUTE_ENA);
+	return 0;
+}
+
+/* FLL divisors */
+struct _fll_div {
+	int div;		/* FLL_OUTDIV */
+	int n;
+	int k;
+	int ratio;		/* FLL_FRATIO */
+};
+
+/* The size in bits of the fll divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_FLL_SIZE ((1 << 16) * 10)
+
+static inline int fll_factors(struct _fll_div *fll_div, unsigned int input,
+			      unsigned int output)
+{
+	u64 Kpart;
+	unsigned int t1, t2, K, Nmod;
+
+	if (output >= 2815250 && output <= 3125000)
+		fll_div->div = 0x4;
+	else if (output >= 5625000 && output <= 6250000)
+		fll_div->div = 0x3;
+	else if (output >= 11250000 && output <= 12500000)
+		fll_div->div = 0x2;
+	else if (output >= 22500000 && output <= 25000000)
+		fll_div->div = 0x1;
+	else {
+		printk(KERN_ERR "wm8350: fll freq %d out of range\n", output);
+		return -EINVAL;
+	}
+
+	if (input > 48000)
+		fll_div->ratio = 1;
+	else
+		fll_div->ratio = 8;
+
+	t1 = output * (1 << (fll_div->div + 1));
+	t2 = input * fll_div->ratio;
+
+	fll_div->n = t1 / t2;
+	Nmod = t1 % t2;
+
+	if (Nmod) {
+		Kpart = FIXED_FLL_SIZE * (long long)Nmod;
+		do_div(Kpart, t2);
+		K = Kpart & 0xFFFFFFFF;
+
+		/* Check if we need to round */
+		if ((K % 10) >= 5)
+			K += 5;
+
+		/* Move down to proper range now rounding is done */
+		K /= 10;
+		fll_div->k = K;
+	} else
+		fll_div->k = 0;
+
+	return 0;
+}
+
+static int wm8350_set_fll(struct snd_soc_dai *codec_dai,
+			  int pll_id, unsigned int freq_in,
+			  unsigned int freq_out)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm8350 *wm8350 = codec->control_data;
+	struct _fll_div fll_div;
+	int ret = 0;
+	u16 fll_1, fll_4;
+
+	/* power down FLL - we need to do this for reconfiguration */
+	wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4,
+			  WM8350_FLL_ENA | WM8350_FLL_OSC_ENA);
+
+	if (freq_out == 0 || freq_in == 0)
+		return ret;
+
+	ret = fll_factors(&fll_div, freq_in, freq_out);
+	if (ret < 0)
+		return ret;
+	dev_dbg(wm8350->dev,
+		"FLL in %d FLL out %d N 0x%x K 0x%x div %d ratio %d",
+		freq_in, freq_out, fll_div.n, fll_div.k, fll_div.div,
+		fll_div.ratio);
+
+	/* set up N.K & dividers */
+	fll_1 = wm8350_codec_read(codec, WM8350_FLL_CONTROL_1) &
+	    ~(WM8350_FLL_OUTDIV_MASK | WM8350_FLL_RSP_RATE_MASK | 0xc000);
+	wm8350_codec_write(codec, WM8350_FLL_CONTROL_1,
+			   fll_1 | (fll_div.div << 8) | 0x50);
+	wm8350_codec_write(codec, WM8350_FLL_CONTROL_2,
+			   (fll_div.ratio << 11) | (fll_div.
+						    n & WM8350_FLL_N_MASK));
+	wm8350_codec_write(codec, WM8350_FLL_CONTROL_3, fll_div.k);
+	fll_4 = wm8350_codec_read(codec, WM8350_FLL_CONTROL_4) &
+	    ~(WM8350_FLL_FRAC | WM8350_FLL_SLOW_LOCK_REF);
+	wm8350_codec_write(codec, WM8350_FLL_CONTROL_4,
+			   fll_4 | (fll_div.k ? WM8350_FLL_FRAC : 0) |
+			   (fll_div.ratio == 8 ? WM8350_FLL_SLOW_LOCK_REF : 0));
+
+	/* power FLL on */
+	wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_FLL_OSC_ENA);
+	wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_FLL_ENA);
+
+	return 0;
+}
+
+static int wm8350_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	struct wm8350 *wm8350 = codec->control_data;
+	struct wm8350_data *priv = codec->private_data;
+	struct wm8350_audio_platform_data *platform =
+		wm8350->codec.platform_data;
+	u16 pm1;
+	int ret;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) &
+		    ~(WM8350_VMID_MASK | WM8350_CODEC_ISEL_MASK);
+		wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1,
+				 pm1 | WM8350_VMID_50K |
+				 platform->codec_current_on << 14);
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1);
+		pm1 &= ~WM8350_VMID_MASK;
+		wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1,
+				 pm1 | WM8350_VMID_50K);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			ret = regulator_bulk_enable(ARRAY_SIZE(priv->supplies),
+						    priv->supplies);
+			if (ret != 0)
+				return ret;
+
+			/* Enable the system clock */
+			wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4,
+					WM8350_SYSCLK_ENA);
+
+			/* mute DAC & outputs */
+			wm8350_set_bits(wm8350, WM8350_DAC_MUTE,
+					WM8350_DAC_MUTE_ENA);
+
+			/* discharge cap memory */
+			wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL,
+					 platform->dis_out1 |
+					 (platform->dis_out2 << 2) |
+					 (platform->dis_out3 << 4) |
+					 (platform->dis_out4 << 6));
+
+			/* wait for discharge */
+			schedule_timeout_interruptible(msecs_to_jiffies
+						       (platform->
+							cap_discharge_msecs));
+
+			/* enable antipop */
+			wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL,
+					 (platform->vmid_s_curve << 8));
+
+			/* ramp up vmid */
+			wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1,
+					 (platform->
+					  codec_current_charge << 14) |
+					 WM8350_VMID_5K | WM8350_VMIDEN |
+					 WM8350_VBUFEN);
+
+			/* wait for vmid */
+			schedule_timeout_interruptible(msecs_to_jiffies
+						       (platform->
+							vmid_charge_msecs));
+
+			/* turn on vmid 300k  */
+			pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) &
+			    ~(WM8350_VMID_MASK | WM8350_CODEC_ISEL_MASK);
+			pm1 |= WM8350_VMID_300K |
+				(platform->codec_current_standby << 14);
+			wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1,
+					 pm1);
+
+
+			/* enable analogue bias */
+			pm1 |= WM8350_BIASEN;
+			wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1);
+
+			/* disable antipop */
+			wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, 0);
+
+		} else {
+			/* turn on vmid 300k and reduce current */
+			pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) &
+			    ~(WM8350_VMID_MASK | WM8350_CODEC_ISEL_MASK);
+			wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1,
+					 pm1 | WM8350_VMID_300K |
+					 (platform->
+					  codec_current_standby << 14));
+
+		}
+		break;
+
+	case SND_SOC_BIAS_OFF:
+
+		/* mute DAC & enable outputs */
+		wm8350_set_bits(wm8350, WM8350_DAC_MUTE, WM8350_DAC_MUTE_ENA);
+
+		wm8350_set_bits(wm8350, WM8350_POWER_MGMT_3,
+				WM8350_OUT1L_ENA | WM8350_OUT1R_ENA |
+				WM8350_OUT2L_ENA | WM8350_OUT2R_ENA);
+
+		/* enable anti pop S curve */
+		wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL,
+				 (platform->vmid_s_curve << 8));
+
+		/* turn off vmid  */
+		pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) &
+		    ~WM8350_VMIDEN;
+		wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1);
+
+		/* wait */
+		schedule_timeout_interruptible(msecs_to_jiffies
+					       (platform->
+						vmid_discharge_msecs));
+
+		wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL,
+				 (platform->vmid_s_curve << 8) |
+				 platform->dis_out1 |
+				 (platform->dis_out2 << 2) |
+				 (platform->dis_out3 << 4) |
+				 (platform->dis_out4 << 6));
+
+		/* turn off VBuf and drain */
+		pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) &
+		    ~(WM8350_VBUFEN | WM8350_VMID_MASK);
+		wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1,
+				 pm1 | WM8350_OUTPUT_DRAIN_EN);
+
+		/* wait */
+		schedule_timeout_interruptible(msecs_to_jiffies
+					       (platform->drain_msecs));
+
+		pm1 &= ~WM8350_BIASEN;
+		wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1);
+
+		/* disable anti-pop */
+		wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, 0);
+
+		wm8350_clear_bits(wm8350, WM8350_LOUT1_VOLUME,
+				  WM8350_OUT1L_ENA);
+		wm8350_clear_bits(wm8350, WM8350_ROUT1_VOLUME,
+				  WM8350_OUT1R_ENA);
+		wm8350_clear_bits(wm8350, WM8350_LOUT2_VOLUME,
+				  WM8350_OUT2L_ENA);
+		wm8350_clear_bits(wm8350, WM8350_ROUT2_VOLUME,
+				  WM8350_OUT2R_ENA);
+
+		/* disable clock gen */
+		wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4,
+				  WM8350_SYSCLK_ENA);
+
+		regulator_bulk_disable(ARRAY_SIZE(priv->supplies),
+				       priv->supplies);
+		break;
+	}
+	codec->bias_level = level;
+	return 0;
+}
+
+static int wm8350_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->codec;
+
+	wm8350_set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+
+static int wm8350_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->codec;
+
+	wm8350_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	if (codec->suspend_bias_level == SND_SOC_BIAS_ON)
+		wm8350_set_bias_level(codec, SND_SOC_BIAS_ON);
+
+	return 0;
+}
+
+static struct snd_soc_codec *wm8350_codec;
+
+static int wm8350_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	struct wm8350 *wm8350;
+	struct wm8350_data *priv;
+	int ret;
+	struct wm8350_output *out1;
+	struct wm8350_output *out2;
+
+	BUG_ON(!wm8350_codec);
+
+	socdev->codec = wm8350_codec;
+	codec = socdev->codec;
+	wm8350 = codec->control_data;
+	priv = codec->private_data;
+
+	/* Enable the codec */
+	wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CODEC_ENA);
+
+	/* Enable robust clocking mode in ADC */
+	wm8350_codec_write(codec, WM8350_SECURITY, 0xa7);
+	wm8350_codec_write(codec, 0xde, 0x13);
+	wm8350_codec_write(codec, WM8350_SECURITY, 0);
+
+	/* read OUT1 & OUT2 volumes */
+	out1 = &priv->out1;
+	out2 = &priv->out2;
+	out1->left_vol = (wm8350_reg_read(wm8350, WM8350_LOUT1_VOLUME) &
+			  WM8350_OUT1L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT;
+	out1->right_vol = (wm8350_reg_read(wm8350, WM8350_ROUT1_VOLUME) &
+			   WM8350_OUT1R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT;
+	out2->left_vol = (wm8350_reg_read(wm8350, WM8350_LOUT2_VOLUME) &
+			  WM8350_OUT2L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT;
+	out2->right_vol = (wm8350_reg_read(wm8350, WM8350_ROUT2_VOLUME) &
+			   WM8350_OUT2R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT;
+	wm8350_reg_write(wm8350, WM8350_LOUT1_VOLUME, 0);
+	wm8350_reg_write(wm8350, WM8350_ROUT1_VOLUME, 0);
+	wm8350_reg_write(wm8350, WM8350_LOUT2_VOLUME, 0);
+	wm8350_reg_write(wm8350, WM8350_ROUT2_VOLUME, 0);
+
+	/* Latch VU bits & mute */
+	wm8350_set_bits(wm8350, WM8350_LOUT1_VOLUME,
+			WM8350_OUT1_VU | WM8350_OUT1L_MUTE);
+	wm8350_set_bits(wm8350, WM8350_LOUT2_VOLUME,
+			WM8350_OUT2_VU | WM8350_OUT2L_MUTE);
+	wm8350_set_bits(wm8350, WM8350_ROUT1_VOLUME,
+			WM8350_OUT1_VU | WM8350_OUT1R_MUTE);
+	wm8350_set_bits(wm8350, WM8350_ROUT2_VOLUME,
+			WM8350_OUT2_VU | WM8350_OUT2R_MUTE);
+
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to create pcms\n");
+		return ret;
+	}
+
+	wm8350_add_controls(codec);
+	wm8350_add_widgets(codec);
+
+	wm8350_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to register card\n");
+		goto card_err;
+	}
+
+	return 0;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+	return ret;
+}
+
+static int wm8350_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->codec;
+	struct wm8350 *wm8350 = codec->control_data;
+	int ret;
+
+	/* cancel any work waiting to be queued. */
+	ret = cancel_delayed_work(&codec->delayed_work);
+
+	/* if there was any work waiting then we run it now and
+	 * wait for its completion */
+	if (ret) {
+		schedule_delayed_work(&codec->delayed_work, 0);
+		flush_scheduled_work();
+	}
+
+	wm8350_set_bias_level(codec, SND_SOC_BIAS_OFF);
+
+	wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CODEC_ENA);
+
+	return 0;
+}
+
+#define WM8350_RATES (SNDRV_PCM_RATE_8000_96000)
+
+#define WM8350_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
+			SNDRV_PCM_FMTBIT_S20_3LE |\
+			SNDRV_PCM_FMTBIT_S24_LE)
+
+struct snd_soc_dai wm8350_dai = {
+	.name = "WM8350",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM8350_RATES,
+		.formats = WM8350_FORMATS,
+	},
+	.capture = {
+		 .stream_name = "Capture",
+		 .channels_min = 1,
+		 .channels_max = 2,
+		 .rates = WM8350_RATES,
+		 .formats = WM8350_FORMATS,
+	 },
+	.ops = {
+		 .hw_params = wm8350_pcm_hw_params,
+		 .digital_mute = wm8350_mute,
+		 .trigger = wm8350_pcm_trigger,
+		 .set_fmt = wm8350_set_dai_fmt,
+		 .set_sysclk = wm8350_set_dai_sysclk,
+		 .set_pll = wm8350_set_fll,
+		 .set_clkdiv = wm8350_set_clkdiv,
+	 },
+};
+EXPORT_SYMBOL_GPL(wm8350_dai);
+
+struct snd_soc_codec_device soc_codec_dev_wm8350 = {
+	.probe = 	wm8350_probe,
+	.remove = 	wm8350_remove,
+	.suspend = 	wm8350_suspend,
+	.resume =	wm8350_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm8350);
+
+static int wm8350_codec_probe(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+	struct wm8350_data *priv;
+	struct snd_soc_codec *codec;
+	int ret, i;
+
+	if (wm8350->codec.platform_data == NULL) {
+		dev_err(&pdev->dev, "No audio platform data supplied\n");
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sizeof(struct wm8350_data), GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(supply_names); i++)
+		priv->supplies[i].supply = supply_names[i];
+
+	ret = regulator_bulk_get(wm8350->dev, ARRAY_SIZE(priv->supplies),
+				 priv->supplies);
+	if (ret != 0)
+		goto err_priv;
+
+	codec = &priv->codec;
+	wm8350->codec.codec = codec;
+
+	wm8350_dai.dev = &pdev->dev;
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+	codec->dev = &pdev->dev;
+	codec->name = "WM8350";
+	codec->owner = THIS_MODULE;
+	codec->read = wm8350_codec_read;
+	codec->write = wm8350_codec_write;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm8350_set_bias_level;
+	codec->dai = &wm8350_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = WM8350_MAX_REGISTER;
+	codec->private_data = priv;
+	codec->control_data = wm8350;
+
+	/* Put the codec into reset if it wasn't already */
+	wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CODEC_ENA);
+
+	INIT_DELAYED_WORK(&codec->delayed_work, wm8350_pga_work);
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0)
+		goto err_supply;
+
+	wm8350_codec = codec;
+
+	ret = snd_soc_register_dai(&wm8350_dai);
+	if (ret != 0)
+		goto err_codec;
+	return 0;
+
+err_codec:
+	snd_soc_unregister_codec(codec);
+err_supply:
+	regulator_bulk_free(ARRAY_SIZE(priv->supplies), priv->supplies);
+err_priv:
+	kfree(priv);
+	wm8350_codec = NULL;
+	return ret;
+}
+
+static int wm8350_codec_remove(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = wm8350->codec.codec;
+	struct wm8350_data *priv = codec->private_data;
+
+	snd_soc_unregister_dai(&wm8350_dai);
+	snd_soc_unregister_codec(codec);
+	regulator_bulk_free(ARRAY_SIZE(priv->supplies), priv->supplies);
+	kfree(priv);
+	wm8350_codec = NULL;
+	return 0;
+}
+
+static struct platform_driver wm8350_codec_driver = {
+	.driver = {
+		   .name = "wm8350-codec",
+		   .owner = THIS_MODULE,
+		   },
+	.probe = wm8350_codec_probe,
+	.remove = __devexit_p(wm8350_codec_remove),
+};
+
+static __init int wm8350_init(void)
+{
+	return platform_driver_register(&wm8350_codec_driver);
+}
+module_init(wm8350_init);
+
+static __exit void wm8350_exit(void)
+{
+	platform_driver_unregister(&wm8350_codec_driver);
+}
+module_exit(wm8350_exit);
+
+MODULE_DESCRIPTION("ASoC WM8350 driver");
+MODULE_AUTHOR("Liam Girdwood");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:wm8350-codec");
diff --git a/sound/soc/codecs/wm8350.h b/sound/soc/codecs/wm8350.h
new file mode 100644
index 000000000000..cc2887aa6c38
--- /dev/null
+++ b/sound/soc/codecs/wm8350.h
@@ -0,0 +1,20 @@
+/*
+ * wm8350.h - WM8903 audio codec interface
+ *
+ * Copyright 2008 Wolfson Microelectronics PLC.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#ifndef _WM8350_H
+#define _WM8350_H
+
+#include <sound/soc.h>
+
+extern struct snd_soc_dai wm8350_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm8350;
+
+#endif
-- 
cgit v1.2.3


From 64db4cfff99c04cd5f550357edcc8780f96b54a2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 21:55:32 +0100
Subject: "Tree RCU": scalable classic RCU implementation

This patch fixes a long-standing performance bug in classic RCU that
results in massive internal-to-RCU lock contention on systems with
more than a few hundred CPUs.  Although this patch creates a separate
flavor of RCU for ease of review and patch maintenance, it is intended
to replace classic RCU.

This patch still handles stress better than does mainline, so I am still
calling it ready for inclusion.  This patch is against the -tip tree.
Nevertheless, experience on an actual 1000+ CPU machine would still be
most welcome.

Most of the changes noted below were found while creating an rcutiny
(which should permit ejecting the current rcuclassic) and while doing
detailed line-by-line documentation.

Updates from v9 (http://lkml.org/lkml/2008/12/2/334):

o	Fixes from remainder of line-by-line code walkthrough,
	including comment spelling, initialization, undesirable
	narrowing due to type conversion, removing redundant memory
	barriers, removing redundant local-variable initialization,
	and removing redundant local variables.

	I do not believe that any of these fixes address the CPU-hotplug
	issues that Andi Kleen was seeing, but please do give it a whirl
	in case the machine is smarter than I am.

	A writeup from the walkthrough may be found at the following
	URL, in case you are suffering from terminal insomnia or
	masochism:

	http://www.kernel.org/pub/linux/kernel/people/paulmck/tmp/rcutree-walkthrough.2008.12.16a.pdf

o	Made rcutree tracing use seq_file, as suggested some time
	ago by Lai Jiangshan.

o	Added a .csv variant of the rcudata debugfs trace file, to allow
	people having thousands of CPUs to drop the data into
	a spreadsheet.	Tested with oocalc and gnumeric.  Updated
	documentation to suit.

Updates from v8 (http://lkml.org/lkml/2008/11/15/139):

o	Fix a theoretical race between grace-period initialization and
	force_quiescent_state() that could occur if more than three
	jiffies were required to carry out the grace-period
	initialization.  Which it might, if you had enough CPUs.

o	Apply Ingo's printk-standardization patch.

o	Substitute local variables for repeated accesses to global
	variables.

o	Fix comment misspellings and redundant (but harmless) increments
	of ->n_rcu_pending (this latter after having explicitly added it).

o	Apply checkpatch fixes.

Updates from v7 (http://lkml.org/lkml/2008/10/10/291):

o	Fixed a number of problems noted by Gautham Shenoy, including
	the cpu-stall-detection bug that he was having difficulty
	convincing me was real.  ;-)

o	Changed cpu-stall detection to wait for ten seconds rather than
	three in order to reduce false positive, as suggested by Ingo
	Molnar.

o	Produced a design document (http://lwn.net/Articles/305782/).
	The act of writing this document uncovered a number of both
	theoretical and "here and now" bugs as noted below.

o	Fix dynticks_nesting accounting confusion, simplify WARN_ON()
	condition, fix kerneldoc comments, and add memory barriers
	in dynticks interface functions.

o	Add more data to tracing.

o	Remove unused "rcu_barrier" field from rcu_data structure.

o	Count calls to rcu_pending() from scheduling-clock interrupt
	to use as a surrogate timebase should jiffies stop counting.

o	Fix a theoretical race between force_quiescent_state() and
	grace-period initialization.  Yes, initialization does have to
	go on for some jiffies for this race to occur, but given enough
	CPUs...

Updates from v6 (http://lkml.org/lkml/2008/9/23/448):

o	Fix a number of checkpatch.pl complaints.

o	Apply review comments from Ingo Molnar and Lai Jiangshan
	on the stall-detection code.

o	Fix several bugs in !CONFIG_SMP builds.

o	Fix a misspelled config-parameter name so that RCU now announces
	at boot time if stall detection is configured.

o	Run tests on numerous combinations of configurations parameters,
	which after the fixes above, now build and run correctly.

Updates from v5 (http://lkml.org/lkml/2008/9/15/92, bad subject line):

o	Fix a compiler error in the !CONFIG_FANOUT_EXACT case (blew a
	changeset some time ago, and finally got around to retesting
	this option).

o	Fix some tracing bugs in rcupreempt that caused incorrect
	totals to be printed.

o	I now test with a more brutal random-selection online/offline
	script (attached).  Probably more brutal than it needs to be
	on the people reading it as well, but so it goes.

o	A number of optimizations and usability improvements:

	o	Make rcu_pending() ignore the grace-period timeout when
		there is no grace period in progress.

	o	Make force_quiescent_state() avoid going for a global
		lock in the case where there is no grace period in
		progress.

	o	Rearrange struct fields to improve struct layout.

	o	Make call_rcu() initiate a grace period if RCU was
		idle, rather than waiting for the next scheduling
		clock interrupt.

	o	Invoke rcu_irq_enter() and rcu_irq_exit() only when
		idle, as suggested by Andi Kleen.  I still don't
		completely trust this change, and might back it out.

	o	Make CONFIG_RCU_TRACE be the single config variable
		manipulated for all forms of RCU, instead of the prior
		confusion.

	o	Document tracing files and formats for both rcupreempt
		and rcutree.

Updates from v4 for those missing v5 given its bad subject line:

o	Separated dynticks interface so that NMIs and irqs call separate
	functions, greatly simplifying it.  In particular, this code
	no longer requires a proof of correctness.  ;-)

o	Separated dynticks state out into its own per-CPU structure,
	avoiding the duplicated accounting.

o	The case where a dynticks-idle CPU runs an irq handler that
	invokes call_rcu() is now correctly handled, forcing that CPU
	out of dynticks-idle mode.

o	Review comments have been applied (thank you all!!!).
	For but one example, fixed the dynticks-ordering issue that
	Manfred pointed out, saving me much debugging.  ;-)

o	Adjusted rcuclassic and rcupreempt to handle dynticks changes.

Attached is an updated patch to Classic RCU that applies a hierarchy,
greatly reducing the contention on the top-level lock for large machines.
This passes 10-hour concurrent rcutorture and online-offline testing on
128-CPU ppc64 without dynticks enabled, and exposes some timekeeping
bugs in presence of dynticks (exciting working on a system where
"sleep 1" hangs until interrupted...), which were fixed in the
2.6.27 kernel.  It is getting more reliable than mainline by some
measures, so the next version will be against -tip for inclusion.
See also Manfred Spraul's recent patches (or his earlier work from
2004 at http://marc.info/?l=linux-kernel&m=108546384711797&w=2).
We will converge onto a common patch in the fullness of time, but are
currently exploring different regions of the design space.  That said,
I have already gratefully stolen quite a few of Manfred's ideas.

This patch provides CONFIG_RCU_FANOUT, which controls the bushiness
of the RCU hierarchy.  Defaults to 32 on 32-bit machines and 64 on
64-bit machines.  If CONFIG_NR_CPUS is less than CONFIG_RCU_FANOUT,
there is no hierarchy.  By default, the RCU initialization code will
adjust CONFIG_RCU_FANOUT to balance the hierarchy, so strongly NUMA
architectures may choose to set CONFIG_RCU_FANOUT_EXACT to disable
this balancing, allowing the hierarchy to be exactly aligned to the
underlying hardware.  Up to two levels of hierarchy are permitted
(in addition to the root node), allowing up to 16,384 CPUs on 32-bit
systems and up to 262,144 CPUs on 64-bit systems.  I just know that I
am going to regret saying this, but this seems more than sufficient
for the foreseeable future.  (Some architectures might wish to set
CONFIG_RCU_FANOUT=4, which would limit such architectures to 64 CPUs.
If this becomes a real problem, additional levels can be added, but I
doubt that it will make a significant difference on real hardware.)

In the common case, a given CPU will manipulate its private rcu_data
structure and the rcu_node structure that it shares with its immediate
neighbors.  This can reduce both lock and memory contention by multiple
orders of magnitude, which should eliminate the need for the strange
manipulations that are reported to be required when running Linux on
very large systems.

Some shortcomings:

o	More bugs will probably surface as a result of an ongoing
	line-by-line code inspection.

	Patches will be provided as required.

o	There are probably hangs, rcutorture failures, &c.  Seems
	quite stable on a 128-CPU machine, but that is kind of small
	compared to 4096 CPUs.  However, seems to do better than
	mainline.

	Patches will be provided as required.

o	The memory footprint of this version is several KB larger
	than rcuclassic.

	A separate UP-only rcutiny patch will be provided, which will
	reduce the memory footprint significantly, even compared
	to the old rcuclassic.  One such patch passes light testing,
	and has a memory footprint smaller even than rcuclassic.
	Initial reaction from various embedded guys was "it is not
	worth it", so am putting it aside.

Credits:

o	Manfred Spraul for ideas, review comments, and bugs spotted,
	as well as some good friendly competition.  ;-)

o	Josh Triplett, Ingo Molnar, Peter Zijlstra, Mathieu Desnoyers,
	Lai Jiangshan, Andi Kleen, Andy Whitcroft, and Andrew Morton
	for reviews and comments.

o	Thomas Gleixner for much-needed help with some timer issues
	(see patches below).

o	Jon M. Tollefson, Tim Pepper, Andrew Theurer, Jose R. Santos,
	Andy Whitcroft, Darrick Wong, Nishanth Aravamudan, Anton
	Blanchard, Dave Kleikamp, and Nathan Lynch for keeping machines
	alive despite my heavy abuse^Wtesting.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/RCU/00-INDEX             |    2 +
 Documentation/RCU/trace.txt            |  413 +++++++++
 arch/powerpc/platforms/pseries/rtasd.c |    4 +
 include/linux/hardirq.h                |   14 +-
 include/linux/rcupdate.h               |   10 +-
 include/linux/rcutree.h                |  329 +++++++
 init/Kconfig                           |   18 +-
 kernel/Kconfig.preempt                 |   62 +-
 kernel/Makefile                        |    6 +-
 kernel/rcupreempt.c                    |   10 +
 kernel/rcupreempt_trace.c              |   10 +-
 kernel/rcutree.c                       | 1535 ++++++++++++++++++++++++++++++++
 kernel/rcutree_trace.c                 |  271 ++++++
 kernel/softirq.c                       |    5 +-
 lib/Kconfig.debug                      |   13 +
 15 files changed, 2671 insertions(+), 31 deletions(-)
 create mode 100644 Documentation/RCU/trace.txt
 create mode 100644 include/linux/rcutree.h
 create mode 100644 kernel/rcutree.c
 create mode 100644 kernel/rcutree_trace.c

(limited to 'include')

diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 461481dfb7c3..7dc0695a8f90 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -16,6 +16,8 @@ RTFP.txt
 	- List of RCU papers (bibliography) going back to 1980.
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
+trace.txt
+	- CONFIG_RCU_TRACE debugfs files and formats
 UP.txt
 	- RCU on Uniprocessor Systems
 whatisRCU.txt
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
new file mode 100644
index 000000000000..068848240a8b
--- /dev/null
+++ b/Documentation/RCU/trace.txt
@@ -0,0 +1,413 @@
+CONFIG_RCU_TRACE debugfs Files and Formats
+
+
+The rcupreempt and rcutree implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+Note that the rcuclassic implementation of RCU does not provide debugfs
+trace output.
+
+The following sections describe the debugfs files and formats for
+preemptable RCU (rcupreempt) and hierarchical RCU (rcutree).
+
+
+Preemptable RCU debugfs Files and Formats
+
+This implementation of RCU provides three debugfs files under the
+top-level directory RCU: rcu/rcuctrs (which displays the per-CPU
+counters used by preemptable RCU) rcu/rcugp (which displays grace-period
+counters), and rcu/rcustats (which internal counters for debugging RCU).
+
+The output of "cat rcu/rcuctrs" looks as follows:
+
+CPU last cur F M
+  0    5  -5 0 0
+  1   -1   0 0 0
+  2    0   1 0 0
+  3    0   1 0 0
+  4    0   1 0 0
+  5    0   1 0 0
+  6    0   2 0 0
+  7    0  -1 0 0
+  8    0   1 0 0
+ggp = 26226, state = waitzero
+
+The per-CPU fields are as follows:
+
+o	"CPU" gives the CPU number.  Offline CPUs are not displayed.
+
+o	"last" gives the value of the counter that is being decremented
+	for the current grace period phase.  In the example above,
+	the counters sum to 4, indicating that there are still four
+	RCU read-side critical sections still running that started
+	before the last counter flip.
+
+o	"cur" gives the value of the counter that is currently being
+	both incremented (by rcu_read_lock()) and decremented (by
+	rcu_read_unlock()).  In the example above, the counters sum to
+	1, indicating that there is only one RCU read-side critical section
+	still running that started after the last counter flip.
+
+o	"F" indicates whether RCU is waiting for this CPU to acknowledge
+	a counter flip.  In the above example, RCU is not waiting on any,
+	which is consistent with the state being "waitzero" rather than
+	"waitack".
+
+o	"M" indicates whether RCU is waiting for this CPU to execute a
+	memory barrier.  In the above example, RCU is not waiting on any,
+	which is consistent with the state being "waitzero" rather than
+	"waitmb".
+
+o	"ggp" is the global grace-period counter.
+
+o	"state" is the RCU state, which can be one of the following:
+
+	o	"idle": there is no grace period in progress.
+
+	o	"waitack": RCU just incremented the global grace-period
+		counter, which has the effect of reversing the roles of
+		the "last" and "cur" counters above, and is waiting for
+		all the CPUs to acknowledge the flip.  Once the flip has
+		been acknowledged, CPUs will no longer be incrementing
+		what are now the "last" counters, so that their sum will
+		decrease monotonically down to zero.
+
+	o	"waitzero": RCU is waiting for the sum of the "last" counters
+		to decrease to zero.
+
+	o	"waitmb": RCU is waiting for each CPU to execute a memory
+		barrier, which ensures that instructions from a given CPU's
+		last RCU read-side critical section cannot be reordered
+		with instructions following the memory-barrier instruction.
+
+The output of "cat rcu/rcugp" looks as follows:
+
+oldggp=48870  newggp=48873
+
+Note that reading from this file provokes a synchronize_rcu().  The
+"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before
+executing the synchronize_rcu(), and the "newggp" value is also the
+"ggp" value, but taken after the synchronize_rcu() command returns.
+
+
+The output of "cat rcu/rcugp" looks as follows:
+
+na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871
+1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640
+z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639
+
+These are counters tracking internal preemptable-RCU events, however,
+some of them may be useful for debugging algorithms using RCU.  In
+particular, the "nl", "wl", and "dl" values track the number of RCU
+callbacks in various states.  The fields are as follows:
+
+o	"na" is the total number of RCU callbacks that have been enqueued
+	since boot.
+
+o	"nl" is the number of RCU callbacks waiting for the previous
+	grace period to end so that they can start waiting on the next
+	grace period.
+
+o	"wa" is the total number of RCU callbacks that have started waiting
+	for a grace period since boot.  "na" should be roughly equal to
+	"nl" plus "wa".
+
+o	"wl" is the number of RCU callbacks currently waiting for their
+	grace period to end.
+
+o	"da" is the total number of RCU callbacks whose grace periods
+	have completed since boot.  "wa" should be roughly equal to
+	"wl" plus "da".
+
+o	"dr" is the total number of RCU callbacks that have been removed
+	from the list of callbacks ready to invoke.  "dr" should be roughly
+	equal to "da".
+
+o	"di" is the total number of RCU callbacks that have been invoked
+	since boot.  "di" should be roughly equal to "da", though some
+	early versions of preemptable RCU had a bug so that only the
+	last CPU's count of invocations was displayed, rather than the
+	sum of all CPU's counts.
+
+o	"1" is the number of calls to rcu_try_flip().  This should be
+	roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1"
+	described below.  In other words, the number of times that
+	the state machine is visited should be equal to the sum of the
+	number of times that each state is visited plus the number of
+	times that the state-machine lock acquisition failed.
+
+o	"e1" is the number of times that rcu_try_flip() was unable to
+	acquire the fliplock.
+
+o	"i1" is the number of calls to rcu_try_flip_idle().
+
+o	"ie1" is the number of times rcu_try_flip_idle() exited early
+	due to the calling CPU having no work for RCU.
+
+o	"g1" is the number of times that rcu_try_flip_idle() decided
+	to start a new grace period.  "i1" should be roughly equal to
+	"ie1" plus "g1".
+
+o	"a1" is the number of calls to rcu_try_flip_waitack().
+
+o	"ae1" is the number of times that rcu_try_flip_waitack() found
+	that at least one CPU had not yet acknowledge the new grace period
+	(AKA "counter flip").
+
+o	"a2" is the number of time rcu_try_flip_waitack() found that
+	all CPUs had acknowledged.  "a1" should be roughly equal to
+	"ae1" plus "a2".  (This particular output was collected on
+	a 128-CPU machine, hence the smaller-than-usual fraction of
+	calls to rcu_try_flip_waitack() finding all CPUs having already
+	acknowledged.)
+
+o	"z1" is the number of calls to rcu_try_flip_waitzero().
+
+o	"ze1" is the number of times that rcu_try_flip_waitzero() found
+	that not all of the old RCU read-side critical sections had
+	completed.
+
+o	"z2" is the number of times that rcu_try_flip_waitzero() finds
+	the sum of the counters equal to zero, in other words, that
+	all of the old RCU read-side critical sections had completed.
+	The value of "z1" should be roughly equal to "ze1" plus
+	"z2".
+
+o	"m1" is the number of calls to rcu_try_flip_waitmb().
+
+o	"me1" is the number of times that rcu_try_flip_waitmb() finds
+	that at least one CPU has not yet executed a memory barrier.
+
+o	"m2" is the number of times that rcu_try_flip_waitmb() finds that
+	all CPUs have executed a memory barrier.
+
+
+Hierarchical RCU debugfs Files and Formats
+
+This implementation of RCU provides three debugfs files under the
+top-level directory RCU: rcu/rcudata (which displays fields in struct
+rcu_data), rcu/rcugp (which displays grace-period counters), and
+rcu/rcuhier (which displays the struct rcu_node hierarchy).
+
+The output of "cat rcu/rcudata" looks as follows:
+
+rcu:
+  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
+  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
+  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
+  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
+  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
+  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
+  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
+  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu_bh:
+  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
+  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
+  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
+  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
+  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
+  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+
+The first section lists the rcu_data structures for rcu, the second for
+rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
+The fields are as follows:
+
+o	The number at the beginning of each line is the CPU number.
+	CPUs numbers followed by an exclamation mark are offline,
+	but have been online at least once since boot.	There will be
+	no output for CPUs that have never been online, which can be
+	a good thing in the surprisingly common case where NR_CPUS is
+	substantially larger than the number of actual CPUs.
+
+o	"c" is the count of grace periods that this CPU believes have
+	completed.  CPUs in dynticks idle mode may lag quite a ways
+	behind, for example, CPU 4 under "rcu" above, which has slept
+	through the past 25 RCU grace periods.	It is not unusual to
+	see CPUs lagging by thousands of grace periods.
+
+o	"g" is the count of grace periods that this CPU believes have
+	started.  Again, CPUs in dynticks idle mode may lag behind.
+	If the "c" and "g" values are equal, this CPU has already
+	reported a quiescent state for the last RCU grace period that
+	it is aware of, otherwise, the CPU believes that it owes RCU a
+	quiescent state.
+
+o	"pq" indicates that this CPU has passed through a quiescent state
+	for the current grace period.  It is possible for "pq" to be
+	"1" and "c" different than "g", which indicates that although
+	the CPU has passed through a quiescent state, either (1) this
+	CPU has not yet reported that fact, (2) some other CPU has not
+	yet reported for this grace period, or (3) both.
+
+o	"pqc" indicates which grace period the last-observed quiescent
+	state for this CPU corresponds to.  This is important for handling
+	the race between CPU 0 reporting an extended dynticks-idle
+	quiescent state for CPU 1 and CPU 1 suddenly waking up and
+	reporting its own quiescent state.  If CPU 1 was the last CPU
+	for the current grace period, then the CPU that loses this race
+	will attempt to incorrectly mark CPU 1 as having checked in for
+	the next grace period!
+
+o	"qp" indicates that RCU still expects a quiescent state from
+	this CPU.
+
+o	"rpfq" is the number of rcu_pending() calls on this CPU required
+	to induce this CPU to invoke force_quiescent_state().
+
+o	"rp" is low-order four hex digits of the count of how many times
+	rcu_pending() has been invoked on this CPU.
+
+o	"dt" is the current value of the dyntick counter that is incremented
+	when entering or leaving dynticks idle state, either by the
+	scheduler or by irq.  The number after the "/" is the interrupt
+	nesting depth when in dyntick-idle state, or one greater than
+	the interrupt-nesting depth otherwise.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o	"dn" is the current value of the dyntick counter that is incremented
+	when entering or leaving dynticks idle state via NMI.  If both
+	the "dt" and "dn" values are even, then this CPU is in dynticks
+	idle mode and may be ignored by RCU.  If either of these two
+	counters is odd, then RCU must be alert to the possibility of
+	an RCU read-side critical section running on this CPU.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o	"df" is the number of times that some other CPU has forced a
+	quiescent state on behalf of this CPU due to this CPU being in
+	dynticks-idle state.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o	"of" is the number of times that some other CPU has forced a
+	quiescent state on behalf of this CPU due to this CPU being
+	offline.  In a perfect world, this might neve happen, but it
+	turns out that offlining and onlining a CPU can take several grace
+	periods, and so there is likely to be an extended period of time
+	when RCU believes that the CPU is online when it really is not.
+	Please note that erring in the other direction (RCU believing a
+	CPU is offline when it is really alive and kicking) is a fatal
+	error, so it makes sense to err conservatively.
+
+o	"ri" is the number of times that RCU has seen fit to send a
+	reschedule IPI to this CPU in order to get it to report a
+	quiescent state.
+
+o	"ql" is the number of RCU callbacks currently residing on
+	this CPU.  This is the total number of callbacks, regardless
+	of what state they are in (new, waiting for grace period to
+	start, waiting for grace period to end, ready to invoke).
+
+o	"b" is the batch limit for this CPU.  If more than this number
+	of RCU callbacks is ready to invoke, then the remainder will
+	be deferred.
+
+
+The output of "cat rcu/rcugp" looks as follows:
+
+rcu: completed=33062  gpnum=33063
+rcu_bh: completed=464  gpnum=464
+
+Again, this output is for both "rcu" and "rcu_bh".  The fields are
+taken from the rcu_state structure, and are as follows:
+
+o	"completed" is the number of grace periods that have completed.
+	It is comparable to the "c" field from rcu/rcudata in that a
+	CPU whose "c" field matches the value of "completed" is aware
+	that the corresponding RCU grace period has completed.
+
+o	"gpnum" is the number of grace periods that have started.  It is
+	comparable to the "g" field from rcu/rcudata in that a CPU
+	whose "g" field matches the value of "gpnum" is aware that the
+	corresponding RCU grace period has started.
+
+	If these two fields are equal (as they are for "rcu_bh" above),
+	then there is no grace period in progress, in other words, RCU
+	is idle.  On the other hand, if the two fields differ (as they
+	do for "rcu" above), then an RCU grace period is in progress.
+
+
+The output of "cat rcu/rcuhier" looks as follows, with very long lines:
+
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
+1/1 0:127 ^0    
+3/3 0:35 ^0    0/0 36:71 ^1    0/0 72:107 ^2    0/0 108:127 ^3    
+3/3f 0:5 ^0    2/3 6:11 ^1    0/0 12:17 ^2    0/0 18:23 ^3    0/0 24:29 ^4    0/0 30:35 ^5    0/0 36:41 ^0    0/0 42:47 ^1    0/0 48:53 ^2    0/0 54:59 ^3    0/0 60:65 ^4    0/0 66:71 ^5    0/0 72:77 ^0    0/0 78:83 ^1    0/0 84:89 ^2    0/0 90:95 ^3    0/0 96:101 ^4    0/0 102:107 ^5    0/0 108:113 ^0    0/0 114:119 ^1    0/0 120:125 ^2    0/0 126:127 ^3    
+rcu_bh:
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
+0/1 0:127 ^0    
+0/3 0:35 ^0    0/0 36:71 ^1    0/0 72:107 ^2    0/0 108:127 ^3    
+0/3f 0:5 ^0    0/3 6:11 ^1    0/0 12:17 ^2    0/0 18:23 ^3    0/0 24:29 ^4    0/0 30:35 ^5    0/0 36:41 ^0    0/0 42:47 ^1    0/0 48:53 ^2    0/0 54:59 ^3    0/0 60:65 ^4    0/0 66:71 ^5    0/0 72:77 ^0    0/0 78:83 ^1    0/0 84:89 ^2    0/0 90:95 ^3    0/0 96:101 ^4    0/0 102:107 ^5    0/0 108:113 ^0    0/0 114:119 ^1    0/0 120:125 ^2    0/0 126:127 ^3
+
+This is once again split into "rcu" and "rcu_bh" portions.  The fields are
+as follows:
+
+o	"c" is exactly the same as "completed" under rcu/rcugp.
+
+o	"g" is exactly the same as "gpnum" under rcu/rcugp.
+
+o	"s" is the "signaled" state that drives force_quiescent_state()'s
+	state machine.
+
+o	"jfq" is the number of jiffies remaining for this grace period
+	before force_quiescent_state() is invoked to help push things
+	along.  Note that CPUs in dyntick-idle mode thoughout the grace
+	period will not report on their own, but rather must be check by
+	some other CPU via force_quiescent_state().
+
+o	"j" is the low-order four hex digits of the jiffies counter.
+	Yes, Paul did run into a number of problems that turned out to
+	be due to the jiffies counter no longer counting.  Why do you ask?
+
+o	"nfqs" is the number of calls to force_quiescent_state() since
+	boot.
+
+o	"nfqsng" is the number of useless calls to force_quiescent_state(),
+	where there wasn't actually a grace period active.  This can
+	happen due to races.  The number in parentheses is the difference
+	between "nfqs" and "nfqsng", or the number of times that
+	force_quiescent_state() actually did some real work.
+
+o	"fqlh" is the number of calls to force_quiescent_state() that
+	exited immediately (without even being counted in nfqs above)
+	due to contention on ->fqslock.
+
+o	Each element of the form "1/1 0:127 ^0" represents one struct
+	rcu_node.  Each line represents one level of the hierarchy, from
+	root to leaves.  It is best to think of the rcu_data structures
+	as forming yet another level after the leaves.  Note that there
+	might be either one, two, or three levels of rcu_node structures,
+	depending on the relationship between CONFIG_RCU_FANOUT and
+	CONFIG_NR_CPUS.
+	
+	o	The numbers separated by the "/" are the qsmask followed
+		by the qsmaskinit.  The qsmask will have one bit
+		set for each entity in the next lower level that
+		has not yet checked in for the current grace period.
+		The qsmaskinit will have one bit for each entity that is
+		currently expected to check in during each grace period.
+		The value of qsmaskinit is assigned to that of qsmask
+		at the beginning of each grace period.
+
+		For example, for "rcu", the qsmask of the first entry
+		of the lowest level is 0x14, meaning that we are still
+		waiting for CPUs 2 and 4 to check in for the current
+		grace period.
+
+	o	The numbers separated by the ":" are the range of CPUs
+		served by this struct rcu_node.  This can be helpful
+		in working out how the hierarchy is wired together.
+
+		For example, the first entry at the lowest level shows
+		"0:5", indicating that it covers CPUs 0 through 5.
+
+	o	The number after the "^" indicates the bit in the
+		next higher level rcu_node structure that this
+		rcu_node structure corresponds to.
+
+		For example, the first entry at the lowest level shows
+		"^0", indicating that it corresponds to bit zero in
+		the first entry at the middle level.
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index f4e55be2eea9..afad9f5ac0ac 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -208,6 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 		break;
 	case ERR_TYPE_KERNEL_PANIC:
 	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
@@ -227,6 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 	/* Check to see if we need to or have stopped logging */
 	if (fatal || !logging_enabled) {
 		logging_enabled = 0;
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
@@ -249,11 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 		else
 			rtas_log_start += 1;
 
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		wake_up_interruptible(&rtas_log_wait);
 		break;
 	case ERR_TYPE_KERNEL_PANIC:
 	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..9b70b9231693 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk)
 }
 #endif
 
-#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
+#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU)
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+extern void rcu_nmi_enter(void);
+extern void rcu_nmi_exit(void);
 #else
 # define rcu_irq_enter() do { } while (0)
 # define rcu_irq_exit() do { } while (0)
-#endif /* CONFIG_PREEMPT_RCU */
+# define rcu_nmi_enter() do { } while (0)
+# define rcu_nmi_exit() do { } while (0)
+#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */
 
 /*
  * It is safe to do non-atomic ops on ->hardirq_context,
@@ -134,7 +138,6 @@ extern void rcu_irq_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		rcu_irq_enter();			\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
@@ -153,7 +156,6 @@ extern void irq_enter(void);
 		trace_hardirq_exit();			\
 		account_system_vtime(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
-		rcu_irq_exit();				\
 	} while (0)
 
 /*
@@ -161,7 +163,7 @@ extern void irq_enter(void);
  */
 extern void irq_exit(void);
 
-#define nmi_enter()		do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter()		do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0)
+#define nmi_exit()		do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e33..bfd289aff576 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,11 +52,15 @@ struct rcu_head {
 	void (*func)(struct rcu_head *head);
 };
 
-#ifdef CONFIG_CLASSIC_RCU
+#if defined(CONFIG_CLASSIC_RCU)
 #include <linux/rcuclassic.h>
-#else /* #ifdef CONFIG_CLASSIC_RCU */
+#elif defined(CONFIG_TREE_RCU)
+#include <linux/rcutree.h>
+#elif defined(CONFIG_PREEMPT_RCU)
 #include <linux/rcupreempt.h>
-#endif /* #else #ifdef CONFIG_CLASSIC_RCU */
+#else
+#error "Unknown RCU implementation specified to kernel configuration"
+#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */
 
 #define RCU_HEAD_INIT 	{ .next = NULL, .func = NULL }
 #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
new file mode 100644
index 000000000000..d4368b7975c3
--- /dev/null
+++ b/include/linux/rcutree.h
@@ -0,0 +1,329 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm
+ *
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 	Documentation/RCU
+ */
+
+#ifndef __LINUX_RCUTREE_H
+#define __LINUX_RCUTREE_H
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+
+/*
+ * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
+ * In theory, it should be possible to add more levels straightforwardly.
+ * In practice, this has not been tested, so there is probably some
+ * bug somewhere.
+ */
+#define MAX_RCU_LVLS 3
+#define RCU_FANOUT	      (CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_SQ	      (RCU_FANOUT * RCU_FANOUT)
+#define RCU_FANOUT_CUBE	      (RCU_FANOUT_SQ * RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT
+#  define NUM_RCU_LVLS	      1
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      (NR_CPUS)
+#  define NUM_RCU_LVL_2	      0
+#  define NUM_RCU_LVL_3	      0
+#elif NR_CPUS <= RCU_FANOUT_SQ
+#  define NUM_RCU_LVLS	      2
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT)
+#  define NUM_RCU_LVL_2	      (NR_CPUS)
+#  define NUM_RCU_LVL_3	      0
+#elif NR_CPUS <= RCU_FANOUT_CUBE
+#  define NUM_RCU_LVLS	      3
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ)
+#  define NUM_RCU_LVL_2	      (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT))
+#  define NUM_RCU_LVL_3	      NR_CPUS
+#else
+# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+#endif /* #if (NR_CPUS) <= RCU_FANOUT */
+
+#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
+
+/*
+ * Dynticks per-CPU state.
+ */
+struct rcu_dynticks {
+	int dynticks_nesting;	/* Track nesting level, sort of. */
+	int dynticks;		/* Even value for dynticks-idle, else odd. */
+	int dynticks_nmi;	/* Even value for either dynticks-idle or */
+				/*  not in nmi handler, else odd.  So this */
+				/*  remains even for nmi from irq handler. */
+};
+
+/*
+ * Definition for node within the RCU grace-period-detection hierarchy.
+ */
+struct rcu_node {
+	spinlock_t lock;
+	unsigned long qsmask;	/* CPUs or groups that need to switch in */
+				/*  order for current grace period to proceed.*/
+	unsigned long qsmaskinit;
+				/* Per-GP initialization for qsmask. */
+	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
+	int	grplo;		/* lowest-numbered CPU or group here. */
+	int	grphi;		/* highest-numbered CPU or group here. */
+	u8	grpnum;		/* CPU/group number for next level up. */
+	u8	level;		/* root is at level 0. */
+	struct rcu_node *parent;
+} ____cacheline_internodealigned_in_smp;
+
+/* Index values for nxttail array in struct rcu_data. */
+#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL		3
+#define RCU_NEXT_SIZE		4
+
+/* Per-CPU data for read-copy update. */
+struct rcu_data {
+	/* 1) quiescent-state and grace-period handling : */
+	long		completed;	/* Track rsp->completed gp number */
+					/*  in order to detect GP end. */
+	long		gpnum;		/* Highest gp number that this CPU */
+					/*  is aware of having started. */
+	long		passed_quiesc_completed;
+					/* Value of completed at time of qs. */
+	bool		passed_quiesc;	/* User-mode/idle loop etc. */
+	bool		qs_pending;	/* Core waits for quiesc state. */
+	bool		beenonline;	/* CPU online at least once. */
+	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
+	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
+
+	/* 2) batch handling */
+	/*
+	 * If nxtlist is not NULL, it is partitioned as follows.
+	 * Any of the partitions might be empty, in which case the
+	 * pointer to that partition will be equal to the pointer for
+	 * the following partition.  When the list is empty, all of
+	 * the nxttail elements point to nxtlist, which is NULL.
+	 *
+	 * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
+	 *	Entries that might have arrived after current GP ended
+	 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
+	 *	Entries known to have arrived before current GP ended
+	 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
+	 *	Entries that batch # <= ->completed - 1: waiting for current GP
+	 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
+	 *	Entries that batch # <= ->completed
+	 *	The grace period for these entries has completed, and
+	 *	the other grace-period-completed entries may be moved
+	 *	here temporarily in rcu_process_callbacks().
+	 */
+	struct rcu_head *nxtlist;
+	struct rcu_head **nxttail[RCU_NEXT_SIZE];
+	long		qlen; 	 	/* # of queued callbacks */
+	long		blimit;		/* Upper limit on a processed batch */
+
+#ifdef CONFIG_NO_HZ
+	/* 3) dynticks interface. */
+	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
+	int dynticks_snap;		/* Per-GP tracking for dynticks. */
+	int dynticks_nmi_snap;		/* Per-GP tracking for dynticks_nmi. */
+#endif /* #ifdef CONFIG_NO_HZ */
+
+	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
+#ifdef CONFIG_NO_HZ
+	unsigned long dynticks_fqs;	/* Kicked due to dynticks idle. */
+#endif /* #ifdef CONFIG_NO_HZ */
+	unsigned long offline_fqs;	/* Kicked due to being offline. */
+	unsigned long resched_ipi;	/* Sent a resched IPI. */
+
+	/* 5) state to allow this CPU to force_quiescent_state on others */
+	long n_rcu_pending;		/* rcu_pending() calls since boot. */
+	long n_rcu_pending_force_qs;	/* when to force quiescent states. */
+
+	int cpu;
+};
+
+/* Values for signaled field in struct rcu_state. */
+#define RCU_GP_INIT		0	/* Grace period being initialized. */
+#define RCU_SAVE_DYNTICK	1	/* Need to scan dyntick state. */
+#define RCU_FORCE_QS		2	/* Need to force quiescent state. */
+#ifdef CONFIG_NO_HZ
+#define RCU_SIGNAL_INIT		RCU_SAVE_DYNTICK
+#else /* #ifdef CONFIG_NO_HZ */
+#define RCU_SIGNAL_INIT		RCU_FORCE_QS
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+#define RCU_JIFFIES_TILL_FORCE_QS	 3	/* for rsp->jiffies_force_qs */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK   (10 * HZ)  /* for rsp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ)  /* for rsp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY		2	  /* Allow other CPUs time */
+						  /*  to take at least one */
+						  /*  scheduling clock irq */
+						  /*  before ratting on them. */
+
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/*
+ * RCU global state, including node hierarchy.  This hierarchy is
+ * represented in "heap" form in a dense array.  The root (first level)
+ * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
+ * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
+ * and the third level in ->node[m+1] and following (->node[m+1] referenced
+ * by ->level[2]).  The number of levels is determined by the number of
+ * CPUs and by CONFIG_RCU_FANOUT.  Small systems will have a "hierarchy"
+ * consisting of a single rcu_node.
+ */
+struct rcu_state {
+	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */
+	struct rcu_node *level[NUM_RCU_LVLS];	/* Hierarchy levels. */
+	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
+	u8 levelspread[NUM_RCU_LVLS];		/* kids/node in each level. */
+	struct rcu_data *rda[NR_CPUS];		/* array of rdp pointers. */
+
+	/* The following fields are guarded by the root rcu_node's lock. */
+
+	u8	signaled ____cacheline_internodealigned_in_smp;
+						/* Force QS state. */
+	long	gpnum;				/* Current gp number. */
+	long	completed;			/* # of last completed gp. */
+	spinlock_t onofflock;			/* exclude on/offline and */
+						/*  starting new GP. */
+	spinlock_t fqslock;			/* Only one task forcing */
+						/*  quiescent states. */
+	unsigned long jiffies_force_qs;		/* Time at which to invoke */
+						/*  force_quiescent_state(). */
+	unsigned long n_force_qs;		/* Number of calls to */
+						/*  force_quiescent_state(). */
+	unsigned long n_force_qs_lh;		/* ~Number of calls leaving */
+						/*  due to lock unavailable. */
+	unsigned long n_force_qs_ngp;		/* Number of calls leaving */
+						/*  due to no GP active. */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	unsigned long gp_start;			/* Time at which GP started, */
+						/*  but in jiffies. */
+	unsigned long jiffies_stall;		/* Time at which to check */
+						/*  for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+#ifdef CONFIG_NO_HZ
+	long dynticks_completed;		/* Value of completed @ snap. */
+#endif /* #ifdef CONFIG_NO_HZ */
+};
+
+extern struct rcu_state rcu_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_data);
+
+extern struct rcu_state rcu_bh_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+/*
+ * Increment the quiescent state counter.
+ * The counter is a bit degenerated: We do not need to know
+ * how many quiescent states passed, just if there was at least
+ * one since the start of the grace period. Thus just a flag.
+ */
+static inline void rcu_qsctr_inc(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	rdp->passed_quiesc = 1;
+	rdp->passed_quiesc_completed = rdp->completed;
+}
+static inline void rcu_bh_qsctr_inc(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+	rdp->passed_quiesc = 1;
+	rdp->passed_quiesc_completed = rdp->completed;
+}
+
+extern int rcu_pending(int cpu);
+extern int rcu_needs_cpu(int cpu);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire()	\
+			lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire()	do { } while (0)
+# define rcu_read_release()	do { } while (0)
+#endif
+
+static inline void __rcu_read_lock(void)
+{
+	preempt_disable();
+	__acquire(RCU);
+	rcu_read_acquire();
+}
+static inline void __rcu_read_unlock(void)
+{
+	rcu_read_release();
+	__release(RCU);
+	preempt_enable();
+}
+static inline void __rcu_read_lock_bh(void)
+{
+	local_bh_disable();
+	__acquire(RCU_BH);
+	rcu_read_acquire();
+}
+static inline void __rcu_read_unlock_bh(void)
+{
+	rcu_read_release();
+	__release(RCU_BH);
+	local_bh_enable();
+}
+
+#define __synchronize_sched() synchronize_rcu()
+
+#define call_rcu_sched(head, func) call_rcu(head, func)
+
+static inline void rcu_init_sched(void)
+{
+}
+
+extern void __rcu_init(void);
+extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
+
+extern long rcu_batches_completed(void);
+extern long rcu_batches_completed_bh(void);
+
+#ifdef CONFIG_NO_HZ
+void rcu_enter_nohz(void);
+void rcu_exit_nohz(void);
+#else /* CONFIG_NO_HZ */
+static inline void rcu_enter_nohz(void)
+{
+}
+static inline void rcu_exit_nohz(void)
+{
+}
+#endif /* CONFIG_NO_HZ */
+
+#endif /* __LINUX_RCUTREE_H */
diff --git a/init/Kconfig b/init/Kconfig
index f763762d544a..9dd7958a71f0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -928,10 +928,16 @@ source "block/Kconfig"
 config PREEMPT_NOTIFIERS
 	bool
 
-config CLASSIC_RCU
-	def_bool !PREEMPT_RCU
+config TREE_RCU_TRACE
+	def_bool RCU_TRACE && TREE_RCU
+	select DEBUG_FS
 	help
-	  This option selects the classic RCU implementation that is
-	  designed for best read-side performance on non-realtime
-	  systems.  Classic RCU is the default.  Note that the
-	  PREEMPT_RCU symbol is used to select/deselect this option.
+	  This option provides tracing for the TREE_RCU implementation,
+	  permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+config PREEMPT_RCU_TRACE
+	def_bool RCU_TRACE && PREEMPT_RCU
+	select DEBUG_FS
+	help
+	  This option provides tracing for the PREEMPT_RCU implementation,
+	  permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 9fdba03dc1fc..463f29743ea0 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,10 +52,29 @@ config PREEMPT
 
 endchoice
 
+choice
+	prompt "RCU Implementation"
+	default CLASSIC_RCU
+
+config CLASSIC_RCU
+	bool "Classic RCU"
+	help
+	  This option selects the classic RCU implementation that is
+	  designed for best read-side performance on non-realtime
+	  systems.
+	  
+	  Select this option if you are unsure.
+
+config TREE_RCU
+	bool "Tree-based hierarchical RCU"
+	help
+	  This option selects the RCU implementation that is
+	  designed for very large SMP system with hundreds or
+	  thousands of CPUs.
+
 config PREEMPT_RCU
 	bool "Preemptible RCU"
 	depends on PREEMPT
-	default n
 	help
 	  This option reduces the latency of the kernel by making certain
 	  RCU sections preemptible. Normally RCU code is non-preemptible, if
@@ -64,16 +83,47 @@ config PREEMPT_RCU
 	  now-naive assumptions about each RCU read-side critical section
 	  remaining on a given CPU through its execution.
 
-	  Say N if you are unsure.
+endchoice
 
 config RCU_TRACE
-	bool "Enable tracing for RCU - currently stats in debugfs"
-	depends on PREEMPT_RCU
-	select DEBUG_FS
-	default y
+	bool "Enable tracing for RCU"
+	depends on TREE_RCU || PREEMPT_RCU
 	help
 	  This option provides tracing in RCU which presents stats
 	  in debugfs for debugging RCU implementation.
 
 	  Say Y here if you want to enable RCU tracing
 	  Say N if you are unsure.
+
+config RCU_FANOUT
+	int "Tree-based hierarchical RCU fanout value"
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on TREE_RCU
+	default 64 if 64BIT
+	default 32 if !64BIT
+	help
+	  This option controls the fanout of hierarchical implementations
+	  of RCU, allowing RCU to work efficiently on machines with
+	  large numbers of CPUs.  This value must be at least the cube
+	  root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+	  systems and up to 262,144 for 64-bit systems.
+
+	  Select a specific number if testing RCU itself.
+	  Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+	bool "Disable tree-based hierarchical RCU auto-balancing"
+	depends on TREE_RCU
+	default n
+	help
+	  This option forces use of the exact RCU_FANOUT value specified,
+	  regardless of imbalances in the hierarchy.  This is useful for
+	  testing RCU itself, and might one day be useful on systems with
+	  strong NUMA behavior.
+
+	  Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+	  Say n if unsure.
+
+	
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..b4fdbbff5ec0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,10 +74,10 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
+obj-$(CONFIG_TREE_RCU) += rcutree.o
 obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
-ifeq ($(CONFIG_PREEMPT_RCU),y)
-obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
-endif
+obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
+obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 59236e8b9daa..04982659875a 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -551,6 +551,16 @@ void rcu_irq_exit(void)
 	}
 }
 
+void rcu_nmi_enter(void)
+{
+	rcu_irq_enter();
+}
+
+void rcu_nmi_exit(void)
+{
+	rcu_irq_exit();
+}
+
 static void dyntick_save_progress_counter(int cpu)
 {
 	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 35c2d3360ecf..7c2665cac172 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -149,12 +149,12 @@ static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
 		sp->done_length += cp->done_length;
 		sp->done_add += cp->done_add;
 		sp->done_remove += cp->done_remove;
-		atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked));
+		atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked);
 		sp->rcu_check_callbacks += cp->rcu_check_callbacks;
-		atomic_set(&sp->rcu_try_flip_1,
-			   atomic_read(&cp->rcu_try_flip_1));
-		atomic_set(&sp->rcu_try_flip_e1,
-			   atomic_read(&cp->rcu_try_flip_e1));
+		atomic_add(atomic_read(&cp->rcu_try_flip_1),
+			   &sp->rcu_try_flip_1);
+		atomic_add(atomic_read(&cp->rcu_try_flip_e1),
+			   &sp->rcu_try_flip_e1);
 		sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
 		sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
 		sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
new file mode 100644
index 000000000000..a342b032112c
--- /dev/null
+++ b/kernel/rcutree.c
@@ -0,0 +1,1535 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Authors: Dipankar Sarma <dipankar@in.ibm.com>
+ *	    Manfred Spraul <manfred@colorfullife.com>
+ *	    Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
+ *
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 	Documentation/RCU
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
+/* Data structures. */
+
+#define RCU_STATE_INITIALIZER(name) { \
+	.level = { &name.node[0] }, \
+	.levelcnt = { \
+		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
+		NUM_RCU_LVL_1, \
+		NUM_RCU_LVL_2, \
+		NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \
+	}, \
+	.signaled = RCU_SIGNAL_INIT, \
+	.gpnum = -300, \
+	.completed = -300, \
+	.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
+	.fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
+	.n_force_qs = 0, \
+	.n_force_qs_ngp = 0, \
+}
+
+struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state);
+DEFINE_PER_CPU(struct rcu_data, rcu_data);
+
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
+DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+#ifdef CONFIG_NO_HZ
+DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks);
+#endif /* #ifdef CONFIG_NO_HZ */
+
+static int blimit = 10;		/* Maximum callbacks per softirq. */
+static int qhimark = 10000;	/* If this many pending, ignore blimit. */
+static int qlowmark = 100;	/* Once only this many pending, use blimit. */
+
+static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
+
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed(void)
+{
+	return rcu_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Return the number of RCU BH batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed_bh(void)
+{
+	return rcu_bh_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
+
+/*
+ * Does the CPU have callbacks ready to be invoked?
+ */
+static int
+cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
+{
+	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the current CPU require a yet-as-unscheduled grace period?
+ */
+static int
+cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	/* ACCESS_ONCE() because we are accessing outside of lock. */
+	return *rdp->nxttail[RCU_DONE_TAIL] &&
+	       ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum);
+}
+
+/*
+ * Return the root node of the specified rcu_state structure.
+ */
+static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+{
+	return &rsp->node[0];
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * If the specified CPU is offline, tell the caller that it is in
+ * a quiescent state.  Otherwise, whack it with a reschedule IPI.
+ * Grace periods can end up waiting on an offline CPU when that
+ * CPU is in the process of coming online -- it will be added to the
+ * rcu_node bitmasks before it actually makes it online.  The same thing
+ * can happen while a CPU is in the process of coming online.  Because this
+ * race is quite rare, we check for it after detecting that the grace
+ * period has been delayed rather than checking each and every CPU
+ * each and every time we start a new grace period.
+ */
+static int rcu_implicit_offline_qs(struct rcu_data *rdp)
+{
+	/*
+	 * If the CPU is offline, it is in a quiescent state.  We can
+	 * trust its state not to change because interrupts are disabled.
+	 */
+	if (cpu_is_offline(rdp->cpu)) {
+		rdp->offline_fqs++;
+		return 1;
+	}
+
+	/* The CPU is online, so send it a reschedule IPI. */
+	if (rdp->cpu != smp_processor_id())
+		smp_send_reschedule(rdp->cpu);
+	else
+		set_need_resched();
+	rdp->resched_ipi++;
+	return 0;
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#ifdef CONFIG_NO_HZ
+static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5);
+
+/**
+ * rcu_enter_nohz - inform RCU that current CPU is entering nohz
+ *
+ * Enter nohz mode, in other words, -leave- the mode in which RCU
+ * read-side critical sections can occur.  (Though RCU read-side
+ * critical sections can occur in irq handlers in nohz mode, a possibility
+ * handled by rcu_irq_enter() and rcu_irq_exit()).
+ */
+void rcu_enter_nohz(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	rdtp->dynticks++;
+	rdtp->dynticks_nesting--;
+	WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
+	local_irq_restore(flags);
+}
+
+/*
+ * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
+ *
+ * Exit nohz mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections normally occur.
+ */
+void rcu_exit_nohz(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	rdtp->dynticks++;
+	rdtp->dynticks_nesting++;
+	WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
+	local_irq_restore(flags);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+}
+
+/**
+ * rcu_nmi_enter - inform RCU of entry to NMI context
+ *
+ * If the CPU was idle with dynamic ticks active, and there is no
+ * irq handler running, this updates rdtp->dynticks_nmi to let the
+ * RCU grace-period handling know that the CPU is active.
+ */
+void rcu_nmi_enter(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks & 0x1)
+		return;
+	rdtp->dynticks_nmi++;
+	WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+}
+
+/**
+ * rcu_nmi_exit - inform RCU of exit from NMI context
+ *
+ * If the CPU was idle with dynamic ticks active, and there is no
+ * irq handler running, this updates rdtp->dynticks_nmi to let the
+ * RCU grace-period handling know that the CPU is no longer active.
+ */
+void rcu_nmi_exit(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks & 0x1)
+		return;
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	rdtp->dynticks_nmi++;
+	WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs);
+}
+
+/**
+ * rcu_irq_enter - inform RCU of entry to hard irq context
+ *
+ * If the CPU was idle with dynamic ticks active, this updates the
+ * rdtp->dynticks to let the RCU handling know that the CPU is active.
+ */
+void rcu_irq_enter(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks_nesting++)
+		return;
+	rdtp->dynticks++;
+	WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+}
+
+/**
+ * rcu_irq_exit - inform RCU of exit from hard irq context
+ *
+ * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
+ * to put let the RCU handling be aware that the CPU is going back to idle
+ * with no ticks.
+ */
+void rcu_irq_exit(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (--rdtp->dynticks_nesting)
+		return;
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	rdtp->dynticks++;
+	WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
+
+	/* If the interrupt queued a callback, get out of dyntick mode. */
+	if (__get_cpu_var(rcu_data).nxtlist ||
+	    __get_cpu_var(rcu_bh_data).nxtlist)
+		set_need_resched();
+}
+
+/*
+ * Record the specified "completed" value, which is later used to validate
+ * dynticks counter manipulations.  Specify "rsp->completed - 1" to
+ * unconditionally invalidate any future dynticks manipulations (which is
+ * useful at the beginning of a grace period).
+ */
+static void dyntick_record_completed(struct rcu_state *rsp, long comp)
+{
+	rsp->dynticks_completed = comp;
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Recall the previously recorded value of the completion for dynticks.
+ */
+static long dyntick_recall_completed(struct rcu_state *rsp)
+{
+	return rsp->dynticks_completed;
+}
+
+/*
+ * Snapshot the specified CPU's dynticks counter so that we can later
+ * credit them with an implicit quiescent state.  Return 1 if this CPU
+ * is already in a quiescent state courtesy of dynticks idle mode.
+ */
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
+{
+	int ret;
+	int snap;
+	int snap_nmi;
+
+	snap = rdp->dynticks->dynticks;
+	snap_nmi = rdp->dynticks->dynticks_nmi;
+	smp_mb();	/* Order sampling of snap with end of grace period. */
+	rdp->dynticks_snap = snap;
+	rdp->dynticks_nmi_snap = snap_nmi;
+	ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
+	if (ret)
+		rdp->dynticks_fqs++;
+	return ret;
+}
+
+/*
+ * Return true if the specified CPU has passed through a quiescent
+ * state by virtue of being in or having passed through an dynticks
+ * idle state since the last call to dyntick_save_progress_counter()
+ * for this same CPU.
+ */
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+{
+	long curr;
+	long curr_nmi;
+	long snap;
+	long snap_nmi;
+
+	curr = rdp->dynticks->dynticks;
+	snap = rdp->dynticks_snap;
+	curr_nmi = rdp->dynticks->dynticks_nmi;
+	snap_nmi = rdp->dynticks_nmi_snap;
+	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+	/*
+	 * If the CPU passed through or entered a dynticks idle phase with
+	 * no active irq/NMI handlers, then we can safely pretend that the CPU
+	 * already acknowledged the request to pass through a quiescent
+	 * state.  Either way, that CPU cannot possibly be in an RCU
+	 * read-side critical section that started before the beginning
+	 * of the current RCU grace period.
+	 */
+	if ((curr != snap || (curr & 0x1) == 0) &&
+	    (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+		rdp->dynticks_fqs++;
+		return 1;
+	}
+
+	/* Go check for the CPU being offline. */
+	return rcu_implicit_offline_qs(rdp);
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static void dyntick_record_completed(struct rcu_state *rsp, long comp)
+{
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * If there are no dynticks, then the only way that a CPU can passively
+ * be in a quiescent state is to be offline.  Unlike dynticks idle, which
+ * is a point in time during the prior (already finished) grace period,
+ * an offline CPU is always in a quiescent state, and thus can be
+ * unconditionally applied.  So just return the current value of completed.
+ */
+static long dyntick_recall_completed(struct rcu_state *rsp)
+{
+	return rsp->completed;
+}
+
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
+{
+	return 0;
+}
+
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+{
+	return rcu_implicit_offline_qs(rdp);
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_state *rsp)
+{
+	rsp->gp_start = jiffies;
+	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_state *rsp)
+{
+	int cpu;
+	long delta;
+	unsigned long flags;
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
+	struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES];
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock_irqsave(&rnp->lock, flags);
+	delta = jiffies - rsp->jiffies_stall;
+	if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) {
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rnp->lock, flags);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "INFO: RCU detected CPU stalls:");
+	for (; rnp_cur < rnp_end; rnp_cur++) {
+		if (rnp_cur->qsmask == 0)
+			continue;
+		for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
+			if (rnp_cur->qsmask & (1UL << cpu))
+				printk(" %d", rnp_cur->grplo + cpu);
+	}
+	printk(" (detected by %d, t=%ld jiffies)\n",
+	       smp_processor_id(), (long)(jiffies - rsp->gp_start));
+	force_quiescent_state(rsp, 0);  /* Kick them all. */
+}
+
+static void print_cpu_stall(struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n",
+			smp_processor_id(), jiffies - rsp->gp_start);
+	dump_stack();
+	spin_lock_irqsave(&rnp->lock, flags);
+	if ((long)(jiffies - rsp->jiffies_stall) >= 0)
+		rsp->jiffies_stall =
+			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rnp->lock, flags);
+	set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	long delta;
+	struct rcu_node *rnp;
+
+	delta = jiffies - rsp->jiffies_stall;
+	rnp = rdp->mynode;
+	if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
+
+		/* We haven't checked in, so go dump stack. */
+		print_cpu_stall(rsp);
+
+	} else if (rsp->gpnum != rsp->completed &&
+		   delta >= RCU_STALL_RAT_DELAY) {
+
+		/* They had two time units to dump stack, so complain. */
+		print_other_cpu_stall(rsp);
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_state *rsp)
+{
+}
+
+static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/*
+ * Update CPU-local rcu_data state to record the newly noticed grace period.
+ * This is used both when we started the grace period and when we notice
+ * that someone else started the grace period.
+ */
+static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	rdp->qs_pending = 1;
+	rdp->passed_quiesc = 0;
+	rdp->gpnum = rsp->gpnum;
+	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
+				      RCU_JIFFIES_TILL_FORCE_QS;
+}
+
+/*
+ * Did someone else start a new RCU grace period start since we last
+ * checked?  Update local state appropriately if so.  Must be called
+ * on the CPU corresponding to rdp.
+ */
+static int
+check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	local_irq_save(flags);
+	if (rdp->gpnum != rsp->gpnum) {
+		note_new_gpnum(rsp, rdp);
+		ret = 1;
+	}
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Start a new RCU grace period if warranted, re-initializing the hierarchy
+ * in preparation for detecting the next grace period.  The caller must hold
+ * the root node's ->lock, which is released before return.  Hard irqs must
+ * be disabled.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
+	__releases(rcu_get_root(rsp)->lock)
+{
+	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct rcu_node *rnp_cur;
+	struct rcu_node *rnp_end;
+
+	if (!cpu_needs_another_gp(rsp, rdp)) {
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+
+	/* Advance to a new grace period and initialize state. */
+	rsp->gpnum++;
+	rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
+	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
+	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
+				      RCU_JIFFIES_TILL_FORCE_QS;
+	record_gp_stall_check_time(rsp);
+	dyntick_record_completed(rsp, rsp->completed - 1);
+	note_new_gpnum(rsp, rdp);
+
+	/*
+	 * Because we are first, we know that all our callbacks will
+	 * be covered by this upcoming grace period, even the ones
+	 * that were registered arbitrarily recently.
+	 */
+	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+	rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+	/* Special-case the common single-level case. */
+	if (NUM_RCU_NODES == 1) {
+		rnp->qsmask = rnp->qsmaskinit;
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+
+	spin_unlock(&rnp->lock);  /* leave irqs disabled. */
+
+
+	/* Exclude any concurrent CPU-hotplug operations. */
+	spin_lock(&rsp->onofflock);  /* irqs already disabled. */
+
+	/*
+	 * Set the quiescent-state-needed bits in all the non-leaf RCU
+	 * nodes for all currently online CPUs.  This operation relies
+	 * on the layout of the hierarchy within the rsp->node[] array.
+	 * Note that other CPUs will access only the leaves of the
+	 * hierarchy, which still indicate that no grace period is in
+	 * progress.  In addition, we have excluded CPU-hotplug operations.
+	 *
+	 * We therefore do not need to hold any locks.  Any required
+	 * memory barriers will be supplied by the locks guarding the
+	 * leaf rcu_nodes in the hierarchy.
+	 */
+
+	rnp_end = rsp->level[NUM_RCU_LVLS - 1];
+	for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++)
+		rnp_cur->qsmask = rnp_cur->qsmaskinit;
+
+	/*
+	 * Now set up the leaf nodes.  Here we must be careful.  First,
+	 * we need to hold the lock in order to exclude other CPUs, which
+	 * might be contending for the leaf nodes' locks.  Second, as
+	 * soon as we initialize a given leaf node, its CPUs might run
+	 * up the rest of the hierarchy.  We must therefore acquire locks
+	 * for each node that we touch during this stage.  (But we still
+	 * are excluding CPU-hotplug operations.)
+	 *
+	 * Note that the grace period cannot complete until we finish
+	 * the initialization process, as there will be at least one
+	 * qsmask bit set in the root node until that time, namely the
+	 * one corresponding to this CPU.
+	 */
+	rnp_end = &rsp->node[NUM_RCU_NODES];
+	rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
+	for (; rnp_cur < rnp_end; rnp_cur++) {
+		spin_lock(&rnp_cur->lock);	/* irqs already disabled. */
+		rnp_cur->qsmask = rnp_cur->qsmaskinit;
+		spin_unlock(&rnp_cur->lock);	/* irqs already disabled. */
+	}
+
+	rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
+}
+
+/*
+ * Advance this CPU's callbacks, but only if the current grace period
+ * has ended.  This may be called only from the CPU to whom the rdp
+ * belongs.
+ */
+static void
+rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	long completed_snap;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	completed_snap = ACCESS_ONCE(rsp->completed);  /* outside of lock. */
+
+	/* Did another grace period end? */
+	if (rdp->completed != completed_snap) {
+
+		/* Advance callbacks.  No harm if list empty. */
+		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
+		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
+		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+		/* Remember that we saw this grace-period completion. */
+		rdp->completed = completed_snap;
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Similar to cpu_quiet(), for which it is a helper function.  Allows
+ * a group of CPUs to be quieted at one go, though all the CPUs in the
+ * group must be represented by the same leaf rcu_node structure.
+ * That structure's lock must be held upon entry, and it is released
+ * before return.
+ */
+static void
+cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
+	      unsigned long flags)
+	__releases(rnp->lock)
+{
+	/* Walk up the rcu_node hierarchy. */
+	for (;;) {
+		if (!(rnp->qsmask & mask)) {
+
+			/* Our bit has already been cleared, so done. */
+			spin_unlock_irqrestore(&rnp->lock, flags);
+			return;
+		}
+		rnp->qsmask &= ~mask;
+		if (rnp->qsmask != 0) {
+
+			/* Other bits still set at this level, so done. */
+			spin_unlock_irqrestore(&rnp->lock, flags);
+			return;
+		}
+		mask = rnp->grpmask;
+		if (rnp->parent == NULL) {
+
+			/* No more levels.  Exit loop holding root lock. */
+
+			break;
+		}
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		rnp = rnp->parent;
+		spin_lock_irqsave(&rnp->lock, flags);
+	}
+
+	/*
+	 * Get here if we are the last CPU to pass through a quiescent
+	 * state for this grace period.  Clean up and let rcu_start_gp()
+	 * start up the next grace period if one is needed.  Note that
+	 * we still hold rnp->lock, as required by rcu_start_gp(), which
+	 * will release it.
+	 */
+	rsp->completed = rsp->gpnum;
+	rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
+	rcu_start_gp(rsp, flags);  /* releases rnp->lock. */
+}
+
+/*
+ * Record a quiescent state for the specified CPU, which must either be
+ * the current CPU or an offline CPU.  The lastcomp argument is used to
+ * make sure we are still in the grace period of interest.  We don't want
+ * to end the current grace period based on quiescent states detected in
+ * an earlier grace period!
+ */
+static void
+cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
+{
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp;
+
+	rnp = rdp->mynode;
+	spin_lock_irqsave(&rnp->lock, flags);
+	if (lastcomp != ACCESS_ONCE(rsp->completed)) {
+
+		/*
+		 * Someone beat us to it for this grace period, so leave.
+		 * The race with GP start is resolved by the fact that we
+		 * hold the leaf rcu_node lock, so that the per-CPU bits
+		 * cannot yet be initialized -- so we would simply find our
+		 * CPU's bit already cleared in cpu_quiet_msk() if this race
+		 * occurred.
+		 */
+		rdp->passed_quiesc = 0;	/* try again later! */
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	mask = rdp->grpmask;
+	if ((rnp->qsmask & mask) == 0) {
+		spin_unlock_irqrestore(&rnp->lock, flags);
+	} else {
+		rdp->qs_pending = 0;
+
+		/*
+		 * This GP can't end until cpu checks in, so all of our
+		 * callbacks can be processed during the next GP.
+		 */
+		rdp = rsp->rda[smp_processor_id()];
+		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+		cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */
+	}
+}
+
+/*
+ * Check to see if there is a new grace period of which this CPU
+ * is not yet aware, and if so, set up local rcu_data state for it.
+ * Otherwise, see if this CPU has just passed through its first
+ * quiescent state for this grace period, and record that fact if so.
+ */
+static void
+rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	/* If there is now a new grace period, record and return. */
+	if (check_for_new_grace_period(rsp, rdp))
+		return;
+
+	/*
+	 * Does this CPU still need to do its part for current grace period?
+	 * If no, return and let the other CPUs do their part as well.
+	 */
+	if (!rdp->qs_pending)
+		return;
+
+	/*
+	 * Was there a quiescent state since the beginning of the grace
+	 * period? If no, then exit and wait for the next call.
+	 */
+	if (!rdp->passed_quiesc)
+		return;
+
+	/* Tell RCU we are done (but cpu_quiet() will be the judge of that). */
+	cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
+ * and move all callbacks from the outgoing CPU to the current one.
+ */
+static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
+{
+	int i;
+	unsigned long flags;
+	long lastcomp;
+	unsigned long mask;
+	struct rcu_data *rdp = rsp->rda[cpu];
+	struct rcu_data *rdp_me;
+	struct rcu_node *rnp;
+
+	/* Exclude any attempts to start a new grace period. */
+	spin_lock_irqsave(&rsp->onofflock, flags);
+
+	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
+	rnp = rdp->mynode;
+	mask = rdp->grpmask;	/* rnp->grplo is constant. */
+	do {
+		spin_lock(&rnp->lock);		/* irqs already disabled. */
+		rnp->qsmaskinit &= ~mask;
+		if (rnp->qsmaskinit != 0) {
+			spin_unlock(&rnp->lock); /* irqs already disabled. */
+			break;
+		}
+		mask = rnp->grpmask;
+		spin_unlock(&rnp->lock);	/* irqs already disabled. */
+		rnp = rnp->parent;
+	} while (rnp != NULL);
+	lastcomp = rsp->completed;
+
+	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
+
+	/* Being offline is a quiescent state, so go record it. */
+	cpu_quiet(cpu, rsp, rdp, lastcomp);
+
+	/*
+	 * Move callbacks from the outgoing CPU to the running CPU.
+	 * Note that the outgoing CPU is now quiscent, so it is now
+	 * (uncharacteristically) safe to access it rcu_data structure.
+	 * Note also that we must carefully retain the order of the
+	 * outgoing CPU's callbacks in order for rcu_barrier() to work
+	 * correctly.  Finally, note that we start all the callbacks
+	 * afresh, even those that have passed through a grace period
+	 * and are therefore ready to invoke.  The theory is that hotplug
+	 * events are rare, and that if they are frequent enough to
+	 * indefinitely delay callbacks, you have far worse things to
+	 * be worrying about.
+	 */
+	rdp_me = rsp->rda[smp_processor_id()];
+	if (rdp->nxtlist != NULL) {
+		*rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+		rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+		rdp->nxtlist = NULL;
+		for (i = 0; i < RCU_NEXT_SIZE; i++)
+			rdp->nxttail[i] = &rdp->nxtlist;
+		rdp_me->qlen += rdp->qlen;
+		rdp->qlen = 0;
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Remove the specified CPU from the RCU hierarchy and move any pending
+ * callbacks that it might have to the current CPU.  This code assumes
+ * that at least one CPU in the system will remain running at all times.
+ * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
+ */
+static void rcu_offline_cpu(int cpu)
+{
+	__rcu_offline_cpu(cpu, &rcu_state);
+	__rcu_offline_cpu(cpu, &rcu_bh_state);
+}
+
+#else /* #ifdef CONFIG_HOTPLUG_CPU */
+
+static void rcu_offline_cpu(int cpu)
+{
+}
+
+#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Invoke any RCU callbacks that have made it to the end of their grace
+ * period.  Thottle as specified by rdp->blimit.
+ */
+static void rcu_do_batch(struct rcu_data *rdp)
+{
+	unsigned long flags;
+	struct rcu_head *next, *list, **tail;
+	int count;
+
+	/* If no callbacks are ready, just return.*/
+	if (!cpu_has_callbacks_ready_to_invoke(rdp))
+		return;
+
+	/*
+	 * Extract the list of ready callbacks, disabling to prevent
+	 * races with call_rcu() from interrupt handlers.
+	 */
+	local_irq_save(flags);
+	list = rdp->nxtlist;
+	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
+	tail = rdp->nxttail[RCU_DONE_TAIL];
+	for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
+		if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
+			rdp->nxttail[count] = &rdp->nxtlist;
+	local_irq_restore(flags);
+
+	/* Invoke callbacks. */
+	count = 0;
+	while (list) {
+		next = list->next;
+		prefetch(next);
+		list->func(list);
+		list = next;
+		if (++count >= rdp->blimit)
+			break;
+	}
+
+	local_irq_save(flags);
+
+	/* Update count, and requeue any remaining callbacks. */
+	rdp->qlen -= count;
+	if (list != NULL) {
+		*tail = rdp->nxtlist;
+		rdp->nxtlist = list;
+		for (count = 0; count < RCU_NEXT_SIZE; count++)
+			if (&rdp->nxtlist == rdp->nxttail[count])
+				rdp->nxttail[count] = tail;
+			else
+				break;
+	}
+
+	/* Reinstate batch limit if we have worked down the excess. */
+	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
+		rdp->blimit = blimit;
+
+	local_irq_restore(flags);
+
+	/* Re-raise the RCU softirq if there are callbacks remaining. */
+	if (cpu_has_callbacks_ready_to_invoke(rdp))
+		raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Check to see if this CPU is in a non-context-switch quiescent state
+ * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
+ * Also schedule the RCU softirq handler.
+ *
+ * This function must be called with hardirqs disabled.  It is normally
+ * invoked from the scheduling-clock interrupt.  If rcu_pending returns
+ * false, there is no point in invoking rcu_check_callbacks().
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user ||
+	    (idle_cpu(cpu) && !in_softirq() &&
+				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+
+		/*
+		 * Get here if this CPU took its interrupt from user
+		 * mode or from the idle loop, and if this is not a
+		 * nested interrupt.  In this case, the CPU is in
+		 * a quiescent state, so count it.
+		 *
+		 * No memory barrier is required here because both
+		 * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference
+		 * only CPU-local variables that other CPUs neither
+		 * access nor modify, at least not while the corresponding
+		 * CPU is online.
+		 */
+
+		rcu_qsctr_inc(cpu);
+		rcu_bh_qsctr_inc(cpu);
+
+	} else if (!in_softirq()) {
+
+		/*
+		 * Get here if this CPU did not take its interrupt from
+		 * softirq, in other words, if it is not interrupting
+		 * a rcu_bh read-side critical section.  This is an _bh
+		 * critical section, so count it.
+		 */
+
+		rcu_bh_qsctr_inc(cpu);
+	}
+	raise_softirq(RCU_SOFTIRQ);
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Scan the leaf rcu_node structures, processing dyntick state for any that
+ * have not yet encountered a quiescent state, using the function specified.
+ * Returns 1 if the current grace period ends while scanning (possibly
+ * because we made it end).
+ */
+static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
+			       int (*f)(struct rcu_data *))
+{
+	unsigned long bit;
+	int cpu;
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
+	struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES];
+
+	for (; rnp_cur < rnp_end; rnp_cur++) {
+		mask = 0;
+		spin_lock_irqsave(&rnp_cur->lock, flags);
+		if (rsp->completed != lastcomp) {
+			spin_unlock_irqrestore(&rnp_cur->lock, flags);
+			return 1;
+		}
+		if (rnp_cur->qsmask == 0) {
+			spin_unlock_irqrestore(&rnp_cur->lock, flags);
+			continue;
+		}
+		cpu = rnp_cur->grplo;
+		bit = 1;
+		for (; cpu <= rnp_cur->grphi; cpu++, bit <<= 1) {
+			if ((rnp_cur->qsmask & bit) != 0 && f(rsp->rda[cpu]))
+				mask |= bit;
+		}
+		if (mask != 0 && rsp->completed == lastcomp) {
+
+			/* cpu_quiet_msk() releases rnp_cur->lock. */
+			cpu_quiet_msk(mask, rsp, rnp_cur, flags);
+			continue;
+		}
+		spin_unlock_irqrestore(&rnp_cur->lock, flags);
+	}
+	return 0;
+}
+
+/*
+ * Force quiescent states on reluctant CPUs, and also detect which
+ * CPUs are in dyntick-idle mode.
+ */
+static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
+{
+	unsigned long flags;
+	long lastcomp;
+	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	u8 signaled;
+
+	if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum))
+		return;  /* No grace period in progress, nothing to force. */
+	if (!spin_trylock_irqsave(&rsp->fqslock, flags)) {
+		rsp->n_force_qs_lh++; /* Inexact, can lose counts.  Tough! */
+		return;	/* Someone else is already on the job. */
+	}
+	if (relaxed &&
+	    (long)(rsp->jiffies_force_qs - jiffies) >= 0 &&
+	    (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
+		goto unlock_ret; /* no emergency and done recently. */
+	rsp->n_force_qs++;
+	spin_lock(&rnp->lock);
+	lastcomp = rsp->completed;
+	signaled = rsp->signaled;
+	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
+	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
+				      RCU_JIFFIES_TILL_FORCE_QS;
+	if (lastcomp == rsp->gpnum) {
+		rsp->n_force_qs_ngp++;
+		spin_unlock(&rnp->lock);
+		goto unlock_ret;  /* no GP in progress, time updated. */
+	}
+	spin_unlock(&rnp->lock);
+	switch (signaled) {
+	case RCU_GP_INIT:
+
+		break; /* grace period still initializing, ignore. */
+
+	case RCU_SAVE_DYNTICK:
+
+		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
+			break; /* So gcc recognizes the dead code. */
+
+		/* Record dyntick-idle state. */
+		if (rcu_process_dyntick(rsp, lastcomp,
+					dyntick_save_progress_counter))
+			goto unlock_ret;
+
+		/* Update state, record completion counter. */
+		spin_lock(&rnp->lock);
+		if (lastcomp == rsp->completed) {
+			rsp->signaled = RCU_FORCE_QS;
+			dyntick_record_completed(rsp, lastcomp);
+		}
+		spin_unlock(&rnp->lock);
+		break;
+
+	case RCU_FORCE_QS:
+
+		/* Check dyntick-idle state, send IPI to laggarts. */
+		if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp),
+					rcu_implicit_dynticks_qs))
+			goto unlock_ret;
+
+		/* Leave state in case more forcing is required. */
+
+		break;
+	}
+unlock_ret:
+	spin_unlock_irqrestore(&rsp->fqslock, flags);
+}
+
+#else /* #ifdef CONFIG_SMP */
+
+static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
+{
+	set_need_resched();
+}
+
+#endif /* #else #ifdef CONFIG_SMP */
+
+/*
+ * This does the RCU processing work from softirq context for the
+ * specified rcu_state and rcu_data structures.  This may be called
+ * only from the CPU to whom the rdp belongs.
+ */
+static void
+__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	unsigned long flags;
+
+	/*
+	 * If an RCU GP has gone long enough, go check for dyntick
+	 * idle CPUs and, if needed, send resched IPIs.
+	 */
+	if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
+	    (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
+		force_quiescent_state(rsp, 1);
+
+	/*
+	 * Advance callbacks in response to end of earlier grace
+	 * period that some other CPU ended.
+	 */
+	rcu_process_gp_end(rsp, rdp);
+
+	/* Update RCU state based on any recent quiescent states. */
+	rcu_check_quiescent_state(rsp, rdp);
+
+	/* Does this CPU require a not-yet-started grace period? */
+	if (cpu_needs_another_gp(rsp, rdp)) {
+		spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
+		rcu_start_gp(rsp, flags);  /* releases above lock */
+	}
+
+	/* If there are callbacks ready, invoke them. */
+	rcu_do_batch(rdp);
+}
+
+/*
+ * Do softirq processing for the current CPU.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+	/*
+	 * Memory references from any prior RCU read-side critical sections
+	 * executed by the interrupted code must be seen before any RCU
+	 * grace-period manipulations below.
+	 */
+	smp_mb(); /* See above block comment. */
+
+	__rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+
+	/*
+	 * Memory references from any later RCU read-side critical sections
+	 * executed by the interrupted code must be seen after any RCU
+	 * grace-period manipulations above.
+	 */
+	smp_mb(); /* See above block comment. */
+}
+
+static void
+__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
+	   struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	head->func = func;
+	head->next = NULL;
+
+	smp_mb(); /* Ensure RCU update seen before callback registry. */
+
+	/*
+	 * Opportunistically note grace-period endings and beginnings.
+	 * Note that we might see a beginning right after we see an
+	 * end, but never vice versa, since this CPU has to pass through
+	 * a quiescent state betweentimes.
+	 */
+	local_irq_save(flags);
+	rdp = rsp->rda[smp_processor_id()];
+	rcu_process_gp_end(rsp, rdp);
+	check_for_new_grace_period(rsp, rdp);
+
+	/* Add the callback to our list. */
+	*rdp->nxttail[RCU_NEXT_TAIL] = head;
+	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
+
+	/* Start a new grace period if one not already started. */
+	if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) {
+		unsigned long nestflag;
+		struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+		spin_lock_irqsave(&rnp_root->lock, nestflag);
+		rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock. */
+	}
+
+	/* Force the grace period if too many callbacks or too long waiting. */
+	if (unlikely(++rdp->qlen > qhimark)) {
+		rdp->blimit = LONG_MAX;
+		force_quiescent_state(rsp, 0);
+	} else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
+		   (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
+		force_quiescent_state(rsp, 1);
+	local_irq_restore(flags);
+}
+
+/*
+ * Queue an RCU callback for invocation after a grace period.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_state);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Queue an RCU for invocation after a quicker grace period.
+ */
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_bh_state);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, for the specified type of RCU, returning 1 if so.
+ * The checks are in order of increasing expense: checks that can be
+ * carried out against CPU-local state are performed first.  However,
+ * we must check for CPU stalls first, else we might not get a chance.
+ */
+static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	rdp->n_rcu_pending++;
+
+	/* Check for CPU stalls, if enabled. */
+	check_cpu_stall(rsp, rdp);
+
+	/* Is the RCU core waiting for a quiescent state from this CPU? */
+	if (rdp->qs_pending)
+		return 1;
+
+	/* Does this CPU have callbacks ready to invoke? */
+	if (cpu_has_callbacks_ready_to_invoke(rdp))
+		return 1;
+
+	/* Has RCU gone idle with this CPU needing another grace period? */
+	if (cpu_needs_another_gp(rsp, rdp))
+		return 1;
+
+	/* Has another RCU grace period completed?  */
+	if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+		return 1;
+
+	/* Has a new RCU grace period started? */
+	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+		return 1;
+
+	/* Has an RCU GP gone long enough to send resched IPIs &c? */
+	if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
+	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
+	     (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
+		return 1;
+
+	/* nothing to do */
+	return 0;
+}
+
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so.  This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
+int rcu_pending(int cpu)
+{
+	return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) ||
+	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
+}
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	/* RCU callbacks either ready or pending? */
+	return per_cpu(rcu_data, cpu).nxtlist ||
+	       per_cpu(rcu_bh_data, cpu).nxtlist;
+}
+
+/*
+ * Initialize a CPU's per-CPU RCU data.  We take this "scorched earth"
+ * approach so that we don't have to worry about how long the CPU has
+ * been gone, or whether it ever was online previously.  We do trust the
+ * ->mynode field, as it is constant for a given struct rcu_data and
+ * initialized during early boot.
+ *
+ * Note that only one online or offline event can be happening at a given
+ * time.  Note also that we can accept some slop in the rsp->completed
+ * access due to the fact that this CPU cannot possibly have any RCU
+ * callbacks in flight yet.
+ */
+static void
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	int i;
+	long lastcomp;
+	unsigned long mask;
+	struct rcu_data *rdp = rsp->rda[cpu];
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	/* Set up local state, ensuring consistent view of global state. */
+	spin_lock_irqsave(&rnp->lock, flags);
+	lastcomp = rsp->completed;
+	rdp->completed = lastcomp;
+	rdp->gpnum = lastcomp;
+	rdp->passed_quiesc = 0;  /* We could be racing with new GP, */
+	rdp->qs_pending = 1;	 /*  so set up to respond to current GP. */
+	rdp->beenonline = 1;	 /* We have now been online. */
+	rdp->passed_quiesc_completed = lastcomp - 1;
+	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+	rdp->qlen = 0;
+	rdp->blimit = blimit;
+#ifdef CONFIG_NO_HZ
+	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
+#endif /* #ifdef CONFIG_NO_HZ */
+	rdp->cpu = cpu;
+	spin_unlock(&rnp->lock);		/* irqs remain disabled. */
+
+	/*
+	 * A new grace period might start here.  If so, we won't be part
+	 * of it, but that is OK, as we are currently in a quiescent state.
+	 */
+
+	/* Exclude any attempts to start a new GP on large systems. */
+	spin_lock(&rsp->onofflock);		/* irqs already disabled. */
+
+	/* Add CPU to rcu_node bitmasks. */
+	rnp = rdp->mynode;
+	mask = rdp->grpmask;
+	do {
+		/* Exclude any attempts to start a new GP on small systems. */
+		spin_lock(&rnp->lock);	/* irqs already disabled. */
+		rnp->qsmaskinit |= mask;
+		mask = rnp->grpmask;
+		spin_unlock(&rnp->lock); /* irqs already disabled. */
+		rnp = rnp->parent;
+	} while (rnp != NULL && !(rnp->qsmaskinit & mask));
+
+	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
+
+	/*
+	 * A new grace period might start here.  If so, we will be part of
+	 * it, and its gpnum will be greater than ours, so we will
+	 * participate.  It is also possible for the gpnum to have been
+	 * incremented before this function was called, and the bitmasks
+	 * to not be filled out until now, in which case we will also
+	 * participate due to our gpnum being behind.
+	 */
+
+	/* Since it is coming online, the CPU is in a quiescent state. */
+	cpu_quiet(cpu, rsp, rdp, lastcomp);
+	local_irq_restore(flags);
+}
+
+static void __cpuinit rcu_online_cpu(int cpu)
+{
+#ifdef CONFIG_NO_HZ
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	rdtp->dynticks_nesting = 1;
+	rdtp->dynticks |= 1; 	/* need consecutive #s even for hotplug. */
+	rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1;
+#endif /* #ifdef CONFIG_NO_HZ */
+	rcu_init_percpu_data(cpu, &rcu_state);
+	rcu_init_percpu_data(cpu, &rcu_bh_state);
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
+
+/*
+ * Handle CPU online/offline notifcation events.
+ */
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		rcu_online_cpu(cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		rcu_offline_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/*
+ * Compute the per-level fanout, either using the exact fanout specified
+ * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
+ */
+#ifdef CONFIG_RCU_FANOUT_EXACT
+static void __init rcu_init_levelspread(struct rcu_state *rsp)
+{
+	int i;
+
+	for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
+		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+}
+#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
+static void __init rcu_init_levelspread(struct rcu_state *rsp)
+{
+	int ccur;
+	int cprv;
+	int i;
+
+	cprv = NR_CPUS;
+	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+		ccur = rsp->levelcnt[i];
+		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
+		cprv = ccur;
+	}
+}
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
+
+/*
+ * Helper function for rcu_init() that initializes one rcu_state structure.
+ */
+static void __init rcu_init_one(struct rcu_state *rsp)
+{
+	int cpustride = 1;
+	int i;
+	int j;
+	struct rcu_node *rnp;
+
+	/* Initialize the level-tracking arrays. */
+
+	for (i = 1; i < NUM_RCU_LVLS; i++)
+		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
+	rcu_init_levelspread(rsp);
+
+	/* Initialize the elements themselves, starting from the leaves. */
+
+	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+		cpustride *= rsp->levelspread[i];
+		rnp = rsp->level[i];
+		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
+			spin_lock_init(&rnp->lock);
+			rnp->qsmask = 0;
+			rnp->qsmaskinit = 0;
+			rnp->grplo = j * cpustride;
+			rnp->grphi = (j + 1) * cpustride - 1;
+			if (rnp->grphi >= NR_CPUS)
+				rnp->grphi = NR_CPUS - 1;
+			if (i == 0) {
+				rnp->grpnum = 0;
+				rnp->grpmask = 0;
+				rnp->parent = NULL;
+			} else {
+				rnp->grpnum = j % rsp->levelspread[i - 1];
+				rnp->grpmask = 1UL << rnp->grpnum;
+				rnp->parent = rsp->level[i - 1] +
+					      j / rsp->levelspread[i - 1];
+			}
+			rnp->level = i;
+		}
+	}
+}
+
+/*
+ * Helper macro for __rcu_init().  To be used nowhere else!
+ * Assigns leaf node pointers into each CPU's rcu_data structure.
+ */
+#define RCU_DATA_PTR_INIT(rsp, rcu_data) \
+do { \
+	rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \
+	j = 0; \
+	for_each_possible_cpu(i) { \
+		if (i > rnp[j].grphi) \
+			j++; \
+		per_cpu(rcu_data, i).mynode = &rnp[j]; \
+		(rsp)->rda[i] = &per_cpu(rcu_data, i); \
+	} \
+} while (0)
+
+static struct notifier_block __cpuinitdata rcu_nb = {
+	.notifier_call	= rcu_cpu_notify,
+};
+
+void __init __rcu_init(void)
+{
+	int i;			/* All used by RCU_DATA_PTR_INIT(). */
+	int j;
+	struct rcu_node *rnp;
+
+	printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+	rcu_init_one(&rcu_state);
+	RCU_DATA_PTR_INIT(&rcu_state, rcu_data);
+	rcu_init_one(&rcu_bh_state);
+	RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data);
+
+	for_each_online_cpu(i)
+		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
+	/* Register notifier for non-boot CPUs */
+	register_cpu_notifier(&rcu_nb);
+	printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
+}
+
+module_param(blimit, int, 0);
+module_param(qhimark, int, 0);
+module_param(qlowmark, int, 0);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
new file mode 100644
index 000000000000..d6db3e837826
--- /dev/null
+++ b/kernel/rcutree_trace.c
@@ -0,0 +1,271 @@
+/*
+ * Read-Copy Update tracing for classic implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Papers:  http://www.rdrop.com/users/paulmck/RCU
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
+{
+	if (!rdp->beenonline)
+		return;
+	seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
+		   rdp->completed, rdp->gpnum,
+		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
+		   rdp->qs_pending,
+		   rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
+		   (int)(rdp->n_rcu_pending & 0xffff));
+#ifdef CONFIG_NO_HZ
+	seq_printf(m, " dt=%d/%d dn=%d df=%lu",
+		   rdp->dynticks->dynticks,
+		   rdp->dynticks->dynticks_nesting,
+		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks_fqs);
+#endif /* #ifdef CONFIG_NO_HZ */
+	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
+	seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit);
+}
+
+#define PRINT_RCU_DATA(name, func, m) \
+	do { \
+		int _p_r_d_i; \
+		\
+		for_each_possible_cpu(_p_r_d_i) \
+			func(m, &per_cpu(name, _p_r_d_i)); \
+	} while (0)
+
+static int show_rcudata(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m);
+	seq_puts(m, "rcu_bh:\n");
+	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m);
+	return 0;
+}
+
+static int rcudata_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcudata, NULL);
+}
+
+static struct file_operations rcudata_fops = {
+	.owner = THIS_MODULE,
+	.open = rcudata_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
+{
+	if (!rdp->beenonline)
+		return;
+	seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
+		   rdp->completed, rdp->gpnum,
+		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
+		   rdp->qs_pending,
+		   rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
+		   rdp->n_rcu_pending);
+#ifdef CONFIG_NO_HZ
+	seq_printf(m, ",%d,%d,%d,%lu",
+		   rdp->dynticks->dynticks,
+		   rdp->dynticks->dynticks_nesting,
+		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks_fqs);
+#endif /* #ifdef CONFIG_NO_HZ */
+	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
+	seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit);
+}
+
+static int show_rcudata_csv(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\",");
+#ifdef CONFIG_NO_HZ
+	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+#endif /* #ifdef CONFIG_NO_HZ */
+	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n");
+	seq_puts(m, "\"rcu:\"\n");
+	PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m);
+	seq_puts(m, "\"rcu_bh:\"\n");
+	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
+	return 0;
+}
+
+static int rcudata_csv_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcudata_csv, NULL);
+}
+
+static struct file_operations rcudata_csv_fops = {
+	.owner = THIS_MODULE,
+	.open = rcudata_csv_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
+{
+	int level = 0;
+	struct rcu_node *rnp;
+
+	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
+	              "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
+		   rsp->completed, rsp->gpnum, rsp->signaled,
+		   (long)(rsp->jiffies_force_qs - jiffies),
+		   (int)(jiffies & 0xffff),
+		   rsp->n_force_qs, rsp->n_force_qs_ngp,
+		   rsp->n_force_qs - rsp->n_force_qs_ngp,
+		   rsp->n_force_qs_lh);
+	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
+		if (rnp->level != level) {
+			seq_puts(m, "\n");
+			level = rnp->level;
+		}
+		seq_printf(m, "%lx/%lx %d:%d ^%d    ",
+			   rnp->qsmask, rnp->qsmaskinit,
+			   rnp->grplo, rnp->grphi, rnp->grpnum);
+	}
+	seq_puts(m, "\n");
+}
+
+static int show_rcuhier(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	print_one_rcu_state(m, &rcu_state);
+	seq_puts(m, "rcu_bh:\n");
+	print_one_rcu_state(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcuhier_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcuhier, NULL);
+}
+
+static struct file_operations rcuhier_fops = {
+	.owner = THIS_MODULE,
+	.open = rcuhier_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int show_rcugp(struct seq_file *m, void *unused)
+{
+	seq_printf(m, "rcu: completed=%ld  gpnum=%ld\n",
+		   rcu_state.completed, rcu_state.gpnum);
+	seq_printf(m, "rcu_bh: completed=%ld  gpnum=%ld\n",
+		   rcu_bh_state.completed, rcu_bh_state.gpnum);
+	return 0;
+}
+
+static int rcugp_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcugp, NULL);
+}
+
+static struct file_operations rcugp_fops = {
+	.owner = THIS_MODULE,
+	.open = rcugp_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+static int __init rcuclassic_trace_init(void)
+{
+	rcudir = debugfs_create_dir("rcu", NULL);
+	if (!rcudir)
+		goto out;
+
+	datadir = debugfs_create_file("rcudata", 0444, rcudir,
+						NULL, &rcudata_fops);
+	if (!datadir)
+		goto free_out;
+
+	datadir_csv = debugfs_create_file("rcudata.csv", 0444, rcudir,
+						NULL, &rcudata_csv_fops);
+	if (!datadir_csv)
+		goto free_out;
+
+	gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
+	if (!gpdir)
+		goto free_out;
+
+	hierdir = debugfs_create_file("rcuhier", 0444, rcudir,
+						NULL, &rcuhier_fops);
+	if (!hierdir)
+		goto free_out;
+	return 0;
+free_out:
+	if (datadir)
+		debugfs_remove(datadir);
+	if (datadir_csv)
+		debugfs_remove(datadir_csv);
+	if (gpdir)
+		debugfs_remove(gpdir);
+	debugfs_remove(rcudir);
+out:
+	return 1;
+}
+
+static void __exit rcuclassic_trace_cleanup(void)
+{
+	debugfs_remove(datadir);
+	debugfs_remove(datadir_csv);
+	debugfs_remove(gpdir);
+	debugfs_remove(hierdir);
+	debugfs_remove(rcudir);
+}
+
+
+module_init(rcuclassic_trace_init);
+module_exit(rcuclassic_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
+MODULE_LICENSE("GPL");
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..80d323e6f61a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,6 +269,7 @@ void irq_enter(void)
 {
 	int cpu = smp_processor_id();
 
+	rcu_irq_enter();
 	if (idle_cpu(cpu) && !in_interrupt()) {
 		__irq_enter();
 		tick_check_idle(cpu);
@@ -295,9 +296,9 @@ void irq_exit(void)
 
 #ifdef CONFIG_NO_HZ
 	/* Make sure that timer wheel updates are propagated */
-	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
-		tick_nohz_stop_sched_tick(0);
 	rcu_irq_exit();
+	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
+		tick_nohz_stop_sched_tick(0);
 #endif
 	preempt_enable_no_resched();
 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443bc..465d822f3f5d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -619,6 +619,19 @@ config RCU_CPU_STALL_DETECTOR
 
 	  Say N if you are unsure.
 
+config RCU_CPU_STALL_DETECTOR
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU || TREE_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.3


From 3c8bb73ace6249bd089b70c941440441940e3365 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:27 -0800
Subject: x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3

Impact: Code transformation, new functions added should have no effect.

Drivers use mmap followed by pgprot_* and remap_pfn_range or vm_insert_pfn,
in order to export reserved memory to userspace. Currently, such mappings are
not tracked and hence not kept consistent with other mappings (/dev/mem,
pci resource, ioremap) for the sme memory, that may exist in the system.

The following patchset adds x86 PAT attribute tracking and untracking for
pfnmap related APIs.

First three patches in the patchset are changing the generic mm code to fit
in this tracking. Last four patches are x86 specific to make things work
with x86 PAT code. The patchset aso introduces pgprot_writecombine interface,
which gives writecombine mapping when enabled, falling back to
pgprot_noncached otherwise.

This patch:

While working on x86 PAT, we faced some hurdles with trackking
remap_pfn_range() regions, as we do not have any information to say
whether that PFNMAP mapping is linear for the entire vma range or
it is smaller granularity regions within the vma.

A simple solution to this is to use vm_pgoff as an indicator for
linear mapping over the vma region. Currently, remap_pfn_range
only sets vm_pgoff for COW mappings. Below patch changes the
logic and sets the vm_pgoff irrespective of COW. This will still not
be enough for the case where pfn is zero (vma region mapped to
physical address zero). But, for all the other cases, we can look at
pfnmap VMAs and say whether the mappng is for the entire vma region
or not.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/mm.h | 9 +++++++++
 mm/memory.c        | 7 +++----
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffee2f743418..2be8d9b5e46f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -145,6 +145,15 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 
+static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
+{
+	return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff);
+}
+
+static inline int is_pfn_mapping(struct vm_area_struct *vma)
+{
+	return (vma->vm_flags & VM_PFNMAP);
+}
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..cef95c8c77fa 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1575,11 +1575,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * behaviour that some programs depend on. We mark the "original"
 	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
 	 */
-	if (is_cow_mapping(vma->vm_flags)) {
-		if (addr != vma->vm_start || end != vma->vm_end)
-			return -EINVAL;
+	if (addr == vma->vm_start && end == vma->vm_end)
 		vma->vm_pgoff = pfn;
-	}
+	else if (is_cow_mapping(vma->vm_flags))
+		return -EINVAL;
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-- 
cgit v1.2.3


From e121e418441525b5636321fe03d16f0193ad218e Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:28 -0800
Subject: x86: PAT: add follow_pfnmp_pte routine to help tracking pfnmap pages
 - v3

Impact: New currently unused interface.

Add a generic interface to follow pfn in a pfnmap vma range. This is used by
one of the subsequent x86 PAT related patch to keep track of memory types
for vma regions across vma copy and free.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/mm.h |  3 +++
 mm/memory.c        | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2be8d9b5e46f..a25024ff9c11 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1223,6 +1223,9 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
 
+int follow_pfnmap_pte(struct vm_area_struct *vma,
+				unsigned long address, pte_t *ret_ptep);
+
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index cef95c8c77fa..8ca6bbf34ad6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1111,6 +1111,49 @@ no_page_table:
 	return page;
 }
 
+int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address,
+			pte_t *ret_ptep)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	struct page *page;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!is_pfn_mapping(vma))
+		goto err;
+
+	page = NULL;
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto err;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto err;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto err;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto err_unlock;
+
+	*ret_ptep = pte;
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+
+err_unlock:
+	pte_unmap_unlock(ptep, ptl);
+err:
+	return -EINVAL;
+}
+
 /* Can we do the FOLL_ANON optimization? */
 static inline int use_zero_page(struct vm_area_struct *vma)
 {
-- 
cgit v1.2.3


From 2ab640379a0ab4cef746ced1d7e04a0941774bcb Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:29 -0800
Subject: x86: PAT: hooks in generic vm code to help archs to track pfnmap
 regions - v3

Impact: Introduces new hooks, which are currently null.

Introduce generic hooks in remap_pfn_range and vm_insert_pfn and
corresponding copy and free routines with reserve and free tracking.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/mm.h |  6 +++++
 mm/memory.c        | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a25024ff9c11..87ecb40e11a0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -155,6 +155,12 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma)
 	return (vma->vm_flags & VM_PFNMAP);
 }
 
+extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+				unsigned long pfn, unsigned long size);
+extern int track_pfn_vma_copy(struct vm_area_struct *vma);
+extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+				unsigned long size);
+
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
diff --git a/mm/memory.c b/mm/memory.c
index 8ca6bbf34ad6..1e8f0d347c0e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -99,6 +99,50 @@ int randomize_va_space __read_mostly =
 					2;
 #endif
 
+#ifndef track_pfn_vma_new
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
+ * for physical range indicated by pfn and size.
+ */
+int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+			unsigned long pfn, unsigned long size)
+{
+	return 0;
+}
+#endif
+
+#ifndef track_pfn_vma_copy
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+ */
+int track_pfn_vma_copy(struct vm_area_struct *vma)
+{
+	return 0;
+}
+#endif
+
+#ifndef untrack_pfn_vma
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case size can be zero).
+ */
+void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+			unsigned long size)
+{
+}
+#endif
+
 static int __init disable_randmaps(char *s)
 {
 	randomize_va_space = 0;
@@ -669,6 +713,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
+	if (is_pfn_mapping(vma)) {
+		/*
+		 * We do not free on error cases below as remove_vma
+		 * gets called on error from higher level routine
+		 */
+		ret = track_pfn_vma_copy(vma);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * We need to invalidate the secondary MMU mappings only when
 	 * there could be a permission downgrade on the ptes of the
@@ -915,6 +969,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		if (vma->vm_flags & VM_ACCOUNT)
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
 
+		if (is_pfn_mapping(vma))
+			untrack_pfn_vma(vma, 0, 0);
+
 		while (start != end) {
 			if (!tlb_start_valid) {
 				tlb_start = start;
@@ -1473,6 +1530,7 @@ out:
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
 {
+	int ret;
 	/*
 	 * Technically, architectures with pte_special can avoid all these
 	 * restrictions (same for remap_pfn_range).  However we would like
@@ -1487,7 +1545,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
-	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+	if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
+		return -EINVAL;
+
+	ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+
+	if (ret)
+		untrack_pfn_vma(vma, pfn, PAGE_SIZE);
+
+	return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
 
@@ -1625,6 +1691,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
+	err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
+	if (err)
+		return -EINVAL;
+
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
@@ -1636,6 +1706,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+
+	if (err)
+		untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+
 	return err;
 }
 EXPORT_SYMBOL(remap_pfn_range);
-- 
cgit v1.2.3


From 2520bd3123c00272f818a176c92d03c7d0a113d6 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Thu, 18 Dec 2008 11:41:32 -0800
Subject: x86: PAT: add pgprot_writecombine() interface for drivers - v3

Impact: New mm functionality.

Add pgprot_writecombine. pgprot_writecombine will be aliased to
pgprot_noncached when not supported by the architecture.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h | 3 +++
 arch/x86/mm/pat.c              | 8 ++++++++
 include/asm-generic/pgtable.h  | 4 ++++
 3 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6968d4f6be3e..579f8ceee948 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -168,6 +168,9 @@
 
 #ifndef __ASSEMBLY__
 
+#define pgprot_writecombine	pgprot_writecombine
+extern pgprot_t pgprot_writecombine(pgprot_t prot);
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 1069ffecf77d..d5254bae84f4 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -832,6 +832,14 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 	}
 }
 
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	if (pat_enabled)
+		return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
+	else
+		return pgprot_noncached(prot);
+}
+
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
 /* get Nth element of the linked list */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ef87f889ef62..b84633801fb6 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef pgprot_writecombine
+#define pgprot_writecombine pgprot_noncached
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
-- 
cgit v1.2.3


From c71dd42db2c6f1637b92502a214587431c1a6ad2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 01:09:51 +0100
Subject: tracing: fix warnings in kernel/trace/trace_sched_switch.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

these warnings:

  kernel/trace/trace_sched_switch.c: In function ‘tracing_sched_register’:
  kernel/trace/trace_sched_switch.c:96: warning: passing argument 1 of ‘register_trace_sched_wakeup_new’ from incompatible pointer type
  kernel/trace/trace_sched_switch.c:112: warning: passing argument 1 of ‘unregister_trace_sched_wakeup_new’ from incompatible pointer type
  kernel/trace/trace_sched_switch.c: In function ‘tracing_sched_unregister’:
  kernel/trace/trace_sched_switch.c:121: warning: passing argument 1 of ‘unregister_trace_sched_wakeup_new’ from incompatible pointer type

Trigger because sched_wakeup_new tracepoints need the same trace
signature as sched_wakeup - which was changed recently.

Fix it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h             | 4 ++--
 kernel/sched.c                    | 2 +-
 kernel/trace/trace_sched_switch.c | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/sched.h b/include/trace/sched.h
index f4549d506b16..bc4c9eadc6ba 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -21,8 +21,8 @@ DECLARE_TRACE(sched_wakeup,
 		TPARGS(rq, p));
 
 DECLARE_TRACE(sched_wakeup_new,
-	TPPROTO(struct rq *rq, struct task_struct *p),
-		TPARGS(rq, p));
+	TPPROTO(struct rq *rq, struct task_struct *p, int success),
+		TPARGS(rq, p, success));
 
 DECLARE_TRACE(sched_switch,
 	TPPROTO(struct rq *rq, struct task_struct *prev,
diff --git a/kernel/sched.c b/kernel/sched.c
index d377097572f9..ac5a70a87d1e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2457,7 +2457,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		p->sched_class->task_new(rq, p);
 		inc_nr_running(rq);
 	}
-	trace_sched_wakeup_new(rq, p);
+	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 863390557b44..781d72ef873c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -247,3 +247,4 @@ __init static int init_sched_switch_trace(void)
 	return register_tracer(&sched_switch_trace);
 }
 device_initcall(init_sched_switch_trace);
+
-- 
cgit v1.2.3


From 078a55db075bf4dcc03115dc94875b3dd83f69d4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 18 Dec 2008 16:57:52 -0800
Subject: sparseirq: add kernel-doc notation for new member in irq_desc, -v2

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 36a015746788..7fa7f30fccb6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -134,6 +134,9 @@ struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  * @irq:		interrupt number for this descriptor
+ * @timer_rand_state:	pointer to timer rand state struct
+ * @kstat_irqs:		irq stats per cpu
+ * @irq_2_iommu:	iommu with this irq
  * @handle_irq:		highlevel irq-events handler [if NULL, __do_IRQ()]
  * @chip:		low level interrupt hardware access
  * @msi_desc:		MSI descriptor
-- 
cgit v1.2.3


From e76f42761197dd6e9405e2eeb35932acfede115a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 13 Nov 2008 17:30:13 -0600
Subject: ACPI: fix 2.6.28 acpi.debug_level regression

acpi_early_init() was changed to over-write the cmdline param,
making it really inconvenient to set debug flags at boot-time.

Also,
This sets the default level to "info", which is what all the ACPI
drivers use.  So to enable messages from drivers, you only have to
supply the "layer" (a.k.a. "component").  For non-"info" ACPI core
and ACPI interpreter messages, you have to supply both level and
layer masks, as before.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/kernel-parameters.txt | 11 +++++++----
 drivers/acpi/bus.c                  |  8 --------
 drivers/acpi/utilities/utglobal.c   |  2 +-
 include/acpi/acoutput.h             |  2 +-
 4 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e0f346d201ed..c9115c1b672c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -220,14 +220,17 @@ and is between 256 and 4096 characters. It is defined in the file
 			Bits in debug_level correspond to a level in
 			ACPI_DEBUG_PRINT statements, e.g.,
 			    ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
-			See Documentation/acpi/debug.txt for more information
-			about debug layers and levels.
+			The debug_level mask defaults to "info".  See
+			Documentation/acpi/debug.txt for more information about
+			debug layers and levels.
 
+			Enable processor driver info messages:
+			    acpi.debug_layer=0x20000000
+			Enable PCI/PCI interrupt routing info messages:
+			    acpi.debug_layer=0x400000
 			Enable AML "Debug" output, i.e., stores to the Debug
 			object while interpreting AML:
 			    acpi.debug_layer=0xffffffff acpi.debug_level=0x2
-			Enable PCI/PCI interrupt routing info messages:
-			    acpi.debug_layer=0x400000 acpi.debug_level=0x4
 			Enable all messages related to ACPI hardware:
 			    acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
 
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 7edf6d913c13..765fd1c56cd6 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -688,14 +688,6 @@ void __init acpi_early_init(void)
 	if (acpi_disabled)
 		return;
 
-	/*
-	 * ACPI CA initializes acpi_dbg_level to non-zero, which means
-	 * we get debug output merely by turning on CONFIG_ACPI_DEBUG.
-	 * Turn it off so we don't get output unless the user specifies
-	 * acpi.debug_level.
-	 */
-	acpi_dbg_level = 0;
-
 	printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION);
 
 	/* enable workarounds, unless strict ACPI spec. compliance */
diff --git a/drivers/acpi/utilities/utglobal.c b/drivers/acpi/utilities/utglobal.c
index 670551b95e56..17ed5ac840f7 100644
--- a/drivers/acpi/utilities/utglobal.c
+++ b/drivers/acpi/utilities/utglobal.c
@@ -64,7 +64,7 @@ u32 acpi_dbg_level = ACPI_DEBUG_DEFAULT;
 
 /* Debug switch - layer (component) mask */
 
-u32 acpi_dbg_layer = ACPI_COMPONENT_DEFAULT | ACPI_ALL_DRIVERS;
+u32 acpi_dbg_layer = 0;
 u32 acpi_gbl_nesting_level = 0;
 
 /* Debugger globals */
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 09d33c7740f0..db8852d8bcf7 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -172,7 +172,7 @@
 
 /* Defaults for debug_level, debug and normal */
 
-#define ACPI_DEBUG_DEFAULT          (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT)
+#define ACPI_DEBUG_DEFAULT          (ACPI_LV_INFO)
 #define ACPI_NORMAL_DEFAULT         (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT)
 #define ACPI_DEBUG_ALL              (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
 
-- 
cgit v1.2.3


From abe1dfab60e1839d115930286cb421f5a5b193f3 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 26 Nov 2008 14:35:22 +0800
Subject: ACPI: don't cond_resched() when irqs_disabled()

The ACPI interpreter usually runs with irqs enabled.
However, during suspend/resume it runs with
irqs disabled to evaluate _GTS/_BFS, as well as
by irqrouter_resume() which evaluates _CRS, _PRS, _SRS.

http://bugzilla.kernel.org/show_bug.cgi?id=12252

Signed-off-by: Wu Fengguang <wfg@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/platform/aclinux.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 029c8c06c151..0515e754449d 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -141,6 +141,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 /*
  * We need to show where it is safe to preempt execution of ACPICA
  */
-#define ACPI_PREEMPTION_POINT()	cond_resched()
+#define ACPI_PREEMPTION_POINT()		\
+	do {				\
+		if (!irqs_disabled())	\
+			cond_resched();	\
+	} while (0)
 
 #endif				/* __ACLINUX_H__ */
-- 
cgit v1.2.3


From 420e7fabd9c6d907280ed6b3e40eef425c5d8d8d Mon Sep 17 00:00:00 2001
From: Henning Rogge <hrogge@googlemail.com>
Date: Thu, 11 Dec 2008 22:04:19 +0100
Subject: nl80211: Add signal strength and bandwith to nl80211station info

This patch adds signal strength and transmission bitrate
to the station_info of nl80211.

Signed-off-by: Henning Rogge <rogge@fgan.de>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 31 ++++++++++++++++++++++++++
 include/net/cfg80211.h  | 40 ++++++++++++++++++++++++++++++++++
 net/mac80211/cfg.c      | 25 ++++++++++++++++++++-
 net/wireless/nl80211.c  | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 04d4516f9c71..7501acfcfdc4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -424,6 +424,32 @@ enum nl80211_sta_flags {
 	NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_rate_info - bitrate information
+ *
+ * These attribute types are used with %NL80211_STA_INFO_TXRATE
+ * when getting information about the bitrate of a station.
+ *
+ * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s)
+ * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8)
+ * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate
+ * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval
+ * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined
+ * @__NL80211_RATE_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_rate_info {
+	__NL80211_RATE_INFO_INVALID,
+	NL80211_RATE_INFO_BITRATE,
+	NL80211_RATE_INFO_MCS,
+	NL80211_RATE_INFO_40_MHZ_WIDTH,
+	NL80211_RATE_INFO_SHORT_GI,
+
+	/* keep last */
+	__NL80211_RATE_INFO_AFTER_LAST,
+	NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_sta_info - station information
  *
@@ -436,6 +462,9 @@ enum nl80211_sta_flags {
  * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
+ * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
+ * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
+ * 	containing info as possible, see &enum nl80211_sta_info_txrate.
  */
 enum nl80211_sta_info {
 	__NL80211_STA_INFO_INVALID,
@@ -445,6 +474,8 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_LLID,
 	NL80211_STA_INFO_PLID,
 	NL80211_STA_INFO_PLINK_STATE,
+	NL80211_STA_INFO_SIGNAL,
+	NL80211_STA_INFO_TX_BITRATE,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a0c0bf19496c..65e03ac93109 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -169,6 +169,9 @@ struct station_parameters {
  * @STATION_INFO_LLID: @llid filled
  * @STATION_INFO_PLID: @plid filled
  * @STATION_INFO_PLINK_STATE: @plink_state filled
+ * @STATION_INFO_SIGNAL: @signal filled
+ * @STATION_INFO_TX_BITRATE: @tx_bitrate fields are filled
+ *  (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs)
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -177,6 +180,39 @@ enum station_info_flags {
 	STATION_INFO_LLID		= 1<<3,
 	STATION_INFO_PLID		= 1<<4,
 	STATION_INFO_PLINK_STATE	= 1<<5,
+	STATION_INFO_SIGNAL		= 1<<6,
+	STATION_INFO_TX_BITRATE		= 1<<7,
+};
+
+/**
+ * enum station_info_rate_flags - bitrate info flags
+ *
+ * Used by the driver to indicate the specific rate transmission
+ * type for 802.11n transmissions.
+ *
+ * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled
+ * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission
+ * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
+ */
+enum rate_info_flags {
+	RATE_INFO_FLAGS_MCS		= 1<<0,
+	RATE_INFO_FLAGS_40_MHZ_WIDTH	= 1<<1,
+	RATE_INFO_FLAGS_SHORT_GI	= 1<<2,
+};
+
+/**
+ * struct rate_info - bitrate information
+ *
+ * Information about a receiving or transmitting bitrate
+ *
+ * @flags: bitflag of flags from &enum rate_info_flags
+ * @mcs: mcs index if struct describes a 802.11n bitrate
+ * @legacy: bitrate in 100kbit/s for 802.11abg
+ */
+struct rate_info {
+	u8 flags;
+	u8 mcs;
+	u16 legacy;
 };
 
 /**
@@ -191,6 +227,8 @@ enum station_info_flags {
  * @llid: mesh local link id
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
+ * @signal: signal strength of last received packet in dBm
+ * @txrate: current unicast bitrate to this station
  */
 struct station_info {
 	u32 filled;
@@ -200,6 +238,8 @@ struct station_info {
 	u16 llid;
 	u16 plid;
 	u8 plink_state;
+	s8 signal;
+	struct rate_info txrate;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7912eb14eca0..23b5eeaf7bc5 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -310,12 +310,35 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	sinfo->filled = STATION_INFO_INACTIVE_TIME |
 			STATION_INFO_RX_BYTES |
-			STATION_INFO_TX_BYTES;
+			STATION_INFO_TX_BYTES |
+			STATION_INFO_TX_BITRATE;
 
 	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
 	sinfo->rx_bytes = sta->rx_bytes;
 	sinfo->tx_bytes = sta->tx_bytes;
 
+	if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
+		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->signal = (s8)sta->last_signal;
+	}
+
+	sinfo->txrate.flags = 0;
+	if (sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)
+		sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS;
+	if (sta->last_tx_rate.flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
+		sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+	if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI)
+		sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+
+	if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) {
+		struct ieee80211_supported_band *sband;
+		sband = sta->local->hw.wiphy->bands[
+				sta->local->hw.conf.channel->band];
+		sinfo->txrate.legacy =
+			sband->bitrates[sta->last_tx_rate.idx].bitrate;
+	} else
+		sinfo->txrate.mcs = sta->last_tx_rate.idx;
+
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
 		sinfo->filled |= STATION_INFO_LLID |
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4335f76be71f..93c9b983ce08 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1091,12 +1091,46 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags)
 	return 0;
 }
 
+static u16 nl80211_calculate_bitrate(struct rate_info *rate)
+{
+	int modulation, streams, bitrate;
+
+	if (!(rate->flags & RATE_INFO_FLAGS_MCS))
+		return rate->legacy;
+
+	/* the formula below does only work for MCS values smaller than 32 */
+	if (rate->mcs >= 32)
+		return 0;
+
+	modulation = rate->mcs & 7;
+	streams = (rate->mcs >> 3) + 1;
+
+	bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ?
+			13500000 : 6500000;
+
+	if (modulation < 4)
+		bitrate *= (modulation + 1);
+	else if (modulation == 4)
+		bitrate *= (modulation + 2);
+	else
+		bitrate *= (modulation + 3);
+
+	bitrate *= streams;
+
+	if (rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+		bitrate = (bitrate / 9) * 10;
+
+	/* do NOT round down here */
+	return (bitrate + 50000) / 100000;
+}
+
 static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 				int flags, struct net_device *dev,
 				u8 *mac_addr, struct station_info *sinfo)
 {
 	void *hdr;
-	struct nlattr *sinfoattr;
+	struct nlattr *sinfoattr, *txrate;
+	u16 bitrate;
 
 	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
 	if (!hdr)
@@ -1126,7 +1160,29 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_PLINK_STATE)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_PLINK_STATE,
 			    sinfo->plink_state);
+	if (sinfo->filled & STATION_INFO_SIGNAL)
+		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
+			   sinfo->signal);
+	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
+		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
+		if (!txrate)
+			goto nla_put_failure;
+
+		/* nl80211_calculate_bitrate will return 0 for mcs >= 32 */
+		bitrate = nl80211_calculate_bitrate(&sinfo->txrate);
+		if (bitrate > 0)
+			NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate);
 
+		if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS)
+			NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS,
+				    sinfo->txrate.mcs);
+		if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH)
+			NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH);
+		if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI)
+			NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI);
+
+		nla_nest_end(msg, txrate);
+	}
 	nla_nest_end(msg, sinfoattr);
 
 	return genlmsg_end(msg, hdr);
-- 
cgit v1.2.3


From 094d05dc32fc2930e381189a942016e5561775d9 Mon Sep 17 00:00:00 2001
From: Sujith <Sujith.Manoharan@atheros.com>
Date: Fri, 12 Dec 2008 11:57:43 +0530
Subject: mac80211: Fix HT channel selection

HT management is done differently for AP and STA modes, unify
to just the ->config() callback since HT is fundamentally a
PHY property and cannot be per-BSS.

Rename enum nl80211_sec_chan_offset as nl80211_channel_type to denote
the channel type ( NO_HT, HT20, HT40+, HT40- ).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Sujith <Sujith.Manoharan@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath9k/main.c      | 123 ++++++++-------------------------
 drivers/net/wireless/iwlwifi/iwl-agn.c |  18 +++--
 drivers/net/wireless/mac80211_hwsim.c  |   6 +-
 include/linux/nl80211.h                |  22 +++---
 include/net/cfg80211.h                 |   2 +-
 include/net/mac80211.h                 |   9 +--
 net/mac80211/cfg.c                     |   4 +-
 net/mac80211/ht.c                      |  35 +++++++---
 net/mac80211/ieee80211_i.h             |   2 +-
 net/mac80211/main.c                    |  27 +++-----
 net/mac80211/mlme.c                    |   1 +
 net/mac80211/util.c                    |   2 +-
 net/wireless/nl80211.c                 |  27 ++++----
 13 files changed, 109 insertions(+), 169 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 02e1771bb274..e22fea18bad6 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -623,37 +623,40 @@ static int ath_get_channel(struct ath_softc *sc,
 	return -1;
 }
 
-/* ext_chan_offset: (-1, 0, 1) (below, none, above) */
-
 static u32 ath_get_extchanmode(struct ath_softc *sc,
 			       struct ieee80211_channel *chan,
-			       int ext_chan_offset,
-			       enum ath9k_ht_macmode tx_chan_width)
+			       enum nl80211_channel_type channel_type)
 {
 	u32 chanmode = 0;
 
 	switch (chan->band) {
 	case IEEE80211_BAND_2GHZ:
-		if ((ext_chan_offset == 0) &&
-		    (tx_chan_width == ATH9K_HT_MACMODE_20))
+		switch(channel_type) {
+		case NL80211_CHAN_NO_HT:
+		case NL80211_CHAN_HT20:
 			chanmode = CHANNEL_G_HT20;
-		if ((ext_chan_offset == 1) &&
-		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
+			break;
+		case NL80211_CHAN_HT40PLUS:
 			chanmode = CHANNEL_G_HT40PLUS;
-		if ((ext_chan_offset == -1) &&
-		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
+			break;
+		case NL80211_CHAN_HT40MINUS:
 			chanmode = CHANNEL_G_HT40MINUS;
+			break;
+		}
 		break;
 	case IEEE80211_BAND_5GHZ:
-		if ((ext_chan_offset == 0) &&
-		    (tx_chan_width == ATH9K_HT_MACMODE_20))
+		switch(channel_type) {
+		case NL80211_CHAN_NO_HT:
+		case NL80211_CHAN_HT20:
 			chanmode = CHANNEL_A_HT20;
-		if ((ext_chan_offset == 1) &&
-		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
+			break;
+		case NL80211_CHAN_HT40PLUS:
 			chanmode = CHANNEL_A_HT40PLUS;
-		if ((ext_chan_offset == -1) &&
-		    (tx_chan_width == ATH9K_HT_MACMODE_2040))
+			break;
+		case NL80211_CHAN_HT40MINUS:
 			chanmode = CHANNEL_A_HT40MINUS;
+			break;
+		}
 		break;
 	default:
 		break;
@@ -829,45 +832,15 @@ static void setup_ht_cap(struct ieee80211_sta_ht_cap *ht_info)
 	ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 }
 
-static void ath9k_ht_conf(struct ath_softc *sc,
-			  struct ieee80211_bss_conf *bss_conf)
-{
-	if (sc->hw->conf.ht.enabled) {
-		if (bss_conf->ht.width_40_ok)
-			sc->tx_chan_width = ATH9K_HT_MACMODE_2040;
-		else
-			sc->tx_chan_width = ATH9K_HT_MACMODE_20;
-
-		ath9k_hw_set11nmac2040(sc->sc_ah, sc->tx_chan_width);
-
-		DPRINTF(sc, ATH_DBG_CONFIG,
-			"BSS Changed HT, chanwidth: %d\n", sc->tx_chan_width);
-	}
-}
-
-static inline int ath_sec_offset(u8 ext_offset)
-{
-	if (ext_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE)
-		return 0;
-	else if (ext_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE)
-		return 1;
-	else if (ext_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW)
-		return -1;
-
-	return 0;
-}
-
 static void ath9k_bss_assoc_info(struct ath_softc *sc,
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_bss_conf *bss_conf)
 {
-	struct ieee80211_hw *hw = sc->hw;
-	struct ieee80211_channel *curchan = hw->conf.channel;
 	struct ath_vap *avp = (void *)vif->drv_priv;
-	int pos;
 
 	if (bss_conf->assoc) {
-		DPRINTF(sc, ATH_DBG_CONFIG, "Bss Info ASSOC %d\n", bss_conf->aid);
+		DPRINTF(sc, ATH_DBG_CONFIG, "Bss Info ASSOC %d, bssid: %pM\n",
+			bss_conf->aid, sc->sc_curbssid);
 
 		/* New association, store aid */
 		if (avp->av_opmode == NL80211_IFTYPE_STATION) {
@@ -886,40 +859,6 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc,
 		sc->sc_halstats.ns_avgtxrssi = ATH_RSSI_DUMMY_MARKER;
 		sc->sc_halstats.ns_avgtxrate = ATH_RATE_DUMMY_MARKER;
 
-		/* Update chainmask */
-		ath_update_chainmask(sc, hw->conf.ht.enabled);
-
-		DPRINTF(sc, ATH_DBG_CONFIG,
-			"bssid %pM aid 0x%x\n",
-			sc->sc_curbssid, sc->sc_curaid);
-
-		pos = ath_get_channel(sc, curchan);
-		if (pos == -1) {
-			DPRINTF(sc, ATH_DBG_FATAL,
-				"Invalid channel: %d\n", curchan->center_freq);
-			return;
-		}
-
-		if (hw->conf.ht.enabled) {
-			int offset =
-				ath_sec_offset(bss_conf->ht.secondary_channel_offset);
-			sc->tx_chan_width = (bss_conf->ht.width_40_ok) ?
-				ATH9K_HT_MACMODE_2040 : ATH9K_HT_MACMODE_20;
-
-			sc->sc_ah->ah_channels[pos].chanmode =
-				ath_get_extchanmode(sc, curchan,
-						    offset, sc->tx_chan_width);
-		} else {
-			sc->sc_ah->ah_channels[pos].chanmode =
-				(curchan->band == IEEE80211_BAND_2GHZ) ?
-				CHANNEL_G : CHANNEL_A;
-		}
-
-		/* set h/w channel */
-		if (ath_set_channel(sc, &sc->sc_ah->ah_channels[pos]) < 0)
-			DPRINTF(sc, ATH_DBG_FATAL, "Unable to set channel: %d\n",
-				curchan->center_freq);
-
 		/* Start ANI */
 		mod_timer(&sc->sc_ani.timer,
 			jiffies + msecs_to_jiffies(ATH_ANI_POLLINTERVAL));
@@ -2146,7 +2085,8 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed)
 	struct ath_softc *sc = hw->priv;
 	struct ieee80211_conf *conf = &hw->conf;
 
-	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
+	if (changed & (IEEE80211_CONF_CHANGE_CHANNEL |
+		       IEEE80211_CONF_CHANGE_HT)) {
 		struct ieee80211_channel *curchan = hw->conf.channel;
 		int pos;
 
@@ -2165,25 +2105,23 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed)
 			(curchan->band == IEEE80211_BAND_2GHZ) ?
 			CHANNEL_G : CHANNEL_A;
 
-		if ((sc->sc_ah->ah_opmode == NL80211_IFTYPE_AP) &&
-		    (conf->ht.enabled)) {
-			sc->tx_chan_width = (!!conf->ht.sec_chan_offset) ?
-				ATH9K_HT_MACMODE_2040 : ATH9K_HT_MACMODE_20;
+		if (conf->ht.enabled) {
+			if (conf->ht.channel_type == NL80211_CHAN_HT40PLUS ||
+			    conf->ht.channel_type == NL80211_CHAN_HT40MINUS)
+				sc->tx_chan_width = ATH9K_HT_MACMODE_2040;
 
 			sc->sc_ah->ah_channels[pos].chanmode =
 				ath_get_extchanmode(sc, curchan,
-						    conf->ht.sec_chan_offset,
-						    sc->tx_chan_width);
+						    conf->ht.channel_type);
 		}
 
 		if (ath_set_channel(sc, &sc->sc_ah->ah_channels[pos]) < 0) {
 			DPRINTF(sc, ATH_DBG_FATAL, "Unable to set channel\n");
 			return -EINVAL;
 		}
-	}
 
-	if (changed & IEEE80211_CONF_CHANGE_HT)
 		ath_update_chainmask(sc, conf->ht.enabled);
+	}
 
 	if (changed & IEEE80211_CONF_CHANGE_POWER)
 		sc->sc_config.txpowlimit = 2 * conf->power_level;
@@ -2417,9 +2355,6 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw,
 			sc->sc_flags &= ~SC_OP_PROTECT_ENABLE;
 	}
 
-	if (changed & BSS_CHANGED_HT)
-		ath9k_ht_conf(sc, bss_conf);
-
 	if (changed & BSS_CHANGED_ASSOC) {
 		DPRINTF(sc, ATH_DBG_CONFIG, "BSS Changed ASSOC %d\n",
 			bss_conf->assoc);
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 2f5e86e12916..bbc1c8052ffa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -515,19 +515,27 @@ static void iwl_ht_conf(struct iwl_priv *priv,
 	iwl_conf->supported_chan_width =
 		!!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40);
 
-	iwl_conf->extension_chan_offset = bss_conf->ht.secondary_channel_offset;
+	/*
+	 * XXX: The HT configuration needs to be moved into iwl_mac_config()
+	 *	to be done there correctly.
+	 */
+
+	iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+	if (priv->hw->conf.ht.channel_type == NL80211_CHAN_HT40MINUS)
+		iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_BELOW;
+	else if(priv->hw->conf.ht.channel_type == NL80211_CHAN_HT40PLUS)
+		iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_ABOVE;
+
 	/* If no above or below channel supplied disable FAT channel */
 	if (iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_ABOVE &&
-	    iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) {
-		iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+	    iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW)
 		iwl_conf->supported_chan_width = 0;
-	}
 
 	iwl_conf->sm_ps = (u8)((ht_conf->cap & IEEE80211_HT_CAP_SM_PS) >> 2);
 
 	memcpy(&iwl_conf->mcs, &ht_conf->mcs, 16);
 
-	iwl_conf->tx_chan_width = bss_conf->ht.width_40_ok;
+	iwl_conf->tx_chan_width = iwl_conf->supported_chan_width != 0;
 	iwl_conf->ht_protection =
 		bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_PROTECTION;
 	iwl_conf->non_GF_STA_present =
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index fd5a537ac51d..f83d69e813d3 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -495,11 +495,9 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 	}
 
 	if (changed & BSS_CHANGED_HT) {
-		printk(KERN_DEBUG "  %s: HT: sec_ch_offs=%d width_40_ok=%d "
-		       "op_mode=%d\n",
+		printk(KERN_DEBUG "  %s: HT: op_mode=0x%x\n",
 		       wiphy_name(hw->wiphy),
-		       info->ht.secondary_channel_offset,
-		       info->ht.width_40_ok, info->ht.operation_mode);
+		       info->ht.operation_mode);
 	}
 
 	if (changed & BSS_CHANGED_BASIC_RATES) {
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 7501acfcfdc4..e86ed59f9ad5 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -201,13 +201,13 @@ enum nl80211_commands {
  * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming)
  * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters
  * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz
- * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ
+ * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ
  *	if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included):
- *	NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including
+ *	NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including
  *		this attribute)
- *	NL80211_SEC_CHAN_DISABLED = HT20 only
- *	NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel
- *	NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel
+ *	NL80211_CHAN_HT20 = HT20 only
+ *	NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel
+ *	NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel
  *
  * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
  * @NL80211_ATTR_IFNAME: network interface name
@@ -344,7 +344,7 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_WIPHY_TXQ_PARAMS,
 	NL80211_ATTR_WIPHY_FREQ,
-	NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET,
+	NL80211_ATTR_WIPHY_CHANNEL_TYPE,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -805,10 +805,10 @@ enum nl80211_txq_q {
 	NL80211_TXQ_Q_BK
 };
 
-enum nl80211_sec_chan_offset {
-	NL80211_SEC_CHAN_NO_HT /* No HT */,
-	NL80211_SEC_CHAN_DISABLED /* HT20 only */,
-	NL80211_SEC_CHAN_BELOW /* HT40- */,
-	NL80211_SEC_CHAN_ABOVE /* HT40+ */
+enum nl80211_channel_type {
+	NL80211_CHAN_NO_HT,
+	NL80211_CHAN_HT20,
+	NL80211_CHAN_HT40MINUS,
+	NL80211_CHAN_HT40PLUS
 };
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 65e03ac93109..23c0ab74ded6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -563,7 +563,7 @@ struct cfg80211_ops {
 
 	int	(*set_channel)(struct wiphy *wiphy,
 			       struct ieee80211_channel *chan,
-			       enum nl80211_sec_chan_offset);
+			       enum nl80211_channel_type channel_type);
 };
 
 /* temporary wext handlers */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 046ce692a906..22ae72c58d76 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -165,14 +165,9 @@ enum ieee80211_bss_change {
 
 /**
  * struct ieee80211_bss_ht_conf - BSS's changing HT configuration
- * @secondary_channel_offset: secondary channel offset, uses
- *	%IEEE80211_HT_PARAM_CHA_SEC_ values
- * @width_40_ok: indicates that 40 MHz bandwidth may be used for TX
  * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info)
  */
 struct ieee80211_bss_ht_conf {
-	u8 secondary_channel_offset;
-	bool width_40_ok;
 	u16 operation_mode;
 };
 
@@ -508,9 +503,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void)
 
 struct ieee80211_ht_conf {
 	bool enabled;
-	int sec_chan_offset; /* 0 = HT40 disabled; -1 = HT40 enabled, secondary
-			      * channel below primary; 1 = HT40 enabled,
-			      * secondary channel above primary */
+	enum nl80211_channel_type channel_type;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 23b5eeaf7bc5..3ea0884c9432 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1122,12 +1122,12 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 
 static int ieee80211_set_channel(struct wiphy *wiphy,
 				 struct ieee80211_channel *chan,
-				 enum nl80211_sec_chan_offset sec_chan_offset)
+				 enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 
 	local->oper_channel = chan;
-	local->oper_sec_chan_offset = sec_chan_offset;
+	local->oper_channel_type = channel_type;
 
 	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 }
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index a1eed7032c9b..5f510a13b9f0 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -98,6 +98,7 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_bss_ht_conf ht;
 	u32 changed = 0;
 	bool enable_ht = true, ht_changed;
+	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
@@ -112,24 +113,36 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	    ieee80211_channel_to_frequency(hti->control_chan))
 		enable_ht = false;
 
-	/*
-	 * XXX: This is totally incorrect when there are multiple virtual
-	 *	interfaces, needs to be fixed later.
-	 */
-	ht_changed = local->hw.conf.ht.enabled != enable_ht;
+	if (enable_ht) {
+		channel_type = NL80211_CHAN_HT20;
+
+		if (!(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) &&
+		    (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) &&
+		    (hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) {
+			switch(hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
+			case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+				channel_type = NL80211_CHAN_HT40PLUS;
+				break;
+			case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+				channel_type = NL80211_CHAN_HT40MINUS;
+				break;
+			}
+		}
+	}
+
+	ht_changed = local->hw.conf.ht.enabled != enable_ht ||
+		     channel_type != local->hw.conf.ht.channel_type;
+
+	local->oper_channel_type = channel_type;
 	local->hw.conf.ht.enabled = enable_ht;
+
 	if (ht_changed)
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT);
 
 	/* disable HT */
 	if (!enable_ht)
 		return 0;
-	ht.secondary_channel_offset =
-		hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
-	ht.width_40_ok =
-		!(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) &&
-		(sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) &&
-		(hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY);
+
 	ht.operation_mode = le16_to_cpu(hti->operation_mode);
 
 	/* if bss configuration changed store the new one */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6f59e11d7b33..a7dabaecfc72 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -625,7 +625,7 @@ struct ieee80211_local {
 	struct delayed_work scan_work;
 	struct ieee80211_sub_if_data *scan_sdata;
 	struct ieee80211_channel *oper_channel, *scan_channel;
-	enum nl80211_sec_chan_offset oper_sec_chan_offset;
+	enum nl80211_channel_type oper_channel_type;
 	u8 scan_ssid[IEEE80211_MAX_SSID_LEN];
 	size_t scan_ssid_len;
 	struct list_head bss_list;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6d8710327d14..a0371caf01ce 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -195,37 +195,30 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 	struct ieee80211_channel *chan;
 	int ret = 0;
 	int power;
-	enum nl80211_sec_chan_offset sec_chan_offset;
+	enum nl80211_channel_type channel_type;
 
 	might_sleep();
 
 	if (local->sw_scanning) {
 		chan = local->scan_channel;
-		sec_chan_offset = NL80211_SEC_CHAN_NO_HT;
+		channel_type = NL80211_CHAN_NO_HT;
 	} else {
 		chan = local->oper_channel;
-		sec_chan_offset = local->oper_sec_chan_offset;
+		channel_type = local->oper_channel_type;
 	}
 
 	if (chan != local->hw.conf.channel ||
-	    sec_chan_offset != local->hw.conf.ht.sec_chan_offset) {
+	    channel_type != local->hw.conf.ht.channel_type) {
 		local->hw.conf.channel = chan;
-		switch (sec_chan_offset) {
-		case NL80211_SEC_CHAN_NO_HT:
+		local->hw.conf.ht.channel_type = channel_type;
+		switch (channel_type) {
+		case NL80211_CHAN_NO_HT:
 			local->hw.conf.ht.enabled = false;
-			local->hw.conf.ht.sec_chan_offset = 0;
 			break;
-		case NL80211_SEC_CHAN_DISABLED:
+		case NL80211_CHAN_HT20:
+		case NL80211_CHAN_HT40MINUS:
+		case NL80211_CHAN_HT40PLUS:
 			local->hw.conf.ht.enabled = true;
-			local->hw.conf.ht.sec_chan_offset = 0;
-			break;
-		case NL80211_SEC_CHAN_BELOW:
-			local->hw.conf.ht.enabled = true;
-			local->hw.conf.ht.sec_chan_offset = -1;
-			break;
-		case NL80211_SEC_CHAN_ABOVE:
-			local->hw.conf.ht.enabled = true;
-			local->hw.conf.ht.sec_chan_offset = 1;
 			break;
 		}
 		changed |= IEEE80211_CONF_CHANGE_CHANNEL;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 290b0017ef2e..e4d1fca5c72d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -858,6 +858,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	local->hw.conf.ht.enabled = false;
+	local->oper_channel_type = NL80211_CHAN_NO_HT;
 	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT);
 
 	ieee80211_bss_info_change_notify(sdata, changed);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 505d68f344ce..71a8391c54f6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -641,7 +641,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
 		    chan->flags & IEEE80211_CHAN_NO_IBSS)
 			return ret;
 		local->oper_channel = chan;
-		local->oper_sec_chan_offset = NL80211_SEC_CHAN_NO_HT;
+		local->oper_channel_type = NL80211_CHAN_NO_HT;
 
 		if (local->sw_scanning || local->hw_scanning)
 			ret = 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 93c9b983ce08..1e728fff474e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -60,7 +60,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 				      .len = BUS_ID_SIZE-1 },
 	[NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED },
 	[NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 },
-	[NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 },
 
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -362,8 +362,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
-		enum nl80211_sec_chan_offset sec_chan_offset =
-			NL80211_SEC_CHAN_NO_HT;
+		enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
 		struct ieee80211_channel *chan;
 		struct ieee80211_sta_ht_cap *ht_cap;
 		u32 freq, sec_freq;
@@ -375,13 +374,13 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 
 		result = -EINVAL;
 
-		if (info->attrs[NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]) {
-			sec_chan_offset = nla_get_u32(info->attrs[
-					NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]);
-			if (sec_chan_offset != NL80211_SEC_CHAN_NO_HT &&
-			    sec_chan_offset != NL80211_SEC_CHAN_DISABLED &&
-			    sec_chan_offset != NL80211_SEC_CHAN_BELOW &&
-			    sec_chan_offset != NL80211_SEC_CHAN_ABOVE)
+		if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+			channel_type = nla_get_u32(info->attrs[
+					   NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+			if (channel_type != NL80211_CHAN_NO_HT &&
+			    channel_type != NL80211_CHAN_HT20 &&
+			    channel_type != NL80211_CHAN_HT40PLUS &&
+			    channel_type != NL80211_CHAN_HT40MINUS)
 				goto bad_res;
 		}
 
@@ -392,9 +391,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
 			goto bad_res;
 
-		if (sec_chan_offset == NL80211_SEC_CHAN_BELOW)
+		if (channel_type == NL80211_CHAN_HT40MINUS)
 			sec_freq = freq - 20;
-		else if (sec_chan_offset == NL80211_SEC_CHAN_ABOVE)
+		else if (channel_type == NL80211_CHAN_HT40PLUS)
 			sec_freq = freq + 20;
 		else
 			sec_freq = 0;
@@ -402,7 +401,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap;
 
 		/* no HT capabilities */
-		if (sec_chan_offset != NL80211_SEC_CHAN_NO_HT &&
+		if (channel_type != NL80211_CHAN_NO_HT &&
 		    !ht_cap->ht_supported)
 			goto bad_res;
 
@@ -422,7 +421,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		}
 
 		result = rdev->ops->set_channel(&rdev->wiphy, chan,
-						sec_chan_offset);
+						channel_type);
 		if (result)
 			goto bad_res;
 	}
-- 
cgit v1.2.3


From 0fb8ca45eb164c405eef8978f26829f9348b4d4d Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Fri, 12 Dec 2008 14:38:33 +0200
Subject: mac80211: Add HT rates into RX status reporting

This patch adds option for HT-enabled drivers to report HT rates
(HT20/HT40, short GI, MCS index) to mac80211. These rates are
currently not in the rate table, so the rate_idx is used to indicate
MCS index.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 11 ++++++++--
 net/mac80211/mlme.c    |  7 ++++++-
 net/mac80211/rx.c      | 56 +++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 61 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 22ae72c58d76..9428d3e2f113 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -436,6 +436,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *	is valid. This is useful in monitor mode and necessary for beacon frames
  *	to enable IBSS merging.
  * @RX_FLAG_SHORTPRE: Short preamble was used for this frame
+ * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index
+ * @RX_FLAG_40MHZ: HT40 (40 MHz) was used
+ * @RX_FLAG_SHORT_GI: Short guard interval was used
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR	= 1<<0,
@@ -446,7 +449,10 @@ enum mac80211_rx_flags {
 	RX_FLAG_FAILED_FCS_CRC	= 1<<5,
 	RX_FLAG_FAILED_PLCP_CRC = 1<<6,
 	RX_FLAG_TSFT		= 1<<7,
-	RX_FLAG_SHORTPRE	= 1<<8
+	RX_FLAG_SHORTPRE	= 1<<8,
+	RX_FLAG_HT		= 1<<9,
+	RX_FLAG_40MHZ		= 1<<10,
+	RX_FLAG_SHORT_GI	= 1<<11,
 };
 
 /**
@@ -466,7 +472,8 @@ enum mac80211_rx_flags {
  * @noise: noise when receiving this frame, in dBm.
  * @qual: overall signal quality indication, in percent (0-100).
  * @antenna: antenna used
- * @rate_idx: index of data rate into band's supported rates
+ * @rate_idx: index of data rate into band's supported rates or MCS index if
+ *	HT rates are use (RX_FLAG_HT)
  * @flag: %RX_FLAG_*
  */
 struct ieee80211_rx_status {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e4d1fca5c72d..a444b38f4901 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1613,8 +1613,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			 * e.g: at 1 MBit that means mactime is 192 usec earlier
 			 * (=24 bytes * 8 usecs/byte) than the beacon timestamp.
 			 */
-			int rate = local->hw.wiphy->bands[band]->
+			int rate;
+			if (rx_status->flag & RX_FLAG_HT) {
+				rate = 65; /* TODO: HT rates */
+			} else {
+				rate = local->hw.wiphy->bands[band]->
 					bitrates[rx_status->rate_idx].bitrate;
+			}
 			rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate);
 		} else if (local && local->ops && local->ops->get_tsf)
 			/* second best option: get current TSF */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 73cf126cef49..b729c005a2b3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -149,7 +149,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	pos++;
 
 	/* IEEE80211_RADIOTAP_RATE */
-	*pos = rate->bitrate / 5;
+	if (status->flag & RX_FLAG_HT) {
+		/*
+		 * TODO: add following information into radiotap header once
+		 * suitable fields are defined for it:
+		 * - MCS index (status->rate_idx)
+		 * - HT40 (status->flag & RX_FLAG_40MHZ)
+		 * - short-GI (status->flag & RX_FLAG_SHORT_GI)
+		 */
+		*pos = 0;
+	} else
+		*pos = rate->bitrate / 5;
 	pos++;
 
 	/* IEEE80211_RADIOTAP_CHANNEL */
@@ -1849,9 +1859,15 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			if (!(sdata->dev->flags & IFF_PROMISC))
 				return 0;
 			rx->flags &= ~IEEE80211_RX_RA_MATCH;
-		} else if (!rx->sta)
+		} else if (!rx->sta) {
+			int rate_idx;
+			if (rx->status->flag & RX_FLAG_HT)
+				rate_idx = 0; /* TODO: HT rates */
+			else
+				rate_idx = rx->status->rate_idx;
 			rx->sta = ieee80211_ibss_add_sta(sdata, bssid, hdr->addr2,
-				BIT(rx->status->rate_idx));
+				BIT(rate_idx));
+		}
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
 		if (!multicast &&
@@ -2057,7 +2073,13 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 					tid_agg_rx->reorder_buf[index]->cb,
 					sizeof(status));
 				sband = local->hw.wiphy->bands[status.band];
-				rate = &sband->bitrates[status.rate_idx];
+				if (status.flag & RX_FLAG_HT) {
+					/* TODO: HT rates */
+					rate = sband->bitrates;
+				} else {
+					rate = &sband->bitrates
+						[status.rate_idx];
+				}
 				__ieee80211_rx_handle_packet(hw,
 					tid_agg_rx->reorder_buf[index],
 					&status, rate);
@@ -2101,7 +2123,10 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 		memcpy(&status, tid_agg_rx->reorder_buf[index]->cb,
 			sizeof(status));
 		sband = local->hw.wiphy->bands[status.band];
-		rate = &sband->bitrates[status.rate_idx];
+		if (status.flag & RX_FLAG_HT)
+			rate = sband->bitrates; /* TODO: HT rates */
+		else
+			rate = &sband->bitrates[status.rate_idx];
 		__ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index],
 					     &status, rate);
 		tid_agg_rx->stored_mpdu_num--;
@@ -2189,15 +2214,26 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	}
 
 	sband = local->hw.wiphy->bands[status->band];
-
-	if (!sband ||
-	    status->rate_idx < 0 ||
-	    status->rate_idx >= sband->n_bitrates) {
+	if (!sband) {
 		WARN_ON(1);
 		return;
 	}
 
-	rate = &sband->bitrates[status->rate_idx];
+	if (status->flag & RX_FLAG_HT) {
+		/* rate_idx is MCS index */
+		if (WARN_ON(status->rate_idx < 0 ||
+			    status->rate_idx >= 76))
+			return;
+		/* HT rates are not in the table - use the highest legacy rate
+		 * for now since other parts of mac80211 may not yet be fully
+		 * MCS aware. */
+		rate = &sband->bitrates[sband->n_bitrates - 1];
+	} else {
+		if (WARN_ON(status->rate_idx < 0 ||
+			    status->rate_idx >= sband->n_bitrates))
+			return;
+		rate = &sband->bitrates[status->rate_idx];
+	}
 
 	/*
 	 * key references and virtual interfaces are protected using RCU
-- 
cgit v1.2.3


From 520eb82076993b7f55ef9b80771d264272e5127b Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Thu, 18 Dec 2008 23:35:27 +0200
Subject: mac80211: implement dynamic power save

This patch implements dynamic power save for mac80211. Basically it
means enabling power save mode after an idle period. Implementing it
dynamically gives a good compromise of low power consumption and low
latency. Some hardware have support for this in firmware, but some
require the host to do it.

The dynamic power save is implemented by adding an timeout to
ieee80211_subif_start_xmit(). The timeout can be enabled from userspace
with Wireless Extensions. For example, the command below enables the
dynamic power save and sets the time timeout to 500 ms:

iwconfig wlan0 power timeout 500m

Power save now only works with devices which handle power save in firmware.
It's also disabled by default and the heuristics when and how to enable is
considered as a policy decision and will be left for the userspace to handle.
In case the firmware has support for this, drivers can disable this feature
with IEEE80211_HW_NO_STACK_DYNAMIC_PS.

Big thanks to Johannes Berg for the help with the design and code.

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  6 ++++++
 net/mac80211/ieee80211_i.h | 10 ++++++++++
 net/mac80211/main.c        |  7 +++++++
 net/mac80211/mlme.c        | 49 ++++++++++++++++++++++++++++++++++++++++++++--
 net/mac80211/tx.c          | 13 ++++++++++++
 net/mac80211/wext.c        | 30 ++++++++++++++++++----------
 6 files changed, 103 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9428d3e2f113..b3bd00a9d992 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -854,6 +854,11 @@ enum ieee80211_tkip_key_type {
  *
  * @IEEE80211_HW_AMPDU_AGGREGATION:
  *	Hardware supports 11n A-MPDU aggregation.
+ *
+ * @IEEE80211_HW_NO_STACK_DYNAMIC_PS:
+ *	Hardware which has dynamic power save support, meaning
+ *	that power save is enabled in idle periods, and don't need support
+ *	from stack.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_RX_INCLUDES_FCS			= 1<<1,
@@ -866,6 +871,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_NOISE_DBM				= 1<<8,
 	IEEE80211_HW_SPECTRUM_MGMT			= 1<<9,
 	IEEE80211_HW_AMPDU_AGGREGATION			= 1<<10,
+	IEEE80211_HW_NO_STACK_DYNAMIC_PS		= 1<<11,
 };
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a74d6738b30a..f3eec989662b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -540,6 +540,7 @@ enum {
 
 enum queue_stop_reason {
 	IEEE80211_QUEUE_STOP_REASON_DRIVER,
+	IEEE80211_QUEUE_STOP_REASON_PS,
 };
 
 /* maximum number of hardware queues we support. */
@@ -693,7 +694,12 @@ struct ieee80211_local {
 				*/
 	int wifi_wme_noack_test;
 	unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */
+
 	bool powersave;
+	int dynamic_ps_timeout;
+	struct work_struct dynamic_ps_enable_work;
+	struct work_struct dynamic_ps_disable_work;
+	struct timer_list dynamic_ps_timer;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct local_debugfsdentries {
@@ -977,6 +983,10 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
 u64 ieee80211_mandatory_rates(struct ieee80211_local *local,
 			      enum ieee80211_band band);
 
+void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
+void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
+void ieee80211_dynamic_ps_timer(unsigned long data);
+
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
 				     enum queue_stop_reason reason);
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 21335382f530..24b14363d6e7 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -729,6 +729,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work);
 
+	INIT_WORK(&local->dynamic_ps_enable_work,
+		  ieee80211_dynamic_ps_enable_work);
+	INIT_WORK(&local->dynamic_ps_disable_work,
+		  ieee80211_dynamic_ps_disable_work);
+	setup_timer(&local->dynamic_ps_timer,
+		    ieee80211_dynamic_ps_timer, (unsigned long) local);
+
 	sta_info_init(local);
 
 	tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index dac8bd37dcf5..5ba721b6a399 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -745,8 +745,14 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	ieee80211_bss_info_change_notify(sdata, bss_info_changed);
 
 	if (local->powersave) {
-		local->hw.conf.flags |= IEEE80211_CONF_PS;
-		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+		if (local->dynamic_ps_timeout > 0)
+			mod_timer(&local->dynamic_ps_timer, jiffies +
+				  msecs_to_jiffies(local->dynamic_ps_timeout));
+		else {
+			conf->flags |= IEEE80211_CONF_PS;
+			ieee80211_hw_config(local,
+					    IEEE80211_CONF_CHANGE_PS);
+		}
 	}
 
 	netif_tx_start_all_queues(sdata->dev);
@@ -866,6 +872,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	local->oper_channel_type = NL80211_CHAN_NO_HT;
 	config_changed |= IEEE80211_CONF_CHANGE_HT;
 
+	del_timer_sync(&local->dynamic_ps_timer);
+	cancel_work_sync(&local->dynamic_ps_enable_work);
+
 	if (local->hw.conf.flags & IEEE80211_CONF_PS) {
 		local->hw.conf.flags &= ~IEEE80211_CONF_PS;
 		config_changed |= IEEE80211_CONF_CHANGE_PS;
@@ -2593,3 +2602,39 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
 		ieee80211_restart_sta_timer(sdata);
 	rcu_read_unlock();
 }
+
+void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local,
+			     dynamic_ps_disable_work);
+
+	if (local->hw.conf.flags & IEEE80211_CONF_PS) {
+		local->hw.conf.flags &= ~IEEE80211_CONF_PS;
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+	}
+
+	ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_QUEUE_STOP_REASON_PS);
+}
+
+void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local,
+			     dynamic_ps_enable_work);
+
+	if (local->hw.conf.flags & IEEE80211_CONF_PS)
+		return;
+
+	local->hw.conf.flags |= IEEE80211_CONF_PS;
+
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+}
+
+void ieee80211_dynamic_ps_timer(unsigned long data)
+{
+	struct ieee80211_local *local = (void *) data;
+
+	queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work);
+}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b098c58d216f..a4af3a124cce 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1473,6 +1473,19 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 		goto fail;
 	}
 
+	if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) &&
+	    local->dynamic_ps_timeout > 0) {
+		if (local->hw.conf.flags & IEEE80211_CONF_PS) {
+			ieee80211_stop_queues_by_reason(&local->hw,
+							IEEE80211_QUEUE_STOP_REASON_PS);
+			queue_work(local->hw.workqueue,
+				   &local->dynamic_ps_disable_work);
+		}
+
+		mod_timer(&local->dynamic_ps_timer, jiffies +
+			  msecs_to_jiffies(local->dynamic_ps_timeout));
+	}
+
 	nh_pos = skb_network_header(skb) - skb->data;
 	h_pos = skb_transport_header(skb) - skb->data;
 
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index f6640d047157..7162d5816f39 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -833,7 +833,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee80211_conf *conf = &local->hw.conf;
-	int ret = 0;
+	int ret = 0, timeout = 0;
 	bool ps;
 
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
@@ -841,6 +841,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 
 	if (wrq->disabled) {
 		ps = false;
+		timeout = 0;
 		goto set;
 	}
 
@@ -850,22 +851,31 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 	case IW_POWER_ALL_R:    /* If explicitely state all */
 		ps = true;
 		break;
-	default:                /* Otherwise we don't support it */
-		return -EINVAL;
+	default:                /* Otherwise we ignore */
+		break;
 	}
 
-	if (ps == local->powersave)
-		return ret;
+	if (wrq->flags & IW_POWER_TIMEOUT)
+		timeout = wrq->value / 1000;
 
 set:
+	if (ps == local->powersave && timeout == local->dynamic_ps_timeout)
+		return ret;
+
 	local->powersave = ps;
+	local->dynamic_ps_timeout = timeout;
 
 	if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) {
-		if (local->powersave)
-			conf->flags |= IEEE80211_CONF_PS;
-		else
-			conf->flags &= ~IEEE80211_CONF_PS;
-
+		if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) &&
+		    local->dynamic_ps_timeout > 0)
+			mod_timer(&local->dynamic_ps_timer, jiffies +
+				  msecs_to_jiffies(local->dynamic_ps_timeout));
+		else {
+			if (local->powersave)
+				conf->flags |= IEEE80211_CONF_PS;
+			else
+				conf->flags &= ~IEEE80211_CONF_PS;
+		}
 		ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	}
 
-- 
cgit v1.2.3


From 12204e24b1330428c3062faee10a0d80b8a5cb61 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Fri, 19 Dec 2008 10:44:42 +1100
Subject: security: pass mount flags to security_sb_kern_mount()

Pass mount flags to security_sb_kern_mount(), so security modules
can determine if a mount operation is being performed by the kernel.

Signed-off-by: James Morris <jmorris@namei.org>
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
---
 fs/super.c                 | 2 +-
 include/linux/security.h   | 6 +++---
 security/capability.c      | 2 +-
 security/security.c        | 4 ++--
 security/selinux/hooks.c   | 2 +-
 security/smack/smack_lsm.c | 3 ++-
 6 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 400a7608f15e..ddba069d7a99 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -914,7 +914,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
 
- 	error = security_sb_kern_mount(mnt->mnt_sb, secdata);
+ 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
  	if (error)
  		goto out_sb;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 6423abf1ac0f..3416cb85e77b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1308,7 +1308,7 @@ struct security_operations {
 	int (*sb_alloc_security) (struct super_block *sb);
 	void (*sb_free_security) (struct super_block *sb);
 	int (*sb_copy_data) (char *orig, char *copy);
-	int (*sb_kern_mount) (struct super_block *sb, void *data);
+	int (*sb_kern_mount) (struct super_block *sb, int flags, void *data);
 	int (*sb_show_options) (struct seq_file *m, struct super_block *sb);
 	int (*sb_statfs) (struct dentry *dentry);
 	int (*sb_mount) (char *dev_name, struct path *path,
@@ -1575,7 +1575,7 @@ int security_bprm_secureexec(struct linux_binprm *bprm);
 int security_sb_alloc(struct super_block *sb);
 void security_sb_free(struct super_block *sb);
 int security_sb_copy_data(char *orig, char *copy);
-int security_sb_kern_mount(struct super_block *sb, void *data);
+int security_sb_kern_mount(struct super_block *sb, int flags, void *data);
 int security_sb_show_options(struct seq_file *m, struct super_block *sb);
 int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct path *path,
@@ -1850,7 +1850,7 @@ static inline int security_sb_copy_data(char *orig, char *copy)
 	return 0;
 }
 
-static inline int security_sb_kern_mount(struct super_block *sb, void *data)
+static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index b9e391425e6f..2dce66fcb992 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -59,7 +59,7 @@ static int cap_sb_copy_data(char *orig, char *copy)
 	return 0;
 }
 
-static int cap_sb_kern_mount(struct super_block *sb, void *data)
+static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index f0d96a6cc4e9..d85dbb37c972 100644
--- a/security/security.c
+++ b/security/security.c
@@ -254,9 +254,9 @@ int security_sb_copy_data(char *orig, char *copy)
 }
 EXPORT_SYMBOL(security_sb_copy_data);
 
-int security_sb_kern_mount(struct super_block *sb, void *data)
+int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
-	return security_ops->sb_kern_mount(sb, data);
+	return security_ops->sb_kern_mount(sb, flags, data);
 }
 
 int security_sb_show_options(struct seq_file *m, struct super_block *sb)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8dbc54cde59e..7465d713b531 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2474,7 +2474,7 @@ out:
 	return rc;
 }
 
-static int selinux_sb_kern_mount(struct super_block *sb, void *data)
+static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
 	const struct cred *cred = current_cred();
 	struct avc_audit_data ad;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8ad48161cef5..1b5551dfc1f7 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -250,11 +250,12 @@ static int smack_sb_copy_data(char *orig, char *smackopts)
 /**
  * smack_sb_kern_mount - Smack specific mount processing
  * @sb: the file system superblock
+ * @flags: the mount flags
  * @data: the smack mount options
  *
  * Returns 0 on success, an error code on failure
  */
-static int smack_sb_kern_mount(struct super_block *sb, void *data)
+static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
 	struct dentry *root = sb->s_root;
 	struct inode *inode = root->d_inode;
-- 
cgit v1.2.3


From 6bd9cd50c830eb88d571c492ec370a30bf999e15 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Dec 2008 13:47:26 -0800
Subject: x86: PAT: clarify is_linear_pfn_mapping() interface

Impact: Documentation only

Incremental patches to address the review comments from Nick Piggin
for v3 version of x86 PAT pfnmap changes patchset here

http://lkml.indiana.edu/hypermail/linux/kernel/0812.2/01330.html

This patch:

Clarify is_linear_pfn_mapping() and its usage.

It is used by x86 PAT code for performance reasons. Identifying pfnmap
as linear over entire vma helps speedup reserve and free of memtype
for the region.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 87ecb40e11a0..35f811b0cd69 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -145,6 +145,14 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 
+/*
+ * This interface is used by x86 PAT code to identify a pfn mapping that is
+ * linear over entire vma. This is to optimize PAT code that deals with
+ * marking the physical region with a particular prot. This is not for generic
+ * mm use. Note also that this check will not work if the pfn mapping is
+ * linear for a vma starting at physical address 0. In which case PAT code
+ * falls back to slow path of reserving physical range page by page.
+ */
 static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
 {
 	return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff);
-- 
cgit v1.2.3


From d87fe6607c31944f7572f965c1507ae77026c133 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Dec 2008 13:47:27 -0800
Subject: x86: PAT: modify follow_phys to return phys_addr prot and return
 value

Impact: Changes and globalizes an existing static interface.

Follow_phys does similar things as follow_pfnmap_pte. Make a minor change
to follow_phys so that it can be used in place of follow_pfnmap_pte.
Physical address return value with 0 as error return does not work in
follow_phys as the actual physical address 0 mapping may exist in pte.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 31 ++++++++++++++-----------------
 2 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 35f811b0cd69..2f6e2f886d4b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -804,6 +804,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int follow_phys(struct vm_area_struct *vma, unsigned long address,
+		unsigned int flags, unsigned long *prot, resource_size_t *phys);
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);
 
diff --git a/mm/memory.c b/mm/memory.c
index 1e8f0d347c0e..79f28e35d4fc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2981,9 +2981,9 @@ int in_gate_area_no_task(unsigned long addr)
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
 #ifdef CONFIG_HAVE_IOREMAP_PROT
-static resource_size_t follow_phys(struct vm_area_struct *vma,
-			unsigned long address, unsigned int flags,
-			unsigned long *prot)
+int follow_phys(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags,
+		unsigned long *prot, resource_size_t *phys)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -2992,24 +2992,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	resource_size_t phys_addr = 0;
 	struct mm_struct *mm = vma->vm_mm;
+	int ret = -EINVAL;
 
-	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;
 
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-		goto no_page_table;
+		goto out;
 
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-		goto no_page_table;
+		goto out;
 
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-		goto no_page_table;
+		goto out;
 
 	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
 	if (pmd_huge(*pmd))
-		goto no_page_table;
+		goto out;
 
 	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (!ptep)
@@ -3024,13 +3026,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
 	phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
 
 	*prot = pgprot_val(pte_pgprot(pte));
+	*phys = phys_addr;
+	ret = 0;
 
 unlock:
 	pte_unmap_unlock(ptep, ptl);
 out:
-	return phys_addr;
-no_page_table:
-	return 0;
+	return ret;
 }
 
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
@@ -3041,12 +3043,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 	void *maddr;
 	int offset = addr & (PAGE_SIZE-1);
 
-	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-		return -EINVAL;
-
-	phys_addr = follow_phys(vma, addr, write, &prot);
-
-	if (!phys_addr)
+	if (follow_phys(vma, addr, write, &prot, &phys_addr))
 		return -EINVAL;
 
 	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
-- 
cgit v1.2.3


From 982d789ab76c8a11426852fec2fdf2f412e21c0c Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Dec 2008 13:47:28 -0800
Subject: x86: PAT: remove follow_pfnmap_pte in favor of follow_phys

Impact: Cleanup - removes a new function in favor of a recently modified older one.

Replace follow_pfnmap_pte in pat code with follow_phys. follow_phys lso
returns protection eliminating the need of pte_pgprot call. Using follow_phys
also eliminates the need for pte_pa.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h |  5 -----
 arch/x86/mm/pat.c              | 30 +++++++++++------------------
 include/linux/mm.h             |  3 ---
 mm/memory.c                    | 43 ------------------------------------------
 4 files changed, 11 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 579f8ceee948..2aa792bbd7e0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -230,11 +230,6 @@ static inline unsigned long pte_pfn(pte_t pte)
 	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
-static inline u64 pte_pa(pte_t pte)
-{
-	return pte_val(pte) & PTE_PFN_MASK;
-}
-
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d5254bae84f4..541bcc944a5b 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -685,8 +685,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 	int retval = 0;
 	unsigned long i, j;
 	u64 paddr;
-	pgprot_t prot;
-	pte_t pte;
+	unsigned long prot;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
 	unsigned long vma_size = vma_end - vma_start;
@@ -696,26 +695,22 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 
 	if (is_linear_pfn_mapping(vma)) {
 		/*
-		 * reserve the whole chunk starting from vm_pgoff,
-		 * But, we have to get the protection from pte.
+		 * reserve the whole chunk covered by vma. We need the
+		 * starting address and protection from pte.
 		 */
-		if (follow_pfnmap_pte(vma, vma_start, &pte)) {
+		if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
 			WARN_ON_ONCE(1);
-			return -1;
+			return -EINVAL;
 		}
-		prot = pte_pgprot(pte);
-		paddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
-		return reserve_pfn_range(paddr, vma_size, prot);
+		return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
 	}
 
 	/* reserve entire vma page by page, using pfn and prot from pte */
 	for (i = 0; i < vma_size; i += PAGE_SIZE) {
-		if (follow_pfnmap_pte(vma, vma_start + i, &pte))
+		if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
 			continue;
 
-		paddr = pte_pa(pte);
-		prot = pte_pgprot(pte);
-		retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
+		retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
 		if (retval)
 			goto cleanup_ret;
 	}
@@ -724,10 +719,9 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 cleanup_ret:
 	/* Reserve error: Cleanup partial reservation and return error */
 	for (j = 0; j < i; j += PAGE_SIZE) {
-		if (follow_pfnmap_pte(vma, vma_start + j, &pte))
+		if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
 			continue;
 
-		paddr = pte_pa(pte);
 		free_pfn_range(paddr, PAGE_SIZE);
 	}
 
@@ -797,6 +791,7 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 {
 	unsigned long i;
 	u64 paddr;
+	unsigned long prot;
 	unsigned long vma_start = vma->vm_start;
 	unsigned long vma_end = vma->vm_end;
 	unsigned long vma_size = vma_end - vma_start;
@@ -821,12 +816,9 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 	} else {
 		/* free entire vma, page by page, using the pfn from pte */
 		for (i = 0; i < vma_size; i += PAGE_SIZE) {
-			pte_t pte;
-
-			if (follow_pfnmap_pte(vma, vma_start + i, &pte))
+			if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
 				continue;
 
-			paddr = pte_pa(pte);
 			free_pfn_range(paddr, PAGE_SIZE);
 		}
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2f6e2f886d4b..36f9b3fa5e15 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1239,9 +1239,6 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_ANON	0x08	/* give ZERO_PAGE if no pgtable */
 
-int follow_pfnmap_pte(struct vm_area_struct *vma,
-				unsigned long address, pte_t *ret_ptep);
-
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index 79f28e35d4fc..6b29f39a5a3e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1168,49 +1168,6 @@ no_page_table:
 	return page;
 }
 
-int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address,
-			pte_t *ret_ptep)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
-	spinlock_t *ptl;
-	struct page *page;
-	struct mm_struct *mm = vma->vm_mm;
-
-	if (!is_pfn_mapping(vma))
-		goto err;
-
-	page = NULL;
-	pgd = pgd_offset(mm, address);
-	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-		goto err;
-
-	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-		goto err;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-		goto err;
-
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	pte = *ptep;
-	if (!pte_present(pte))
-		goto err_unlock;
-
-	*ret_ptep = pte;
-	pte_unmap_unlock(ptep, ptl);
-	return 0;
-
-err_unlock:
-	pte_unmap_unlock(ptep, ptl);
-err:
-	return -EINVAL;
-}
-
 /* Can we do the FOLL_ANON optimization? */
 static inline int use_zero_page(struct vm_area_struct *vma)
 {
-- 
cgit v1.2.3


From 34801ba9bf0381fcf0e2b08179d2c07f2c6ede74 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Dec 2008 13:47:29 -0800
Subject: x86: PAT: move track untrack pfnmap stubs to asm-generic

Impact: Cleanup and branch hints only.

Move the track and untrack pfn stub routines from memory.c to asm-generic.
Also add unlikely to pfnmap related calls in fork and exit path.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/pgtable.h |  6 ++----
 include/asm-generic/pgtable.h  | 46 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h             |  6 ------
 mm/memory.c                    | 48 ++----------------------------------------
 4 files changed, 50 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2aa792bbd7e0..875192bf72cb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -339,12 +339,10 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
 
+#ifndef __ASSEMBLY__
 /* Indicate that x86 has its own track and untrack pfn vma functions */
-#define track_pfn_vma_new track_pfn_vma_new
-#define track_pfn_vma_copy track_pfn_vma_copy
-#define untrack_pfn_vma untrack_pfn_vma
+#define __HAVE_PFNMAP_TRACKING
 
-#ifndef __ASSEMBLY__
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index b84633801fb6..72ebe91005a8 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -293,6 +293,52 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 #define arch_flush_lazy_cpu_mode()	do {} while (0)
 #endif
 
+#ifndef __HAVE_PFNMAP_TRACKING
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
+ * for physical range indicated by pfn and size.
+ */
+static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+					unsigned long pfn, unsigned long size)
+{
+	return 0;
+}
+
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * copied through copy_page_range().
+ */
+static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
+{
+	return 0;
+}
+
+/*
+ * Interface that can be used by architecture code to keep track of
+ * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
+ *
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack can be called for a specific region indicated by pfn and size or
+ * can be for the entire vma (in which case size can be zero).
+ */
+static inline void untrack_pfn_vma(struct vm_area_struct *vma,
+					unsigned long pfn, unsigned long size)
+{
+}
+#else
+extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
+				unsigned long pfn, unsigned long size);
+extern int track_pfn_vma_copy(struct vm_area_struct *vma);
+extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
+				unsigned long size);
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 36f9b3fa5e15..d3ddd735e375 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -163,12 +163,6 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma)
 	return (vma->vm_flags & VM_PFNMAP);
 }
 
-extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
-				unsigned long pfn, unsigned long size);
-extern int track_pfn_vma_copy(struct vm_area_struct *vma);
-extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-				unsigned long size);
-
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
diff --git a/mm/memory.c b/mm/memory.c
index 6b29f39a5a3e..f01b7eed6e16 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -99,50 +99,6 @@ int randomize_va_space __read_mostly =
 					2;
 #endif
 
-#ifndef track_pfn_vma_new
-/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- */
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
-			unsigned long pfn, unsigned long size)
-{
-	return 0;
-}
-#endif
-
-#ifndef track_pfn_vma_copy
-/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- */
-int track_pfn_vma_copy(struct vm_area_struct *vma)
-{
-	return 0;
-}
-#endif
-
-#ifndef untrack_pfn_vma
-/*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
- */
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-			unsigned long size)
-{
-}
-#endif
-
 static int __init disable_randmaps(char *s)
 {
 	randomize_va_space = 0;
@@ -713,7 +669,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-	if (is_pfn_mapping(vma)) {
+	if (unlikely(is_pfn_mapping(vma))) {
 		/*
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
@@ -969,7 +925,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		if (vma->vm_flags & VM_ACCOUNT)
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
 
-		if (is_pfn_mapping(vma))
+		if (unlikely(is_pfn_mapping(vma)))
 			untrack_pfn_vma(vma, 0, 0);
 
 		while (start != end) {
-- 
cgit v1.2.3


From bf53de907dfdaac178c92d774aae7370d7b97d20 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 19 Dec 2008 15:10:24 +0100
Subject: x86, bts: add fork and exit handling

Impact: introduce new ptrace facility

Add arch_ptrace_untrace() function that is called when the tracer
detaches (either voluntarily or when the tracing task dies);
ptrace_disable() is only called on a voluntary detach.

Add ptrace_fork() and arch_ptrace_fork(). They are called when a
traced task is forked.

Clear DS and BTS related fields on fork.

Release DS resources and reclaim memory in ptrace_untrace(). This
releases resources already when the tracing task dies. We used to do
that when the traced task dies.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ds.h     |  9 ++++++++
 arch/x86/include/asm/ptrace.h |  7 ++++++
 arch/x86/kernel/ds.c          | 11 ++++++++++
 arch/x86/kernel/process_32.c  | 20 ++++++++---------
 arch/x86/kernel/process_64.c  | 20 ++++++++---------
 arch/x86/kernel/ptrace.c      | 50 ++++++++++++++++++++++++++++++++++---------
 include/linux/ptrace.h        | 22 +++++++++++++++++++
 kernel/fork.c                 |  2 ++
 kernel/ptrace.c               | 12 +++++++++++
 9 files changed, 121 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index ee0ea3a96c11..a8f672ba100c 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -252,12 +252,21 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
+/*
+ * Task clone/init and cleanup work
+ */
+extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
+extern void ds_exit_thread(struct task_struct *tsk);
+
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
 				struct task_struct *next) {}
+static inline void ds_copy_thread(struct task_struct *tsk,
+				  struct task_struct *father) {}
+static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index fbf744215911..6d34d954c228 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -235,6 +235,13 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
+extern void x86_ptrace_untrace(struct task_struct *);
+extern void x86_ptrace_fork(struct task_struct *child,
+			    unsigned long clone_flags);
+
+#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
+#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+
 #endif /* __KERNEL__ */
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 98d271e60e08..da91701a2348 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -1017,3 +1017,14 @@ void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 
 	update_debugctlmsr(next->thread.debugctlmsr);
 }
+
+void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
+{
+	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
+	tsk->thread.ds_ctx = NULL;
+}
+
+void ds_exit_thread(struct task_struct *tsk)
+{
+	WARN_ON(tsk->thread.ds_ctx);
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 605eff9a8ac0..3ba155d24884 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -60,6 +60,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/smp.h>
+#include <asm/ds.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -251,17 +252,8 @@ void exit_thread(void)
 		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
 		put_cpu();
 	}
-#ifdef CONFIG_X86_DS
-	/* Free any BTS tracers that have not been properly released. */
-	if (unlikely(current->bts)) {
-		ds_release_bts(current->bts);
-		current->bts = NULL;
-
-		kfree(current->bts_buffer);
-		current->bts_buffer = NULL;
-		current->bts_size = 0;
-	}
-#endif /* CONFIG_X86_DS */
+
+	ds_exit_thread(current);
 }
 
 void flush_thread(void)
@@ -343,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
+
+	ds_copy_thread(p, current);
+
+	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
+	p->thread.debugctlmsr = 0;
+
 	return err;
 }
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1cfd2a4bf853..416fb9282f4f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -53,6 +53,7 @@
 #include <asm/ia32.h>
 #include <asm/idle.h>
 #include <asm/syscalls.h>
+#include <asm/ds.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -236,17 +237,8 @@ void exit_thread(void)
 		t->io_bitmap_max = 0;
 		put_cpu();
 	}
-#ifdef CONFIG_X86_DS
-	/* Free any BTS tracers that have not been properly released. */
-	if (unlikely(current->bts)) {
-		ds_release_bts(current->bts);
-		current->bts = NULL;
-
-		kfree(current->bts_buffer);
-		current->bts_buffer = NULL;
-		current->bts_size = 0;
-	}
-#endif /* CONFIG_X86_DS */
+
+	ds_exit_thread(current);
 }
 
 void flush_thread(void)
@@ -376,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		if (err)
 			goto out;
 	}
+
+	ds_copy_thread(p, me);
+
+	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
+	p->thread.debugctlmsr = 0;
+
 	err = 0;
 out:
 	if (err && p->thread.io_bitmap_ptr) {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45e9855da2d2..6ad2bb607650 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -769,8 +769,47 @@ static int ptrace_bts_size(struct task_struct *child)
 
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
+
+static void ptrace_bts_fork(struct task_struct *tsk)
+{
+	tsk->bts = NULL;
+	tsk->bts_buffer = NULL;
+	tsk->bts_size = 0;
+	tsk->thread.bts_ovfl_signal = 0;
+}
+
+static void ptrace_bts_untrace(struct task_struct *child)
+{
+	if (unlikely(child->bts)) {
+		ds_release_bts(child->bts);
+		child->bts = NULL;
+
+		kfree(child->bts_buffer);
+		child->bts_buffer = NULL;
+		child->bts_size = 0;
+	}
+}
+
+static void ptrace_bts_detach(struct task_struct *child)
+{
+	ptrace_bts_untrace(child);
+}
+#else
+static inline void ptrace_bts_fork(struct task_struct *tsk) {}
+static inline void ptrace_bts_detach(struct task_struct *child) {}
+static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
+void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
+{
+	ptrace_bts_fork(child);
+}
+
+void x86_ptrace_untrace(struct task_struct *child)
+{
+	ptrace_bts_untrace(child);
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -782,16 +821,7 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-#ifdef CONFIG_X86_PTRACE_BTS
-	if (child->bts) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-
-		kfree(child->bts_buffer);
-		child->bts_buffer = NULL;
-		child->bts_size = 0;
-	}
-#endif /* CONFIG_X86_PTRACE_BTS */
+	ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 22641d5d45df..98b93ca4db06 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
+extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
@@ -313,6 +314,27 @@ static inline void user_enable_block_step(struct task_struct *task)
 #define arch_ptrace_stop(code, info)		do { } while (0)
 #endif
 
+#ifndef arch_ptrace_untrace
+/*
+ * Do machine-specific work before untracing child.
+ *
+ * This is called for a normal detach as well as from ptrace_exit()
+ * when the tracing task dies.
+ *
+ * Called with write_lock(&tasklist_lock) held.
+ */
+#define arch_ptrace_untrace(task)		do { } while (0)
+#endif
+
+#ifndef arch_ptrace_fork
+/*
+ * Do machine-specific work to initialize a new task.
+ *
+ * This is called from copy_process().
+ */
+#define arch_ptrace_fork(child, clone_flags)	do { } while (0)
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7b93da72d4a2..65ce60adc8e8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1096,6 +1096,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
+	if (unlikely(ptrace_reparented(current)))
+		ptrace_fork(p, clone_flags);
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4c8bcd7dd8e0..100a71cfdaba 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -25,6 +25,17 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 
+
+/*
+ * Initialize a new task whose father had been ptraced.
+ *
+ * Called from copy_process().
+ */
+void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
+{
+	arch_ptrace_fork(child, clone_flags);
+}
+
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
@@ -72,6 +83,7 @@ void __ptrace_unlink(struct task_struct *child)
 	child->parent = child->real_parent;
 	list_del_init(&child->ptrace_entry);
 
+	arch_ptrace_untrace(child);
 	if (task_is_traced(child))
 		ptrace_untrace(child);
 }
-- 
cgit v1.2.3


From c5dee6177f4bd2095aab7d9be9f6ebdddd6deee9 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 19 Dec 2008 15:17:02 +0100
Subject: x86, bts: memory accounting

Impact: move the BTS buffer accounting to the mlock bucket

Add alloc_locked_buffer() and free_locked_buffer() functions to mm/mlock.c
to kalloc a buffer and account the locked memory to current.

Account the memory for the BTS buffer to the tracer.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c | 45 ++++++++++++++++++++++++++++++++++-----------
 include/linux/mm.h       |  2 ++
 mm/mlock.c               | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6ad2bb607650..0a5df5f82fb9 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -650,6 +650,24 @@ static int ptrace_bts_drain(struct task_struct *child,
 	return drained;
 }
 
+static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
+{
+	child->bts_buffer = alloc_locked_buffer(size);
+	if (!child->bts_buffer)
+		return -ENOMEM;
+
+	child->bts_size = size;
+
+	return 0;
+}
+
+static void ptrace_bts_free_buffer(struct task_struct *child)
+{
+	free_locked_buffer(child->bts_buffer, child->bts_size);
+	child->bts_buffer = NULL;
+	child->bts_size = 0;
+}
+
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
@@ -679,14 +697,13 @@ static int ptrace_bts_config(struct task_struct *child,
 
 	if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
 	    (cfg.size != child->bts_size)) {
-		kfree(child->bts_buffer);
+		int error;
 
-		child->bts_size = cfg.size;
-		child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
-		if (!child->bts_buffer) {
-			child->bts_size = 0;
-			return -ENOMEM;
-		}
+		ptrace_bts_free_buffer(child);
+
+		error = ptrace_bts_allocate_buffer(child, cfg.size);
+		if (error < 0)
+			return error;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -701,10 +718,8 @@ static int ptrace_bts_config(struct task_struct *child,
 	if (IS_ERR(child->bts)) {
 		int error = PTR_ERR(child->bts);
 
-		kfree(child->bts_buffer);
+		ptrace_bts_free_buffer(child);
 		child->bts = NULL;
-		child->bts_buffer = NULL;
-		child->bts_size = 0;
 
 		return error;
 	}
@@ -784,6 +799,9 @@ static void ptrace_bts_untrace(struct task_struct *child)
 		ds_release_bts(child->bts);
 		child->bts = NULL;
 
+		/* We cannot update total_vm and locked_vm since
+		   child's mm is already gone. But we can reclaim the
+		   memory. */
 		kfree(child->bts_buffer);
 		child->bts_buffer = NULL;
 		child->bts_size = 0;
@@ -792,7 +810,12 @@ static void ptrace_bts_untrace(struct task_struct *child)
 
 static void ptrace_bts_detach(struct task_struct *child)
 {
-	ptrace_bts_untrace(child);
+	if (unlikely(child->bts)) {
+		ds_release_bts(child->bts);
+		child->bts = NULL;
+
+		ptrace_bts_free_buffer(child);
+	}
 }
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffee2f743418..9979d3fab6e7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1286,5 +1286,7 @@ int vmemmap_populate_basepages(struct page *start_page,
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
+extern void *alloc_locked_buffer(size_t size);
+extern void free_locked_buffer(void *buffer, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/mlock.c b/mm/mlock.c
index 1ada366570cb..3035a56e7616 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -667,3 +667,48 @@ void user_shm_unlock(size_t size, struct user_struct *user)
 	spin_unlock(&shmlock_user_lock);
 	free_uid(user);
 }
+
+void *alloc_locked_buffer(size_t size)
+{
+	unsigned long rlim, vm, pgsz;
+	void *buffer = NULL;
+
+	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	down_write(&current->mm->mmap_sem);
+
+	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm   = current->mm->total_vm + pgsz;
+	if (rlim < vm)
+		goto out;
+
+	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm   = current->mm->locked_vm + pgsz;
+	if (rlim < vm)
+		goto out;
+
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
+		goto out;
+
+	current->mm->total_vm  += pgsz;
+	current->mm->locked_vm += pgsz;
+
+ out:
+	up_write(&current->mm->mmap_sem);
+	return buffer;
+}
+
+void free_locked_buffer(void *buffer, size_t size)
+{
+	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	down_write(&current->mm->mmap_sem);
+
+	current->mm->total_vm  -= pgsz;
+	current->mm->locked_vm -= pgsz;
+
+	up_write(&current->mm->mmap_sem);
+
+	kfree(buffer);
+}
-- 
cgit v1.2.3


From 749820928a2fd47ff536773d869d2c3f8038b7d1 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Fri, 5 Dec 2008 08:15:54 +0000
Subject: of/gpio: Implement of_gpio_count()

This function is used to count how many GPIOs are specified for
a device node.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 drivers/of/gpio.c       | 34 ++++++++++++++++++++++++++++++++++
 include/linux/of_gpio.h |  6 ++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index a4ba217116eb..6eea601a9204 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -79,6 +79,40 @@ err0:
 }
 EXPORT_SYMBOL(of_get_gpio_flags);
 
+/**
+ * of_gpio_count - Count GPIOs for a device
+ * @np:		device node to count GPIOs for
+ *
+ * The function returns the count of GPIOs specified for a node.
+ *
+ * Note that the empty GPIO specifiers counts too. For example,
+ *
+ * gpios = <0
+ *          &pio1 1 2
+ *          0
+ *          &pio2 3 4>;
+ *
+ * defines four GPIOs (so this function will return 4), two of which
+ * are not specified.
+ */
+unsigned int of_gpio_count(struct device_node *np)
+{
+	unsigned int cnt = 0;
+
+	do {
+		int ret;
+
+		ret = of_parse_phandles_with_args(np, "gpios", "#gpio-cells",
+						  cnt, NULL, NULL);
+		/* A hole in the gpios = <> counts anyway. */
+		if (ret < 0 && ret != -EEXIST)
+			break;
+	} while (++cnt);
+
+	return cnt;
+}
+EXPORT_SYMBOL(of_gpio_count);
+
 /**
  * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
  * @of_gc:	pointer to the of_gpio_chip structure
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index e25abf610cb6..fc2472c3c254 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -65,6 +65,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 
 extern int of_get_gpio_flags(struct device_node *np, int index,
 			     enum of_gpio_flags *flags);
+extern unsigned int of_gpio_count(struct device_node *np);
 
 extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
@@ -81,6 +82,11 @@ static inline int of_get_gpio_flags(struct device_node *np, int index,
 	return -ENOSYS;
 }
 
+static inline unsigned int of_gpio_count(struct device_node *np)
+{
+	return 0;
+}
+
 #endif /* CONFIG_OF_GPIO */
 
 /**
-- 
cgit v1.2.3


From 3ddeb912f41801fd1968c7880d031702a396e4d0 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Sat, 20 Dec 2008 17:15:14 +0800
Subject: ftrace: enable format arguments checking

Impact: broaden gcc printf format checks for ftrace_printk()

format arguments checking for ftrace_printk() is __printf(1, 2),
not __printf(1, 0).

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 04b52e6ebc66..677432b9cb7e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -303,7 +303,7 @@ extern void ftrace_dump(void);
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
-ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
-- 
cgit v1.2.3


From 1486a61ebcd2711532f8163d30babc40e11e7b40 Mon Sep 17 00:00:00 2001
From: Don Skidmore <donald.c.skidmore@intel.com>
Date: Sun, 21 Dec 2008 20:09:50 -0800
Subject: net: fix DCB setstate to return success/failure

Data Center Bridging (DCB) had no way to know if setstate had failed in the
driver.  This patch enables dcb netlink code to handle the status for the DCB
setstate interface.  Likewise it allows the driver to return a failed status
if MSI-X isn't enabled.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Eric W Multanen <eric.w.multanen@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c | 52 ++++++++++++++++++++++------------------
 include/net/dcbnl.h              |  2 +-
 net/dcb/dcbnl.c                  |  5 ++--
 3 files changed, 32 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 615c2803202a..7d158a5c545b 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -124,39 +124,45 @@ static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb)
 	return 0;
 }
 
-static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
+static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 {
+	u8 err = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
 	DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n");
 
 	if (state > 0) {
 		/* Turn on DCB */
-		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
-			return;
-		} else {
-			if (netif_running(netdev))
-				netdev->stop(netdev);
-			ixgbe_reset_interrupt_capability(adapter);
-			ixgbe_napi_del_all(adapter);
-			kfree(adapter->tx_ring);
-			kfree(adapter->rx_ring);
-			adapter->tx_ring = NULL;
-			adapter->rx_ring = NULL;
-			netdev->select_queue = &ixgbe_dcb_select_queue;
+		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+			goto out;
 
-			adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
-			adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
-			ixgbe_init_interrupt_scheme(adapter);
-			ixgbe_napi_add_all(adapter);
-			if (netif_running(netdev))
-				netdev->open(netdev);
+		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
+			DPRINTK(DRV, ERR, "Enable failed, needs MSI-X\n");
+			err = 1;
+			goto out;
 		}
+
+		if (netif_running(netdev))
+			netdev->netdev_ops->ndo_stop(netdev);
+		ixgbe_reset_interrupt_capability(adapter);
+		ixgbe_napi_del_all(adapter);
+		kfree(adapter->tx_ring);
+		kfree(adapter->rx_ring);
+		adapter->tx_ring = NULL;
+		adapter->rx_ring = NULL;
+		netdev->select_queue = &ixgbe_dcb_select_queue;
+
+		adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
+		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
+		ixgbe_init_interrupt_scheme(adapter);
+		ixgbe_napi_add_all(adapter);
+		if (netif_running(netdev))
+			netdev->netdev_ops->ndo_open(netdev);
 	} else {
 		/* Turn off DCB */
 		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
 			if (netif_running(netdev))
-				netdev->stop(netdev);
+				netdev->netdev_ops->ndo_stop(netdev);
 			ixgbe_reset_interrupt_capability(adapter);
 			ixgbe_napi_del_all(adapter);
 			kfree(adapter->tx_ring);
@@ -170,11 +176,11 @@ static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 			ixgbe_init_interrupt_scheme(adapter);
 			ixgbe_napi_add_all(adapter);
 			if (netif_running(netdev))
-				netdev->open(netdev);
-		} else {
-			return;
+				netdev->netdev_ops->ndo_open(netdev);
 		}
 	}
+out:
+	return err;
 }
 
 static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev,
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 91e0a3d7faf2..775cfc8055be 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -26,7 +26,7 @@
  */
 struct dcbnl_rtnl_ops {
 	u8   (*getstate)(struct net_device *);
-	void (*setstate)(struct net_device *, u8);
+	u8   (*setstate)(struct net_device *, u8);
 	void (*getpermhwaddr)(struct net_device *, u8 *);
 	void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8);
 	void (*setpgbwgcfgtx)(struct net_device *, int, u8);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index a1254061629f..fc88fc4d4f63 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -714,9 +714,8 @@ static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb,
 
 	value = nla_get_u8(tb[DCB_ATTR_STATE]);
 
-	netdev->dcbnl_ops->setstate(netdev, value);
-
-	ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
+	ret = dcbnl_reply(netdev->dcbnl_ops->setstate(netdev, value),
+	                  RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE,
 	                  pid, seq, flags);
 
 	return ret;
-- 
cgit v1.2.3


From f4314e815e87b4ab1c9b1115dd5853cd20ca999c Mon Sep 17 00:00:00 2001
From: Don Skidmore <donald.c.skidmore@intel.com>
Date: Sun, 21 Dec 2008 20:10:29 -0800
Subject: net: add DCNA attribute to the BCN interface for DCB

Adds the Backward Congestion Notification Address (BCNA) attribute to the
Backward Congestion Notification (BCN) interface for Data Center Bridging
(DCB), which was missing.  Receive the BCNA attribute in the ixgbe driver.
The BCNA attribute is for a switch to inform the endstation about the physical
port identification in order to support BCN on aggregated links.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Signed-off-by: Eric W Multanen <eric.w.multanen@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_dcb_nl.c | 20 ++++++++++++++++++++
 include/linux/dcbnl.h            |  2 ++
 net/dcb/dcbnl.c                  |  6 ++++--
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 7d158a5c545b..8ac639d0da03 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -93,6 +93,8 @@ int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
 		dst_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0] =
 			src_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0];
 	}
+	dst_dcb_cfg->bcn.bcna_option[0] = src_dcb_cfg->bcn.bcna_option[0];
+	dst_dcb_cfg->bcn.bcna_option[1] = src_dcb_cfg->bcn.bcna_option[1];
 	dst_dcb_cfg->bcn.rp_alpha = src_dcb_cfg->bcn.rp_alpha;
 	dst_dcb_cfg->bcn.rp_beta = src_dcb_cfg->bcn.rp_beta;
 	dst_dcb_cfg->bcn.rp_gd = src_dcb_cfg->bcn.rp_gd;
@@ -457,6 +459,12 @@ static void ixgbe_dcbnl_getbcncfg(struct net_device *netdev, int enum_index,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
 	switch (enum_index) {
+	case DCB_BCN_ATTR_BCNA_0:
+		*setting = adapter->dcb_cfg.bcn.bcna_option[0];
+		break;
+	case DCB_BCN_ATTR_BCNA_1:
+		*setting = adapter->dcb_cfg.bcn.bcna_option[1];
+		break;
 	case DCB_BCN_ATTR_ALPHA:
 		*setting = adapter->dcb_cfg.bcn.rp_alpha;
 		break;
@@ -516,6 +524,18 @@ static void ixgbe_dcbnl_setbcncfg(struct net_device *netdev, int enum_index,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
 	switch (enum_index) {
+	case DCB_BCN_ATTR_BCNA_0:
+		adapter->temp_dcb_cfg.bcn.bcna_option[0] = setting;
+		if (adapter->temp_dcb_cfg.bcn.bcna_option[0] !=
+			adapter->dcb_cfg.bcn.bcna_option[0])
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
+	case DCB_BCN_ATTR_BCNA_1:
+		adapter->temp_dcb_cfg.bcn.bcna_option[1] = setting;
+		if (adapter->temp_dcb_cfg.bcn.bcna_option[1] !=
+			adapter->dcb_cfg.bcn.bcna_option[1])
+			adapter->dcb_set_bitmap |= BIT_BCN;
+		break;
 	case DCB_BCN_ATTR_ALPHA:
 		adapter->temp_dcb_cfg.bcn.rp_alpha = setting;
 		if (adapter->temp_dcb_cfg.bcn.rp_alpha !=
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index e73a61449ad6..b0ef274e0031 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -305,6 +305,8 @@ enum dcbnl_bcn_attrs{
 	DCB_BCN_ATTR_RP_7,
 	DCB_BCN_ATTR_RP_ALL,
 
+	DCB_BCN_ATTR_BCNA_0,
+	DCB_BCN_ATTR_BCNA_1,
 	DCB_BCN_ATTR_ALPHA,
 	DCB_BCN_ATTR_BETA,
 	DCB_BCN_ATTR_GD,
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index fc88fc4d4f63..5dbfe5fdc0d6 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -140,6 +140,8 @@ static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = {
 	[DCB_BCN_ATTR_RP_6]         = {.type = NLA_U8},
 	[DCB_BCN_ATTR_RP_7]         = {.type = NLA_U8},
 	[DCB_BCN_ATTR_RP_ALL]       = {.type = NLA_FLAG},
+	[DCB_BCN_ATTR_BCNA_0]       = {.type = NLA_U32},
+	[DCB_BCN_ATTR_BCNA_1]       = {.type = NLA_U32},
 	[DCB_BCN_ATTR_ALPHA]        = {.type = NLA_U32},
 	[DCB_BCN_ATTR_BETA]         = {.type = NLA_U32},
 	[DCB_BCN_ATTR_GD]           = {.type = NLA_U32},
@@ -922,7 +924,7 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb,
 			goto err_bcn;
 	}
 
-	for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) {
+	for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) {
 		if (!getall && !bcn_tb[i])
 			continue;
 
@@ -980,7 +982,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb,
 			data[i]->nla_type - DCB_BCN_ATTR_RP_0, value_byte);
 	}
 
-	for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) {
+	for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) {
 		if (data[i] == NULL)
 			continue;
 		value_int = nla_get_u32(data[i]);
-- 
cgit v1.2.3


From 209aa4fdc39eacc145a7f9c32a4b9ffcc68912c6 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 12 Dec 2008 16:35:40 +0900
Subject: fb: SH-5 uses __raw I/O accessors now also, drop the special casing.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/fb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 75a81eaf3430..1ee63df5be92 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -888,7 +888,7 @@ struct fb_info {
 #define fb_writeq sbus_writeq
 #define fb_memset sbus_memset_io
 
-#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) || defined(__avr32__)
+#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__)
 
 #define fb_readb __raw_readb
 #define fb_readw __raw_readw
-- 
cgit v1.2.3


From 8564557a03c12adb9c4b76ae1e86db4113a04d13 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 19 Dec 2008 15:34:41 +0900
Subject: video: sh_mobile_lcdcfb deferred io support

This patch adds sh_mobile_lcdcfb deferred io support for SYS panels.

The LCDC hardware block managed by the sh_mobile_lcdcfb driver supports
RGB or SYS panel configurations. SYS panels come with an external display
controller that is resposible for refreshing the actual LCD panel. RGB
panels are controlled directly by the LCDC and they need to be refreshed
by the LCDC hardware.

In the case of SYS panels we can save some power by configuring the LCDC
hardware block in one-shot mode. In this one-shot mode panel refresh is
managed by software. This works well together with deferred io since it
allows us to stop clocks for most of the time and only enable clocks when
we actually want to trigger an update. When there is no fbdev activity
the clocks are kept stopped which allows us to deep sleep.

The refresh rate in deferred io mode is set using platform data. The same
platform data can also be used to disable deferred io mode.

As with other deferred io frame buffers user space code should use fsync()
on the frame buffer device to trigger an update.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/video/Kconfig            |   1 +
 drivers/video/sh_mobile_lcdcfb.c | 170 ++++++++++++++++++++++++++++++++++-----
 include/video/sh_mobile_lcdc.h   |   1 +
 3 files changed, 150 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index dd483bfe3951..d0c821992a99 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1893,6 +1893,7 @@ config FB_SH_MOBILE_LCDC
 	select FB_SYS_COPYAREA
 	select FB_SYS_IMAGEBLIT
 	select FB_SYS_FOPS
+	select FB_DEFERRED_IO
 	---help---
 	  Frame buffer driver for the on-chip SH-Mobile LCD controller.
 
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index e339d829183c..0e2b8fd24df1 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -16,7 +16,9 @@
 #include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
 #include <video/sh_mobile_lcdc.h>
+#include <asm/atomic.h>
 
 #define PALETTE_NR 16
 
@@ -30,11 +32,14 @@ struct sh_mobile_lcdc_chan {
 	u32 pseudo_palette[PALETTE_NR];
 	struct fb_info info;
 	dma_addr_t dma_handle;
+	struct fb_deferred_io defio;
 };
 
 struct sh_mobile_lcdc_priv {
 	void __iomem *base;
+	int irq;
 #ifdef CONFIG_HAVE_CLK
+	atomic_t clk_usecnt;
 	struct clk *dot_clk;
 	struct clk *clk;
 #endif
@@ -57,7 +62,7 @@ struct sh_mobile_lcdc_priv {
 
 /* per-channel registers */
 enum { LDDCKPAT1R, LDDCKPAT2R, LDMT1R, LDMT2R, LDMT3R, LDDFR, LDSM1R,
-       LDSA1R, LDMLSR, LDHCNR, LDHSYNR, LDVLNR, LDVSYNR, LDPMR };
+       LDSM2R, LDSA1R, LDMLSR, LDHCNR, LDHSYNR, LDVLNR, LDVSYNR, LDPMR };
 
 static unsigned long lcdc_offs_mainlcd[] = {
 	[LDDCKPAT1R] = 0x400,
@@ -67,6 +72,7 @@ static unsigned long lcdc_offs_mainlcd[] = {
 	[LDMT3R] = 0x420,
 	[LDDFR] = 0x424,
 	[LDSM1R] = 0x428,
+	[LDSM2R] = 0x42c,
 	[LDSA1R] = 0x430,
 	[LDMLSR] = 0x438,
 	[LDHCNR] = 0x448,
@@ -84,6 +90,7 @@ static unsigned long lcdc_offs_sublcd[] = {
 	[LDMT3R] = 0x608,
 	[LDDFR] = 0x60c,
 	[LDSM1R] = 0x610,
+	[LDSM2R] = 0x614,
 	[LDSA1R] = 0x618,
 	[LDMLSR] = 0x620,
 	[LDHCNR] = 0x624,
@@ -97,6 +104,8 @@ static unsigned long lcdc_offs_sublcd[] = {
 #define LCDC_RESET	0x00000100
 #define DISPLAY_BEU	0x00000008
 #define LCDC_ENABLE	0x00000001
+#define LDINTR_FE	0x00000400
+#define LDINTR_FS	0x00000004
 
 static void lcdc_write_chan(struct sh_mobile_lcdc_chan *chan,
 			    int reg_nr, unsigned long data)
@@ -171,6 +180,65 @@ struct sh_mobile_lcdc_sys_bus_ops sh_mobile_lcdc_sys_bus_ops = {
 	lcdc_sys_read_data,
 };
 
+#ifdef CONFIG_HAVE_CLK
+static void sh_mobile_lcdc_clk_on(struct sh_mobile_lcdc_priv *priv)
+{
+	if (atomic_inc_and_test(&priv->clk_usecnt)) {
+		clk_enable(priv->clk);
+		if (priv->dot_clk)
+			clk_enable(priv->dot_clk);
+	}
+}
+
+static void sh_mobile_lcdc_clk_off(struct sh_mobile_lcdc_priv *priv)
+{
+	if (atomic_sub_return(1, &priv->clk_usecnt) == -1) {
+		if (priv->dot_clk)
+			clk_disable(priv->dot_clk);
+		clk_disable(priv->clk);
+	}
+}
+#else
+static void sh_mobile_lcdc_clk_on(struct sh_mobile_lcdc_priv *priv) {}
+static void sh_mobile_lcdc_clk_off(struct sh_mobile_lcdc_priv *priv) {}
+#endif
+
+static void sh_mobile_lcdc_deferred_io(struct fb_info *info,
+				       struct list_head *pagelist)
+{
+	struct sh_mobile_lcdc_chan *ch = info->par;
+
+	/* enable clocks before accessing hardware */
+	sh_mobile_lcdc_clk_on(ch->lcdc);
+
+	/* trigger panel update */
+	lcdc_write_chan(ch, LDSM2R, 1);
+}
+
+static void sh_mobile_lcdc_deferred_io_touch(struct fb_info *info)
+{
+	struct fb_deferred_io *fbdefio = info->fbdefio;
+
+	if (fbdefio)
+		schedule_delayed_work(&info->deferred_work, fbdefio->delay);
+}
+
+static irqreturn_t sh_mobile_lcdc_irq(int irq, void *data)
+{
+	struct sh_mobile_lcdc_priv *priv = data;
+	unsigned long tmp;
+
+	/* acknowledge interrupt */
+	tmp = lcdc_read(priv, _LDINTR);
+	tmp &= 0xffffff00; /* mask in high 24 bits */
+	tmp |= 0x000000ff ^ LDINTR_FS; /* status in low 8 */
+	lcdc_write(priv, _LDINTR, tmp);
+
+	/* disable clocks */
+	sh_mobile_lcdc_clk_off(priv);
+	return IRQ_HANDLED;
+}
+
 static void sh_mobile_lcdc_start_stop(struct sh_mobile_lcdc_priv *priv,
 				      int start)
 {
@@ -208,11 +276,11 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 	int k, m;
 	int ret = 0;
 
-#ifdef CONFIG_HAVE_CLK
-	clk_enable(priv->clk);
-	if (priv->dot_clk)
-		clk_enable(priv->dot_clk);
-#endif
+	/* enable clocks before accessing the hardware */
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++)
+		if (priv->ch[k].enabled)
+			sh_mobile_lcdc_clk_on(priv);
+
 	/* reset */
 	lcdc_write(priv, _LDCNT2R, lcdc_read(priv, _LDCNT2R) | LCDC_RESET);
 	lcdc_wait_bit(priv, _LDCNT2R, LCDC_RESET, 0);
@@ -255,7 +323,7 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 	lcdc_write(priv, _LDDCKSTPR, 0);
 	lcdc_wait_bit(priv, _LDDCKSTPR, ~0, 0);
 
-	/* interrupts are disabled */
+	/* interrupts are disabled to begin with */
 	lcdc_write(priv, _LDINTR, 0);
 
 	for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
@@ -316,9 +384,6 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 			return ret;
 	}
 
-	/* --- display_lcdc_data() --- */
-	lcdc_write(priv, _LDINTR, 0x00000f00);
-
 	/* word and long word swap */
 	lcdc_write(priv, _LDDDSR, lcdc_read(priv, _LDDDSR) | 6);
 
@@ -340,8 +405,24 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
 		/* set line size */
 		lcdc_write_chan(ch, LDMLSR, ch->info.fix.line_length);
 
-		/* continuous read mode */
-		lcdc_write_chan(ch, LDSM1R, 0);
+		/* setup deferred io if SYS bus */
+		tmp = ch->cfg.sys_bus_cfg.deferred_io_msec;
+		if (ch->ldmt1r_value & (1 << 12) && tmp) {
+			ch->defio.deferred_io = sh_mobile_lcdc_deferred_io;
+			ch->defio.delay = msecs_to_jiffies(tmp);
+			ch->info.fbdefio = &ch->defio;
+			fb_deferred_io_init(&ch->info);
+
+			/* one-shot mode */
+			lcdc_write_chan(ch, LDSM1R, 1);
+
+			/* enable "Frame End Interrupt Enable" bit */
+			lcdc_write(priv, _LDINTR, LDINTR_FE);
+
+		} else {
+			/* continuous read mode */
+			lcdc_write_chan(ch, LDSM1R, 0);
+		}
 	}
 
 	/* display output */
@@ -365,6 +446,7 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
 {
 	struct sh_mobile_lcdc_chan *ch;
 	struct sh_mobile_lcdc_board_cfg	*board_cfg;
+	unsigned long tmp;
 	int k;
 
 	/* tell the board code to disable the panel */
@@ -373,16 +455,22 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
 		board_cfg = &ch->cfg.board_cfg;
 		if (board_cfg->display_off)
 			board_cfg->display_off(board_cfg->board_data);
+
+		/* cleanup deferred io if SYS bus */
+		tmp = ch->cfg.sys_bus_cfg.deferred_io_msec;
+		if (ch->ldmt1r_value & (1 << 12) && tmp) {
+			fb_deferred_io_cleanup(&ch->info);
+			ch->info.fbdefio = NULL;
+		}
 	}
 
 	/* stop the lcdc */
 	sh_mobile_lcdc_start_stop(priv, 0);
 
-#ifdef CONFIG_HAVE_CLK
-	if (priv->dot_clk)
-		clk_disable(priv->dot_clk);
-	clk_disable(priv->clk);
-#endif
+	/* stop clocks */
+	for (k = 0; k < ARRAY_SIZE(priv->ch); k++)
+		if (priv->ch[k].enabled)
+			sh_mobile_lcdc_clk_off(priv);
 }
 
 static int sh_mobile_lcdc_check_interface(struct sh_mobile_lcdc_chan *ch)
@@ -446,6 +534,7 @@ static int sh_mobile_lcdc_setup_clocks(struct platform_device *pdev,
 	priv->lddckr = icksel << 16;
 
 #ifdef CONFIG_HAVE_CLK
+	atomic_set(&priv->clk_usecnt, -1);
 	snprintf(clk_name, sizeof(clk_name), "lcdc%d", pdev->id);
 	priv->clk = clk_get(&pdev->dev, clk_name);
 	if (IS_ERR(priv->clk)) {
@@ -497,13 +586,34 @@ static struct fb_fix_screeninfo sh_mobile_lcdc_fix  = {
 	.accel =	FB_ACCEL_NONE,
 };
 
+static void sh_mobile_lcdc_fillrect(struct fb_info *info,
+				    const struct fb_fillrect *rect)
+{
+	sys_fillrect(info, rect);
+	sh_mobile_lcdc_deferred_io_touch(info);
+}
+
+static void sh_mobile_lcdc_copyarea(struct fb_info *info,
+				    const struct fb_copyarea *area)
+{
+	sys_copyarea(info, area);
+	sh_mobile_lcdc_deferred_io_touch(info);
+}
+
+static void sh_mobile_lcdc_imageblit(struct fb_info *info,
+				     const struct fb_image *image)
+{
+	sys_imageblit(info, image);
+	sh_mobile_lcdc_deferred_io_touch(info);
+}
+
 static struct fb_ops sh_mobile_lcdc_ops = {
 	.fb_setcolreg	= sh_mobile_lcdc_setcolreg,
 	.fb_read        = fb_sys_read,
 	.fb_write       = fb_sys_write,
-	.fb_fillrect	= sys_fillrect,
-	.fb_copyarea	= sys_copyarea,
-	.fb_imageblit	= sys_imageblit,
+	.fb_fillrect	= sh_mobile_lcdc_fillrect,
+	.fb_copyarea	= sh_mobile_lcdc_copyarea,
+	.fb_imageblit	= sh_mobile_lcdc_imageblit,
 };
 
 static int sh_mobile_lcdc_set_bpp(struct fb_var_screeninfo *var, int bpp)
@@ -564,8 +674,9 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "cannot find IO resource\n");
+	i = platform_get_irq(pdev, 0);
+	if (!res || i < 0) {
+		dev_err(&pdev->dev, "cannot get platform resources\n");
 		error = -ENOENT;
 		goto err0;
 	}
@@ -577,6 +688,14 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
 		goto err0;
 	}
 
+	error = request_irq(i, sh_mobile_lcdc_irq, IRQF_DISABLED,
+			    pdev->dev.bus_id, priv);
+	if (error) {
+		dev_err(&pdev->dev, "unable to request irq\n");
+		goto err1;
+	}
+
+	priv->irq = i;
 	platform_set_drvdata(pdev, priv);
 	pdata = pdev->dev.platform_data;
 
@@ -660,6 +779,7 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
 		info->fix.smem_start = priv->ch[i].dma_handle;
 		info->screen_base = buf;
 		info->device = &pdev->dev;
+		info->par = &priv->ch[i];
 	}
 
 	if (error)
@@ -687,6 +807,10 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
 			 (int) priv->ch[i].cfg.lcd_cfg.xres,
 			 (int) priv->ch[i].cfg.lcd_cfg.yres,
 			 priv->ch[i].cfg.bpp);
+
+		/* deferred io mode: disable clock to save power */
+		if (info->fbdefio)
+			sh_mobile_lcdc_clk_off(priv);
 	}
 
 	return 0;
@@ -728,6 +852,8 @@ static int sh_mobile_lcdc_remove(struct platform_device *pdev)
 	if (priv->base)
 		iounmap(priv->base);
 
+	if (priv->irq)
+		free_irq(priv->irq, priv);
 	kfree(priv);
 	return 0;
 }
diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h
index 1a4bc6ada606..25144ab22b95 100644
--- a/include/video/sh_mobile_lcdc.h
+++ b/include/video/sh_mobile_lcdc.h
@@ -37,6 +37,7 @@ enum { LCDC_CLK_BUS, LCDC_CLK_PERIPHERAL, LCDC_CLK_EXTERNAL };
 struct sh_mobile_lcdc_sys_bus_cfg {
 	unsigned long ldmt2r;
 	unsigned long ldmt3r;
+	unsigned long deferred_io_msec;
 };
 
 struct sh_mobile_lcdc_sys_bus_ops {
-- 
cgit v1.2.3


From b8dd786f9417e5885929bfe33a235c76a9c1c569 Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Mon, 22 Dec 2008 07:15:03 -0800
Subject: mlx4_core: Add support for multiple completion event vectors

When using MSI-X mode, create a completion event queue for each CPU.
Report the number of completion EQs in a new struct mlx4_caps member,
num_comp_vectors, and extend the mlx4_cq_alloc() interface with a
vector parameter so that consumers can specify which completion EQ
should be used to report events for the CQ being created.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c   |   2 +-
 drivers/infiniband/hw/mlx4/main.c |   2 +-
 drivers/net/mlx4/cq.c             |  11 +++-
 drivers/net/mlx4/en_cq.c          |   9 ++-
 drivers/net/mlx4/en_main.c        |   4 +-
 drivers/net/mlx4/eq.c             | 117 ++++++++++++++++++++++++++++----------
 drivers/net/mlx4/main.c           |  53 ++++++++++++-----
 drivers/net/mlx4/mlx4.h           |  14 ++---
 drivers/net/mlx4/profile.c        |   4 +-
 include/linux/mlx4/device.h       |   4 +-
 10 files changed, 157 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 18308494a195..2198753bf13d 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 	}
 
 	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-			    cq->db.dma, &cq->mcq, 0);
+			    cq->db.dma, &cq->mcq, vector, 0);
 	if (err)
 		goto err_dbmap;
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2e80f8f47b02..dcefe1fceb5c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		ibdev->num_ports++;
 	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
-	ibdev->ib_dev.num_comp_vectors	= 1;
+	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
 	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index b7ad2829d67e..ac57b6a42c6e 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(mlx4_cq_resize);
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed)
+		  unsigned vector, int collapsed)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -198,6 +198,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 	u64 mtt_addr;
 	int err;
 
+	if (vector >= dev->caps.num_comp_vectors)
+		return -EINVAL;
+
+	cq->vector = vector;
+
 	cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
 	if (cq->cqn == -1)
 		return -ENOMEM;
@@ -227,7 +232,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 
 	cq_context->flags	    = cpu_to_be32(!!collapsed << 18);
 	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+	cq_context->comp_eqn	    = priv->eq_table.eq[vector].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
 	mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -276,7 +281,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
-	synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+	synchronize_irq(priv->eq_table.eq[cq->vector].irq);
 
 	spin_lock_irq(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);
diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
index 1368a8010af4..674f836e225b 100644
--- a/drivers/net/mlx4/en_cq.c
+++ b/drivers/net/mlx4/en_cq.c
@@ -51,10 +51,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	if (mode == RX)
+	if (mode == RX) {
 		cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
-	else
+		cq->vector   = ring % mdev->dev->caps.num_comp_vectors;
+	} else {
 		cq->buf_size = sizeof(struct mlx4_cqe);
+		cq->vector   = 0;
+	}
 
 	cq->ring = ring;
 	cq->is_tx = mode;
@@ -86,7 +89,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 	memset(cq->buf, 0, cq->buf_size);
 
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
-			    cq->wqres.db.dma, &cq->mcq, cq->is_tx);
+			    cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
 	if (err)
 		return err;
 
diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
index 4b9794e97a79..c1c05852a95e 100644
--- a/drivers/net/mlx4/en_main.c
+++ b/drivers/net/mlx4/en_main.c
@@ -170,9 +170,9 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 		mlx4_info(mdev, "Using %d tx rings for port:%d\n",
 			  mdev->profile.prof[i].tx_ring_num, i);
 		if (!mdev->profile.prof[i].rx_ring_num) {
-			mdev->profile.prof[i].rx_ring_num = 1;
+			mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors;
 			mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
-				  1, i);
+				  mdev->profile.prof[i].rx_ring_num, i);
 		} else
 			mlx4_info(mdev, "Using %d rx rings for port:%d\n",
 				  mdev->profile.prof[i].rx_ring_num, i);
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index de169338cd90..5d867ebe6a4d 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -266,7 +266,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
 
 	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
 
 	return IRQ_RETVAL(work);
@@ -304,6 +304,17 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			    MLX4_CMD_TIME_CLASS_A);
 }
 
+static int mlx4_num_eq_uar(struct mlx4_dev *dev)
+{
+	/*
+	 * Each UAR holds 4 EQ doorbells.  To figure out how many UARs
+	 * we need to map, take the difference of highest index and
+	 * the lowest index we'll use and add 1.
+	 */
+	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
+		dev->caps.reserved_eqs / 4 + 1;
+}
+
 static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -483,9 +494,11 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
 
 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		if (eq_table->eq[i].have_irq)
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
+
+	kfree(eq_table->irq_names);
 }
 
 static int mlx4_map_clr_int(struct mlx4_dev *dev)
@@ -551,57 +564,93 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
 	__free_page(priv->eq_table.icm_page);
 }
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
+				    sizeof *priv->eq_table.eq, GFP_KERNEL);
+	if (!priv->eq_table.eq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx4_free_eq_table(struct mlx4_dev *dev)
+{
+	kfree(mlx4_priv(dev)->eq_table.eq);
+}
+
 int mlx4_init_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
 	int i;
 
+	priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map,
+					 mlx4_num_eq_uar(dev), GFP_KERNEL);
+	if (!priv->eq_table.uar_map) {
+		err = -ENOMEM;
+		goto err_out_free;
+	}
+
 	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
 			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
 	if (err)
-		return err;
+		goto err_out_free;
 
-	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
 		priv->eq_table.uar_map[i] = NULL;
 
 	err = mlx4_map_clr_int(dev);
 	if (err)
-		goto err_out_free;
+		goto err_out_bitmap;
 
 	priv->eq_table.clr_mask =
 		swab32(1 << (priv->eq_table.inta_pin & 31));
 	priv->eq_table.clr_int  = priv->clr_base +
 		(priv->eq_table.inta_pin < 32 ? 4 : 0);
 
-	err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
-			     &priv->eq_table.eq[MLX4_EQ_COMP]);
-	if (err)
-		goto err_out_unmap;
+	priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+	if (!priv->eq_table.irq_names) {
+		err = -ENOMEM;
+		goto err_out_bitmap;
+	}
+
+	for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
+		err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+				     &priv->eq_table.eq[i]);
+		if (err)
+			goto err_out_unmap;
+	}
 
 	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
-			     &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+			     (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0,
+			     &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 	if (err)
 		goto err_out_comp;
 
 	if (dev->flags & MLX4_FLAG_MSI_X) {
-		static const char *eq_name[] = {
-			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-			[MLX4_EQ_ASYNC] = DRV_NAME " (async)"
-		};
+		static const char async_eq_name[] = "mlx4-async";
+		const char *eq_name;
+
+		for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
+			if (i < dev->caps.num_comp_vectors) {
+				snprintf(priv->eq_table.irq_names + i * 16, 16,
+					 "mlx4-comp-%d", i);
+				eq_name = priv->eq_table.irq_names + i * 16;
+			} else
+				eq_name = async_eq_name;
 
-		for (i = 0; i < MLX4_NUM_EQ; ++i) {
 			err = request_irq(priv->eq_table.eq[i].irq,
-					  mlx4_msi_x_interrupt,
-					  0, eq_name[i], priv->eq_table.eq + i);
+					  mlx4_msi_x_interrupt, 0, eq_name,
+					  priv->eq_table.eq + i);
 			if (err)
 				goto err_out_async;
 
 			priv->eq_table.eq[i].have_irq = 1;
 		}
-
 	} else {
 		err = request_irq(dev->pdev->irq, mlx4_interrupt,
 				  IRQF_SHARED, DRV_NAME, dev);
@@ -612,28 +661,36 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 	}
 
 	err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
-			  priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+			  priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 	if (err)
 		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
-			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
+			   priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		eq_set_ci(&priv->eq_table.eq[i], 1);
 
 	return 0;
 
 err_out_async:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+	mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 
 err_out_comp:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+	i = dev->caps.num_comp_vectors - 1;
 
 err_out_unmap:
+	while (i >= 0) {
+		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
+		--i;
+	}
 	mlx4_unmap_clr_int(dev);
 	mlx4_free_irqs(dev);
 
-err_out_free:
+err_out_bitmap:
 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+err_out_free:
+	kfree(priv->eq_table.uar_map);
+
 	return err;
 }
 
@@ -643,18 +700,20 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
 	int i;
 
 	mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
-		    priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 
 	mlx4_free_irqs(dev);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
 
 	mlx4_unmap_clr_int(dev);
 
-	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
 		if (priv->eq_table.uar_map[i])
 			iounmap(priv->eq_table.uar_map[i]);
 
 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+	kfree(priv->eq_table.uar_map);
 }
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 90a0281d15ea..710c79e7a2db 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -421,9 +421,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
 				  cmpt_entry_sz,
-				  roundup_pow_of_two(MLX4_NUM_EQ +
-						     dev->caps.reserved_eqs),
-				  MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
+				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
 	if (err)
 		goto err_cq;
 
@@ -810,12 +808,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 		if (dev->flags & MLX4_FLAG_MSI_X) {
 			mlx4_warn(dev, "NOP command failed to generate MSI-X "
 				  "interrupt IRQ %d).\n",
-				  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_warn(dev, "Trying again without MSI-X.\n");
 		} else {
 			mlx4_err(dev, "NOP command failed to generate interrupt "
 				 "(IRQ %d), aborting.\n",
-				 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
 		}
 
@@ -908,31 +906,50 @@ err_uar_table_free:
 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct msix_entry entries[MLX4_NUM_EQ];
+	struct msix_entry *entries;
+	int nreq;
 	int err;
 	int i;
 
 	if (msi_x) {
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		nreq = min(dev->caps.num_eqs - dev->caps.reserved_eqs,
+			   num_possible_cpus() + 1);
+		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+		if (!entries)
+			goto no_msi;
+
+		for (i = 0; i < nreq; ++i)
 			entries[i].entry = i;
 
-		err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
+	retry:
+		err = pci_enable_msix(dev->pdev, entries, nreq);
 		if (err) {
-			if (err > 0)
-				mlx4_info(dev, "Only %d MSI-X vectors available, "
-					  "not using MSI-X\n", err);
+			/* Try again if at least 2 vectors are available */
+			if (err > 1) {
+				mlx4_info(dev, "Requested %d vectors, "
+					  "but only %d MSI-X vectors available, "
+					  "trying again\n", nreq, err);
+				nreq = err;
+				goto retry;
+			}
+
 			goto no_msi;
 		}
 
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		dev->caps.num_comp_vectors = nreq - 1;
+		for (i = 0; i < nreq; ++i)
 			priv->eq_table.eq[i].irq = entries[i].vector;
 
 		dev->flags |= MLX4_FLAG_MSI_X;
+
+		kfree(entries);
 		return;
 	}
 
 no_msi:
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	dev->caps.num_comp_vectors = 1;
+
+	for (i = 0; i < 2; ++i)
 		priv->eq_table.eq[i].irq = dev->pdev->irq;
 }
 
@@ -1074,6 +1091,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto err_cmd;
 
+	err = mlx4_alloc_eq_table(dev);
+	if (err)
+		goto err_close;
+
 	mlx4_enable_msi_x(dev);
 
 	err = mlx4_setup_hca(dev);
@@ -1084,7 +1105,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	if (err)
-		goto err_close;
+		goto err_free_eq;
 
 	for (port = 1; port <= dev->caps.num_ports; port++) {
 		err = mlx4_init_port_info(dev, port);
@@ -1114,6 +1135,9 @@ err_port:
 	mlx4_cleanup_pd_table(dev);
 	mlx4_cleanup_uar_table(dev);
 
+err_free_eq:
+	mlx4_free_eq_table(dev);
+
 err_close:
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
@@ -1177,6 +1201,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 		iounmap(priv->kar);
 		mlx4_uar_free(dev, &priv->driver_uar);
 		mlx4_cleanup_uar_table(dev);
+		mlx4_free_eq_table(dev);
 		mlx4_close_hca(dev);
 		mlx4_cmd_cleanup(dev);
 
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 34c909deaff3..e0213bad61c7 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -62,12 +62,6 @@ enum {
 	MLX4_MTT_ENTRY_PER_SEG	= 8
 };
 
-enum {
-	MLX4_EQ_ASYNC,
-	MLX4_EQ_COMP,
-	MLX4_NUM_EQ
-};
-
 enum {
 	MLX4_NUM_PDS		= 1 << 15
 };
@@ -205,10 +199,11 @@ struct mlx4_cq_table {
 
 struct mlx4_eq_table {
 	struct mlx4_bitmap	bitmap;
+	char		       *irq_names;
 	void __iomem	       *clr_int;
-	void __iomem	       *uar_map[(MLX4_NUM_EQ + 6) / 4];
+	void __iomem	      **uar_map;
 	u32			clr_mask;
-	struct mlx4_eq		eq[MLX4_NUM_EQ];
+	struct mlx4_eq	       *eq;
 	u64			icm_virt;
 	struct page	       *icm_page;
 	dma_addr_t		icm_dma;
@@ -328,6 +323,9 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
 
 int mlx4_reset(struct mlx4_dev *dev);
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev);
+void mlx4_free_eq_table(struct mlx4_dev *dev);
+
 int mlx4_init_pd_table(struct mlx4_dev *dev);
 int mlx4_init_uar_table(struct mlx4_dev *dev);
 int mlx4_init_mr_table(struct mlx4_dev *dev);
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index 9ca42b213d54..919fb9eb1b62 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -107,7 +107,9 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_AUXC].num    = request->num_qp;
 	profile[MLX4_RES_SRQ].num     = request->num_srq;
 	profile[MLX4_RES_CQ].num      = request->num_cq;
-	profile[MLX4_RES_EQ].num      = MLX4_NUM_EQ + dev_cap->reserved_eqs;
+	profile[MLX4_RES_EQ].num      = min(dev_cap->max_eqs,
+					    dev_cap->reserved_eqs +
+					    num_possible_cpus() + 1);
 	profile[MLX4_RES_DMPT].num    = request->num_mpt;
 	profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
 	profile[MLX4_RES_MTT].num     = request->num_mtt;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 371086fd946f..8f659cc29960 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -206,6 +206,7 @@ struct mlx4_caps {
 	int			reserved_cqs;
 	int			num_eqs;
 	int			reserved_eqs;
+	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
 	int			fmr_reserved_mtts;
@@ -328,6 +329,7 @@ struct mlx4_cq {
 	int			arm_sn;
 
 	int			cqn;
+	unsigned		vector;
 
 	atomic_t		refcount;
 	struct completion	free;
@@ -437,7 +439,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed);
+		  unsigned vector, int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
-- 
cgit v1.2.3


From 908a7a16b852ffd618a9127be8d62432182d81b4 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 22 Dec 2008 20:43:12 -0800
Subject: net: Remove unused netdev arg from some NAPI interfaces.

When the napi api was changed to separate its 1:1 binding to the net_device
struct, the netif_rx_[prep|schedule|complete] api failed to remove the now
vestigual net_device structure parameter.  This patch cleans up that api by
properly removing it..

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/nes/nes_hw.c      |  2 +-
 drivers/infiniband/hw/nes/nes_nic.c     |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c |  6 +++---
 drivers/net/8139cp.c                    |  6 +++---
 drivers/net/8139too.c                   |  6 +++---
 drivers/net/amd8111e.c                  |  6 +++---
 drivers/net/arm/ep93xx_eth.c            |  6 +++---
 drivers/net/arm/ixp4xx_eth.c            |  6 +++---
 drivers/net/atl1e/atl1e_main.c          |  6 +++---
 drivers/net/b44.c                       |  6 +++---
 drivers/net/bnx2.c                      | 15 ++++++---------
 drivers/net/bnx2x_main.c                |  6 +++---
 drivers/net/cassini.c                   |  8 ++++----
 drivers/net/chelsio/sge.c               |  4 ++--
 drivers/net/cpmac.c                     | 10 +++++-----
 drivers/net/e100.c                      |  7 +++----
 drivers/net/e1000/e1000_main.c          | 10 +++++-----
 drivers/net/e1000e/netdev.c             | 14 +++++++-------
 drivers/net/ehea/ehea_main.c            |  6 +++---
 drivers/net/enic/enic_main.c            | 12 ++++++------
 drivers/net/epic100.c                   |  6 +++---
 drivers/net/forcedeth.c                 | 10 +++++-----
 drivers/net/fs_enet/fs_enet-main.c      |  4 ++--
 drivers/net/gianfar.c                   |  6 +++---
 drivers/net/ibmveth.c                   |  6 +++---
 drivers/net/igb/igb_main.c              | 12 ++++++------
 drivers/net/ixgb/ixgb_main.c            |  6 +++---
 drivers/net/ixgbe/ixgbe_main.c          | 12 ++++++------
 drivers/net/ixp2000/ixpdev.c            |  4 ++--
 drivers/net/jme.c                       |  1 -
 drivers/net/jme.h                       |  6 +++---
 drivers/net/korina.c                    |  4 ++--
 drivers/net/macb.c                      | 10 +++++-----
 drivers/net/mlx4/en_rx.c                |  4 ++--
 drivers/net/myri10ge/myri10ge.c         |  6 +++---
 drivers/net/natsemi.c                   |  6 +++---
 drivers/net/netxen/netxen_nic_main.c    |  2 +-
 drivers/net/niu.c                       |  6 +++---
 drivers/net/pasemi_mac.c                |  6 +++---
 drivers/net/pcnet32.c                   |  6 +++---
 drivers/net/qla3xxx.c                   |  6 +++---
 drivers/net/qlge/qlge_main.c            |  7 +++----
 drivers/net/r6040.c                     |  4 ++--
 drivers/net/r8169.c                     |  6 +++---
 drivers/net/s2io.c                      |  8 ++++----
 drivers/net/sb1250-mac.c                |  6 +++---
 drivers/net/sfc/efx.c                   |  2 +-
 drivers/net/sfc/efx.h                   |  2 +-
 drivers/net/skge.c                      |  6 +++---
 drivers/net/smsc911x.c                  |  2 +-
 drivers/net/smsc9420.c                  |  4 ++--
 drivers/net/spider_net.c                | 15 ++++++---------
 drivers/net/starfire.c                  |  6 +++---
 drivers/net/sungem.c                    |  6 +++---
 drivers/net/tc35815.c                   |  6 +++---
 drivers/net/tehuti.c                    |  7 +++----
 drivers/net/tg3.c                       | 14 +++++++-------
 drivers/net/tsi108_eth.c                |  6 +++---
 drivers/net/tulip/interrupt.c           |  8 ++++----
 drivers/net/typhoon.c                   |  7 +++----
 drivers/net/ucc_geth.c                  |  6 +++---
 drivers/net/via-rhine.c                 |  4 ++--
 drivers/net/virtio_net.c                | 12 ++++++------
 drivers/net/wan/hd64572.c               |  4 ++--
 drivers/net/xen-netfront.c              |  8 ++++----
 include/linux/netdevice.h               | 24 +++++++++---------------
 66 files changed, 218 insertions(+), 235 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 7c49cc882d75..735c125b48af 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2541,7 +2541,7 @@ static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic
 {
 	struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
 
-	netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index], &nesvnic->napi);
+	netif_rx_schedule(&nesvnic->napi);
 }
 
 
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 3c96203e0d91..80e7a4d98d5b 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -112,7 +112,7 @@ static int nes_netdev_poll(struct napi_struct *napi, int budget)
 	nes_nic_ce_handler(nesdev, nescq);
 
 	if (nescq->cqes_pending == 0) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		/* clear out completed cqes and arm */
 		nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
 				nescq->cq_number | (nescq->cqe_allocs_pending << 16));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 28eb6f03c588..a1925810be3c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -446,11 +446,11 @@ poll_more:
 		if (dev->features & NETIF_F_LRO)
 			lro_flush_all(&priv->lro.lro_mgr);
 
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		if (unlikely(ib_req_notify_cq(priv->recv_cq,
 					      IB_CQ_NEXT_COMP |
 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
-		    netif_rx_reschedule(dev, napi))
+		    netif_rx_reschedule(napi))
 			goto poll_more;
 	}
 
@@ -462,7 +462,7 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 	struct net_device *dev = dev_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	netif_rx_schedule(dev, &priv->napi);
+	netif_rx_schedule(&priv->napi);
 }
 
 static void drain_tx_cq(struct net_device *dev)
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index f6d9d1353dd5..dd7ac8290aec 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -604,7 +604,7 @@ rx_next:
 
 		spin_lock_irqsave(&cp->lock, flags);
 		cpw16_f(IntrMask, cp_intr_mask);
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 		spin_unlock_irqrestore(&cp->lock, flags);
 	}
 
@@ -641,9 +641,9 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance)
 	}
 
 	if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr))
-		if (netif_rx_schedule_prep(dev, &cp->napi)) {
+		if (netif_rx_schedule_prep(&cp->napi)) {
 			cpw16_f(IntrMask, cp_norx_intr_mask);
-			__netif_rx_schedule(dev, &cp->napi);
+			__netif_rx_schedule(&cp->napi);
 		}
 
 	if (status & (TxOK | TxErr | TxEmpty | SWInt))
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 67bbf4f25bea..fe370f805793 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2128,7 +2128,7 @@ static int rtl8139_poll(struct napi_struct *napi, int budget)
 		 */
 		spin_lock_irqsave(&tp->lock, flags);
 		RTL_W16_F(IntrMask, rtl8139_intr_mask);
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 	spin_unlock(&tp->rx_lock);
@@ -2178,9 +2178,9 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance)
 	/* Receive packets are processed by poll routine.
 	   If not running start it now. */
 	if (status & RxAckBits){
-		if (netif_rx_schedule_prep(dev, &tp->napi)) {
+		if (netif_rx_schedule_prep(&tp->napi)) {
 			RTL_W16_F (IntrMask, rtl8139_norx_intr_mask);
-			__netif_rx_schedule(dev, &tp->napi);
+			__netif_rx_schedule(&tp->napi);
 		}
 	}
 
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index 0bc4f54d5db9..187ac6eb6e94 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -831,7 +831,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
 	if (rx_pkt_limit > 0) {
 		/* Receive descriptor is empty now */
 		spin_lock_irqsave(&lp->lock, flags);
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 		writel(VAL0|RINTEN0, mmio + INTEN0);
 		writel(VAL2 | RDMD0, mmio + CMD0);
 		spin_unlock_irqrestore(&lp->lock, flags);
@@ -1170,11 +1170,11 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
 
 	/* Check if Receive Interrupt has occurred. */
 	if (intr0 & RINT0) {
-		if (netif_rx_schedule_prep(dev, &lp->napi)) {
+		if (netif_rx_schedule_prep(&lp->napi)) {
 			/* Disable receive interupts */
 			writel(RINTEN0, mmio + INTEN0);
 			/* Schedule a polling routine */
-			__netif_rx_schedule(dev, &lp->napi);
+			__netif_rx_schedule(&lp->napi);
 		} else if (intren0 & RINTEN0) {
 			printk("************Driver bug! \
 				interrupt while in poll\n");
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index 588c9739d13d..6ecc600c1bcc 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -298,7 +298,7 @@ poll_some_more:
 		int more = 0;
 
 		spin_lock_irq(&ep->rx_lock);
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 		wrl(ep, REG_INTEN, REG_INTEN_TX | REG_INTEN_RX);
 		if (ep93xx_have_more_rx(ep)) {
 			wrl(ep, REG_INTEN, REG_INTEN_TX);
@@ -415,9 +415,9 @@ static irqreturn_t ep93xx_irq(int irq, void *dev_id)
 
 	if (status & REG_INTSTS_RX) {
 		spin_lock(&ep->rx_lock);
-		if (likely(netif_rx_schedule_prep(dev, &ep->napi))) {
+		if (likely(netif_rx_schedule_prep(&ep->napi))) {
 			wrl(ep, REG_INTEN, REG_INTEN_TX);
-			__netif_rx_schedule(dev, &ep->napi);
+			__netif_rx_schedule(&ep->napi);
 		}
 		spin_unlock(&ep->rx_lock);
 	}
diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index 14ffa2a61890..b03609f2e90f 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -498,7 +498,7 @@ static void eth_rx_irq(void *pdev)
 	printk(KERN_DEBUG "%s: eth_rx_irq\n", dev->name);
 #endif
 	qmgr_disable_irq(port->plat->rxq);
-	netif_rx_schedule(dev, &port->napi);
+	netif_rx_schedule(&port->napi);
 }
 
 static int eth_poll(struct napi_struct *napi, int budget)
@@ -526,7 +526,7 @@ static int eth_poll(struct napi_struct *napi, int budget)
 			printk(KERN_DEBUG "%s: eth_poll netif_rx_complete\n",
 			       dev->name);
 #endif
-			netif_rx_complete(dev, napi);
+			netif_rx_complete(napi);
 			qmgr_enable_irq(rxq);
 			if (!qmgr_stat_empty(rxq) &&
 			    netif_rx_reschedule(dev, napi)) {
@@ -1025,7 +1025,7 @@ static int eth_open(struct net_device *dev)
 	}
 	ports_open++;
 	/* we may already have RX data, enables IRQ */
-	netif_rx_schedule(dev, &port->napi);
+	netif_rx_schedule(&port->napi);
 	return 0;
 }
 
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index 98b2a7a466b8..a72a46145ed7 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -1326,9 +1326,9 @@ static irqreturn_t atl1e_intr(int irq, void *data)
 			AT_WRITE_REG(hw, REG_IMR,
 				     IMR_NORMAL_MASK & ~ISR_RX_EVENT);
 			AT_WRITE_FLUSH(hw);
-			if (likely(netif_rx_schedule_prep(netdev,
+			if (likely(netif_rx_schedule_prep(
 				   &adapter->napi)))
-				__netif_rx_schedule(netdev, &adapter->napi);
+				__netif_rx_schedule(&adapter->napi);
 		}
 	} while (--max_ints > 0);
 	/* re-enable Interrupt*/
@@ -1515,7 +1515,7 @@ static int atl1e_clean(struct napi_struct *napi, int budget)
 	/* If no Tx and not enough Rx work done, exit the polling mode */
 	if (work_done < budget) {
 quit_polling:
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		imr_data = AT_READ_REG(&adapter->hw, REG_IMR);
 		AT_WRITE_REG(&adapter->hw, REG_IMR, imr_data | ISR_RX_EVENT);
 		/* test debug */
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index 2c7a32eb92a5..934a95091dc3 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -875,7 +875,7 @@ static int b44_poll(struct napi_struct *napi, int budget)
 	}
 
 	if (work_done < budget) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		b44_enable_ints(bp);
 	}
 
@@ -907,13 +907,13 @@ static irqreturn_t b44_interrupt(int irq, void *dev_id)
 			goto irq_ack;
 		}
 
-		if (netif_rx_schedule_prep(dev, &bp->napi)) {
+		if (netif_rx_schedule_prep(&bp->napi)) {
 			/* NOTE: These writes are posted by the readback of
 			 *       the ISTAT register below.
 			 */
 			bp->istat = istat;
 			__b44_disable_ints(bp);
-			__netif_rx_schedule(dev, &bp->napi);
+			__netif_rx_schedule(&bp->napi);
 		} else {
 			printk(KERN_ERR PFX "%s: Error, poll already scheduled\n",
 			       dev->name);
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 1a2780374a49..33d69ddc90a3 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3043,7 +3043,6 @@ bnx2_msi(int irq, void *dev_instance)
 {
 	struct bnx2_napi *bnapi = dev_instance;
 	struct bnx2 *bp = bnapi->bp;
-	struct net_device *dev = bp->dev;
 
 	prefetch(bnapi->status_blk.msi);
 	REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
@@ -3054,7 +3053,7 @@ bnx2_msi(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(dev, &bnapi->napi);
+	netif_rx_schedule(&bnapi->napi);
 
 	return IRQ_HANDLED;
 }
@@ -3064,7 +3063,6 @@ bnx2_msi_1shot(int irq, void *dev_instance)
 {
 	struct bnx2_napi *bnapi = dev_instance;
 	struct bnx2 *bp = bnapi->bp;
-	struct net_device *dev = bp->dev;
 
 	prefetch(bnapi->status_blk.msi);
 
@@ -3072,7 +3070,7 @@ bnx2_msi_1shot(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(dev, &bnapi->napi);
+	netif_rx_schedule(&bnapi->napi);
 
 	return IRQ_HANDLED;
 }
@@ -3082,7 +3080,6 @@ bnx2_interrupt(int irq, void *dev_instance)
 {
 	struct bnx2_napi *bnapi = dev_instance;
 	struct bnx2 *bp = bnapi->bp;
-	struct net_device *dev = bp->dev;
 	struct status_block *sblk = bnapi->status_blk.msi;
 
 	/* When using INTx, it is possible for the interrupt to arrive
@@ -3109,9 +3106,9 @@ bnx2_interrupt(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	if (netif_rx_schedule_prep(dev, &bnapi->napi)) {
+	if (netif_rx_schedule_prep(&bnapi->napi)) {
 		bnapi->last_status_idx = sblk->status_idx;
-		__netif_rx_schedule(dev, &bnapi->napi);
+		__netif_rx_schedule(&bnapi->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -3221,7 +3218,7 @@ static int bnx2_poll_msix(struct napi_struct *napi, int budget)
 		rmb();
 		if (likely(!bnx2_has_fast_work(bnapi))) {
 
-			netif_rx_complete(bp->dev, napi);
+			netif_rx_complete(napi);
 			REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, bnapi->int_num |
 			       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
 			       bnapi->last_status_idx);
@@ -3254,7 +3251,7 @@ static int bnx2_poll(struct napi_struct *napi, int budget)
 
 		rmb();
 		if (likely(!bnx2_has_work(bnapi))) {
-			netif_rx_complete(bp->dev, napi);
+			netif_rx_complete(napi);
 			if (likely(bp->flags & BNX2_FLAG_USING_MSI_OR_MSIX)) {
 				REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
 				       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index 24d2ae8b74bf..02ab9b0ea697 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -1615,7 +1615,7 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
 	prefetch(&fp->status_blk->c_status_block.status_block_index);
 	prefetch(&fp->status_blk->u_status_block.status_block_index);
 
-	netif_rx_schedule(dev, &bnx2x_fp(bp, index, napi));
+	netif_rx_schedule(&bnx2x_fp(bp, index, napi));
 
 	return IRQ_HANDLED;
 }
@@ -1654,7 +1654,7 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 		prefetch(&fp->status_blk->c_status_block.status_block_index);
 		prefetch(&fp->status_blk->u_status_block.status_block_index);
 
-		netif_rx_schedule(dev, &bnx2x_fp(bp, 0, napi));
+		netif_rx_schedule(&bnx2x_fp(bp, 0, napi));
 
 		status &= ~mask;
 	}
@@ -9284,7 +9284,7 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 #ifdef BNX2X_STOP_ON_ERROR
 poll_panic:
 #endif
-		netif_rx_complete(bp->dev, napi);
+		netif_rx_complete(napi);
 
 		bnx2x_ack_sb(bp, FP_SB_ID(fp), USTORM_ID,
 			     le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 023d205e9054..321f43d9f0e2 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2506,7 +2506,7 @@ static irqreturn_t cas_interruptN(int irq, void *dev_id)
 	if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(dev, &cp->napi);
+		netif_rx_schedule(&cp->napi);
 #else
 		cas_rx_ringN(cp, ring, 0);
 #endif
@@ -2557,7 +2557,7 @@ static irqreturn_t cas_interrupt1(int irq, void *dev_id)
 	if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(dev, &cp->napi);
+		netif_rx_schedule(&cp->napi);
 #else
 		cas_rx_ringN(cp, 1, 0);
 #endif
@@ -2613,7 +2613,7 @@ static irqreturn_t cas_interrupt(int irq, void *dev_id)
 	if (status & INTR_RX_DONE) {
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(dev, &cp->napi);
+		netif_rx_schedule(&cp->napi);
 #else
 		cas_rx_ringN(cp, 0, 0);
 #endif
@@ -2691,7 +2691,7 @@ rx_comp:
 #endif
 	spin_unlock_irqrestore(&cp->lock, flags);
 	if (enable_intr) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		cas_unmask_intr(cp);
 	}
 	return credits;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 1da70070c2fa..7896468dda11 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1613,7 +1613,7 @@ int t1_poll(struct napi_struct *napi, int budget)
 	int work_done = process_responses(adapter, budget);
 
 	if (likely(work_done < budget)) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		writel(adapter->sge->respQ.cidx,
 		       adapter->regs + A_SG_SLEEPING);
 	}
@@ -1633,7 +1633,7 @@ irqreturn_t t1_interrupt(int irq, void *data)
 
 		if (napi_schedule_prep(&adapter->napi)) {
 			if (process_pure_responses(adapter))
-				__netif_rx_schedule(dev, &adapter->napi);
+				__netif_rx_schedule(&adapter->napi);
 			else {
 				/* no data, no NAPI needed */
 				writel(sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index d39a77cba1af..f66548751c38 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -428,7 +428,7 @@ static int cpmac_poll(struct napi_struct *napi, int budget)
 			printk(KERN_WARNING "%s: rx: polling, but no queue\n",
 			       priv->dev->name);
 		spin_unlock(&priv->rx_lock);
-		netif_rx_complete(priv->dev, napi);
+		netif_rx_complete(napi);
 		return 0;
 	}
 
@@ -514,7 +514,7 @@ static int cpmac_poll(struct napi_struct *napi, int budget)
 	if (processed == 0) {
 		/* we ran out of packets to read,
 		 * revert to interrupt-driven mode */
-		netif_rx_complete(priv->dev, napi);
+		netif_rx_complete(napi);
 		cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1);
 		return 0;
 	}
@@ -536,7 +536,7 @@ fatal_error:
 	}
 
 	spin_unlock(&priv->rx_lock);
-	netif_rx_complete(priv->dev, napi);
+	netif_rx_complete(napi);
 	netif_tx_stop_all_queues(priv->dev);
 	napi_disable(&priv->napi);
 
@@ -802,9 +802,9 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id)
 
 	if (status & MAC_INT_RX) {
 		queue = (status >> 8) & 7;
-		if (netif_rx_schedule_prep(dev, &priv->napi)) {
+		if (netif_rx_schedule_prep(&priv->napi)) {
 			cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 1 << queue);
-			__netif_rx_schedule(dev, &priv->napi);
+			__netif_rx_schedule(&priv->napi);
 		}
 	}
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index dce7ff28c3ff..9f38b16ccbbd 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2049,9 +2049,9 @@ static irqreturn_t e100_intr(int irq, void *dev_id)
 	if(stat_ack & stat_ack_rnr)
 		nic->ru_running = RU_SUSPENDED;
 
-	if(likely(netif_rx_schedule_prep(netdev, &nic->napi))) {
+	if(likely(netif_rx_schedule_prep(&nic->napi))) {
 		e100_disable_irq(nic);
-		__netif_rx_schedule(netdev, &nic->napi);
+		__netif_rx_schedule(&nic->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -2060,7 +2060,6 @@ static irqreturn_t e100_intr(int irq, void *dev_id)
 static int e100_poll(struct napi_struct *napi, int budget)
 {
 	struct nic *nic = container_of(napi, struct nic, napi);
-	struct net_device *netdev = nic->netdev;
 	unsigned int work_done = 0;
 
 	e100_rx_clean(nic, &work_done, budget);
@@ -2068,7 +2067,7 @@ static int e100_poll(struct napi_struct *napi, int budget)
 
 	/* If budget not fully consumed, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		e100_enable_irq(nic);
 	}
 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 116c96e0b119..26474c92193f 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3687,12 +3687,12 @@ static irqreturn_t e1000_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) {
+	if (likely(netif_rx_schedule_prep(&adapter->napi))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	} else
 		e1000_irq_enable(adapter);
 
@@ -3747,12 +3747,12 @@ static irqreturn_t e1000_intr(int irq, void *data)
 		ew32(IMC, ~0);
 		E1000_WRITE_FLUSH();
 	}
-	if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) {
+	if (likely(netif_rx_schedule_prep(&adapter->napi))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	} else
 		/* this really should not happen! if it does it is basically a
 		 * bug, but not a hard error, so enable ints and continue */
@@ -3793,7 +3793,7 @@ static int e1000_clean(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 		if (likely(adapter->itr_setting & 3))
 			e1000_set_itr(adapter);
-		netif_rx_complete(poll_dev, napi);
+		netif_rx_complete(napi);
 		e1000_irq_enable(adapter);
 	}
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index f7b05609073d..d4639facd1bd 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -1179,12 +1179,12 @@ static irqreturn_t e1000_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
+	if (netif_rx_schedule_prep(&adapter->napi)) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -1246,12 +1246,12 @@ static irqreturn_t e1000_intr(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
+	if (netif_rx_schedule_prep(&adapter->napi)) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -1320,10 +1320,10 @@ static irqreturn_t e1000_intr_msix_rx(int irq, void *data)
 		adapter->rx_ring->set_itr = 0;
 	}
 
-	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
+	if (netif_rx_schedule_prep(&adapter->napi)) {
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
@@ -2028,7 +2028,7 @@ clean_rx:
 	if (work_done < budget) {
 		if (adapter->itr_setting & 3)
 			e1000_set_itr(adapter);
-		netif_rx_complete(poll_dev, napi);
+		netif_rx_complete(napi);
 		if (adapter->msix_entries)
 			ew32(IMS, adapter->rx_ring->ims_val);
 		else
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 44c9ae18383f..035aa7dfc5cd 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -830,7 +830,7 @@ static int ehea_poll(struct napi_struct *napi, int budget)
 	while ((rx != budget) || force_irq) {
 		pr->poll_counter = 0;
 		force_irq = 0;
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		ehea_reset_cq_ep(pr->recv_cq);
 		ehea_reset_cq_ep(pr->send_cq);
 		ehea_reset_cq_n1(pr->recv_cq);
@@ -859,7 +859,7 @@ static void ehea_netpoll(struct net_device *dev)
 	int i;
 
 	for (i = 0; i < port->num_def_qps; i++)
-		netif_rx_schedule(dev, &port->port_res[i].napi);
+		netif_rx_schedule(&port->port_res[i].napi);
 }
 #endif
 
@@ -867,7 +867,7 @@ static irqreturn_t ehea_recv_irq_handler(int irq, void *param)
 {
 	struct ehea_port_res *pr = param;
 
-	netif_rx_schedule(pr->port->netdev, &pr->napi);
+	netif_rx_schedule(&pr->napi);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index deddd76a550c..d039e16f2763 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -411,8 +411,8 @@ static irqreturn_t enic_isr_legacy(int irq, void *data)
 	}
 
 	if (ENIC_TEST_INTR(pba, ENIC_INTX_WQ_RQ)) {
-		if (netif_rx_schedule_prep(netdev, &enic->napi))
-			__netif_rx_schedule(netdev, &enic->napi);
+		if (netif_rx_schedule_prep(&enic->napi))
+			__netif_rx_schedule(&enic->napi);
 	} else {
 		vnic_intr_unmask(&enic->intr[ENIC_INTX_WQ_RQ]);
 	}
@@ -440,7 +440,7 @@ static irqreturn_t enic_isr_msi(int irq, void *data)
 	 * writes).
 	 */
 
-	netif_rx_schedule(enic->netdev, &enic->napi);
+	netif_rx_schedule(&enic->napi);
 
 	return IRQ_HANDLED;
 }
@@ -450,7 +450,7 @@ static irqreturn_t enic_isr_msix_rq(int irq, void *data)
 	struct enic *enic = data;
 
 	/* schedule NAPI polling for RQ cleanup */
-	netif_rx_schedule(enic->netdev, &enic->napi);
+	netif_rx_schedule(&enic->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1068,7 +1068,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
 		if (netdev->features & NETIF_F_LRO)
 			lro_flush_all(&enic->lro_mgr);
 
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		vnic_intr_unmask(&enic->intr[ENIC_MSIX_RQ]);
 	}
 
@@ -1112,7 +1112,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
 		if (netdev->features & NETIF_F_LRO)
 			lro_flush_all(&enic->lro_mgr);
 
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		vnic_intr_unmask(&enic->intr[ENIC_MSIX_RQ]);
 	}
 
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 4a951b8cb4d7..f9b37c80dda6 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -1109,9 +1109,9 @@ static irqreturn_t epic_interrupt(int irq, void *dev_instance)
 
 	if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 		spin_lock(&ep->napi_lock);
-		if (netif_rx_schedule_prep(dev, &ep->napi)) {
+		if (netif_rx_schedule_prep(&ep->napi)) {
 			epic_napi_irq_off(dev, ep);
-			__netif_rx_schedule(dev, &ep->napi);
+			__netif_rx_schedule(&ep->napi);
 		} else
 			ep->reschedule_in_poll++;
 		spin_unlock(&ep->napi_lock);
@@ -1288,7 +1288,7 @@ rx_action:
 
 		more = ep->reschedule_in_poll;
 		if (!more) {
-			__netif_rx_complete(dev, napi);
+			__netif_rx_complete(napi);
 			outl(EpicNapiEvent, ioaddr + INTSTAT);
 			epic_napi_irq_on(dev, ep);
 		} else
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 1f2b24743ee9..9fbfa856ae5b 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1760,7 +1760,7 @@ static void nv_do_rx_refill(unsigned long data)
 	struct fe_priv *np = netdev_priv(dev);
 
 	/* Just reschedule NAPI rx processing */
-	netif_rx_schedule(dev, &np->napi);
+	netif_rx_schedule(&np->napi);
 }
 #else
 static void nv_do_rx_refill(unsigned long data)
@@ -3403,7 +3403,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data)
 
 #ifdef CONFIG_FORCEDETH_NAPI
 		if (events & NVREG_IRQ_RX_ALL) {
-			netif_rx_schedule(dev, &np->napi);
+			netif_rx_schedule(&np->napi);
 
 			/* Disable furthur receive irq's */
 			spin_lock(&np->lock);
@@ -3520,7 +3520,7 @@ static irqreturn_t nv_nic_irq_optimized(int foo, void *data)
 
 #ifdef CONFIG_FORCEDETH_NAPI
 		if (events & NVREG_IRQ_RX_ALL) {
-			netif_rx_schedule(dev, &np->napi);
+			netif_rx_schedule(&np->napi);
 
 			/* Disable furthur receive irq's */
 			spin_lock(&np->lock);
@@ -3678,7 +3678,7 @@ static int nv_napi_poll(struct napi_struct *napi, int budget)
 		/* re-enable receive interrupts */
 		spin_lock_irqsave(&np->lock, flags);
 
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 
 		np->irqmask |= NVREG_IRQ_RX_ALL;
 		if (np->msi_flags & NV_MSI_X_ENABLED)
@@ -3704,7 +3704,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 	writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
 
 	if (events) {
-		netif_rx_schedule(dev, &np->napi);
+		netif_rx_schedule(&np->napi);
 		/* disable receive interrupts on the nic */
 		writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
 		pci_push(base);
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index df66d620b115..4e6a9195fe5f 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -209,7 +209,7 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
 
 	if (received < budget) {
 		/* done */
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		(*fep->ops->napi_enable_rx)(dev);
 	}
 	return received;
@@ -478,7 +478,7 @@ fs_enet_interrupt(int irq, void *dev_id)
 				/* NOTE: it is possible for FCCs in NAPI mode    */
 				/* to submit a spurious interrupt while in poll  */
 				if (napi_ok)
-					__netif_rx_schedule(dev, &fep->napi);
+					__netif_rx_schedule(&fep->napi);
 			}
 		}
 
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 13f49643ba0b..c672ecfc9595 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -1607,9 +1607,9 @@ static int gfar_clean_tx_ring(struct net_device *dev)
 static void gfar_schedule_cleanup(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	if (netif_rx_schedule_prep(dev, &priv->napi)) {
+	if (netif_rx_schedule_prep(&priv->napi)) {
 		gfar_write(&priv->regs->imask, IMASK_RTX_DISABLED);
-		__netif_rx_schedule(dev, &priv->napi);
+		__netif_rx_schedule(&priv->napi);
 	}
 }
 
@@ -1863,7 +1863,7 @@ static int gfar_poll(struct napi_struct *napi, int budget)
 		return budget;
 
 	if (rx_cleaned < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 		/* Clear the halt bit in RSTAT */
 		gfar_write(&priv->regs->rstat, RSTAT_CLEAR_RHALT);
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 02ecfdb4df6b..1f055a955089 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1028,7 +1028,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 
 		ibmveth_assert(lpar_rc == H_SUCCESS);
 
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 
 		if (ibmveth_rxq_pending_buffer(adapter) &&
 		    netif_rx_reschedule(netdev, napi)) {
@@ -1047,11 +1047,11 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
 	unsigned long lpar_rc;
 
-	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
+	if (netif_rx_schedule_prep(&adapter->napi)) {
 		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
 				       VIO_IRQ_DISABLE);
 		ibmveth_assert(lpar_rc == H_SUCCESS);
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 25df7c931064..6a40d9486daf 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -3347,8 +3347,8 @@ static irqreturn_t igb_msix_rx(int irq, void *data)
 
 	igb_write_itr(rx_ring);
 
-	if (netif_rx_schedule_prep(adapter->netdev, &rx_ring->napi))
-		__netif_rx_schedule(adapter->netdev, &rx_ring->napi);
+	if (netif_rx_schedule_prep(&rx_ring->napi))
+		__netif_rx_schedule(&rx_ring->napi);
 
 #ifdef CONFIG_IGB_DCA
 	if (adapter->flags & IGB_FLAG_DCA_ENABLED)
@@ -3500,7 +3500,7 @@ static irqreturn_t igb_intr_msi(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	netif_rx_schedule(netdev, &adapter->rx_ring[0].napi);
+	netif_rx_schedule(&adapter->rx_ring[0].napi);
 
 	return IRQ_HANDLED;
 }
@@ -3538,7 +3538,7 @@ static irqreturn_t igb_intr(int irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	netif_rx_schedule(netdev, &adapter->rx_ring[0].napi);
+	netif_rx_schedule(&adapter->rx_ring[0].napi);
 
 	return IRQ_HANDLED;
 }
@@ -3573,7 +3573,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
 	    !netif_running(netdev)) {
 		if (adapter->itr_setting & 3)
 			igb_set_itr(adapter);
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		if (!test_bit(__IGB_DOWN, &adapter->state))
 			igb_irq_enable(adapter);
 		return 0;
@@ -3599,7 +3599,7 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
 
 	/* If not enough Rx work done, exit the polling mode */
 	if ((work_done == 0) || !netif_running(netdev)) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 
 		if (adapter->itr_setting & 3) {
 			if (adapter->num_rx_queues == 1)
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 820a92cc7f62..679125b3bbc9 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1721,14 +1721,14 @@ ixgb_intr(int irq, void *data)
 		if (!test_bit(__IXGB_DOWN, &adapter->flags))
 			mod_timer(&adapter->watchdog_timer, jiffies);
 
-	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
+	if (netif_rx_schedule_prep(&adapter->napi)) {
 
 		/* Disable interrupts and register for poll. The flush
 		  of the posted write is intentionally left out.
 		*/
 
 		IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
-		__netif_rx_schedule(netdev, &adapter->napi);
+		__netif_rx_schedule(&adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
@@ -1750,7 +1750,7 @@ ixgb_clean(struct napi_struct *napi, int budget)
 
 	/* If budget not fully consumed, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		if (!test_bit(__IXGB_DOWN, &adapter->flags))
 			ixgb_irq_enable(adapter);
 	}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 92b35cfc7a46..b6ae9f674ba5 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1012,7 +1012,7 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
 	rx_ring = &(adapter->rx_ring[r_idx]);
 	/* disable interrupts on this vector only */
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rx_ring->v_idx);
-	netif_rx_schedule(adapter->netdev, &q_vector->napi);
+	netif_rx_schedule(&q_vector->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1053,7 +1053,7 @@ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget)
 
 	/* If all Rx work done, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(adapter->netdev, napi);
+		netif_rx_complete(napi);
 		if (adapter->itr_setting & 3)
 			ixgbe_set_itr_msix(q_vector);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
@@ -1102,7 +1102,7 @@ static int ixgbe_clean_rxonly_many(struct napi_struct *napi, int budget)
 	rx_ring = &(adapter->rx_ring[r_idx]);
 	/* If all Rx work done, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(adapter->netdev, napi);
+		netif_rx_complete(napi);
 		if (adapter->itr_setting & 3)
 			ixgbe_set_itr_msix(q_vector);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
@@ -1378,13 +1378,13 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
 
 	ixgbe_check_fan_failure(adapter, eicr);
 
-	if (netif_rx_schedule_prep(netdev, &adapter->q_vector[0].napi)) {
+	if (netif_rx_schedule_prep(&adapter->q_vector[0].napi)) {
 		adapter->tx_ring[0].total_packets = 0;
 		adapter->tx_ring[0].total_bytes = 0;
 		adapter->rx_ring[0].total_packets = 0;
 		adapter->rx_ring[0].total_bytes = 0;
 		/* would disable interrupts here but EIAM disabled it */
-		__netif_rx_schedule(netdev, &adapter->q_vector[0].napi);
+		__netif_rx_schedule(&adapter->q_vector[0].napi);
 	}
 
 	return IRQ_HANDLED;
@@ -2308,7 +2308,7 @@ static int ixgbe_poll(struct napi_struct *napi, int budget)
 
 	/* If budget not fully consumed, exit the polling mode */
 	if (work_done < budget) {
-		netif_rx_complete(adapter->netdev, napi);
+		netif_rx_complete(napi);
 		if (adapter->itr_setting & 3)
 			ixgbe_set_itr(adapter);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index bd96dbc8e021..014745720560 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -141,7 +141,7 @@ static int ixpdev_poll(struct napi_struct *napi, int budget)
 			break;
 	} while (ixp2000_reg_read(IXP2000_IRQ_THD_RAW_STATUS_A_0) & 0x00ff);
 
-	netif_rx_complete(dev, napi);
+	netif_rx_complete(napi);
 	ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_SET_A_0, 0x00ff);
 
 	return rx;
@@ -204,7 +204,7 @@ static irqreturn_t ixpdev_interrupt(int irq, void *dev_id)
 
 		ixp2000_reg_wrb(IXP2000_IRQ_THD_ENABLE_CLEAR_A_0, 0x00ff);
 		if (likely(napi_schedule_prep(&ip->napi))) {
-			__netif_rx_schedule(dev, &ip->napi);
+			__netif_rx_schedule(&ip->napi);
 		} else {
 			printk(KERN_CRIT "ixp2000: irq while polling!!\n");
 		}
diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index 15035cb1738a..08b34051c646 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -1250,7 +1250,6 @@ static int
 jme_poll(JME_NAPI_HOLDER(holder), JME_NAPI_WEIGHT(budget))
 {
 	struct jme_adapter *jme = jme_napi_priv(holder);
-	struct net_device *netdev = jme->dev;
 	int rest;
 
 	rest = jme_process_receive(jme, JME_NAPI_WEIGHT_VAL(budget));
diff --git a/drivers/net/jme.h b/drivers/net/jme.h
index adaf3ddbf783..2d6f30e638fc 100644
--- a/drivers/net/jme.h
+++ b/drivers/net/jme.h
@@ -398,15 +398,15 @@ struct jme_ring {
 #define JME_NAPI_WEIGHT(w) int w
 #define JME_NAPI_WEIGHT_VAL(w) w
 #define JME_NAPI_WEIGHT_SET(w, r)
-#define JME_RX_COMPLETE(dev, napis) netif_rx_complete(dev, napis)
+#define JME_RX_COMPLETE(dev, napis) netif_rx_complete(napis)
 #define JME_NAPI_ENABLE(priv) napi_enable(&priv->napi);
 #define JME_NAPI_DISABLE(priv) \
 	if (!napi_disable_pending(&priv->napi)) \
 		napi_disable(&priv->napi);
 #define JME_RX_SCHEDULE_PREP(priv) \
-	netif_rx_schedule_prep(priv->dev, &priv->napi)
+	netif_rx_schedule_prep(&priv->napi)
 #define JME_RX_SCHEDULE(priv) \
-	__netif_rx_schedule(priv->dev, &priv->napi);
+	__netif_rx_schedule(&priv->napi);
 
 /*
  * Jmac Adapter Private data
diff --git a/drivers/net/korina.c b/drivers/net/korina.c
index 63626953f07e..4a5580c1126a 100644
--- a/drivers/net/korina.c
+++ b/drivers/net/korina.c
@@ -327,7 +327,7 @@ static irqreturn_t korina_rx_dma_interrupt(int irq, void *dev_id)
 
 	dmas = readl(&lp->rx_dma_regs->dmas);
 	if (dmas & (DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR)) {
-		netif_rx_schedule_prep(dev, &lp->napi);
+		netif_rx_schedule_prep(&lp->napi);
 
 		dmasm = readl(&lp->rx_dma_regs->dmasm);
 		writel(dmasm | (DMA_STAT_DONE |
@@ -466,7 +466,7 @@ static int korina_poll(struct napi_struct *napi, int budget)
 
 	work_done = korina_rx(dev, budget);
 	if (work_done < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 		writel(readl(&lp->rx_dma_regs->dmasm) &
 			~(DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR),
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 261b9507124b..a04da4ecaa88 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -519,7 +519,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
 		 * this function was called last time, and no packets
 		 * have been received since.
 		 */
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		goto out;
 	}
 
@@ -530,13 +530,13 @@ static int macb_poll(struct napi_struct *napi, int budget)
 		dev_warn(&bp->pdev->dev,
 			 "No RX buffers complete, status = %02lx\n",
 			 (unsigned long)status);
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		goto out;
 	}
 
 	work_done = macb_rx(bp, budget);
 	if (work_done < budget)
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 	/*
 	 * We've done what we can to clean the buffers. Make sure we
@@ -571,7 +571,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 		}
 
 		if (status & MACB_RX_INT_FLAGS) {
-			if (netif_rx_schedule_prep(dev, &bp->napi)) {
+			if (netif_rx_schedule_prep(&bp->napi)) {
 				/*
 				 * There's no point taking any more interrupts
 				 * until we have processed the buffers
@@ -579,7 +579,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 				macb_writel(bp, IDR, MACB_RX_INT_FLAGS);
 				dev_dbg(&bp->pdev->dev,
 					"scheduling RX softirq\n");
-				__netif_rx_schedule(dev, &bp->napi);
+				__netif_rx_schedule(&bp->napi);
 			}
 		}
 
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index ffe28089b687..c61b0bdca1a4 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c
@@ -814,7 +814,7 @@ void mlx4_en_rx_irq(struct mlx4_cq *mcq)
 	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
 
 	if (priv->port_up)
-		netif_rx_schedule(cq->dev, &cq->napi);
+		netif_rx_schedule(&cq->napi);
 	else
 		mlx4_en_arm_cq(priv, cq);
 }
@@ -834,7 +834,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 		INC_PERF_COUNTER(priv->pstats.napi_quota);
 	else {
 		/* Done for now */
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 	}
 	return done;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index f017c774e1a4..378c89e6c7d5 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1515,7 +1515,7 @@ static int myri10ge_poll(struct napi_struct *napi, int budget)
 	work_done = myri10ge_clean_rx_done(ss, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		put_be32(htonl(3), ss->irq_claim);
 	}
 	return work_done;
@@ -1533,7 +1533,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 	/* an interrupt on a non-zero receive-only slice is implicitly
 	 * valid  since MSI-X irqs are not shared */
 	if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
-		netif_rx_schedule(ss->dev, &ss->napi);
+		netif_rx_schedule(&ss->napi);
 		return (IRQ_HANDLED);
 	}
 
@@ -1544,7 +1544,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 	/* low bit indicates receives are present, so schedule
 	 * napi poll handler */
 	if (stats->valid & 1)
-		netif_rx_schedule(ss->dev, &ss->napi);
+		netif_rx_schedule(&ss->napi);
 
 	if (!mgp->msi_enabled && !mgp->msix_enabled) {
 		put_be32(0, mgp->irq_deassert);
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 9f81fcb96882..478edb92bca3 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2193,10 +2193,10 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 
 	prefetch(&np->rx_skbuff[np->cur_rx % RX_RING_SIZE]);
 
-	if (netif_rx_schedule_prep(dev, &np->napi)) {
+	if (netif_rx_schedule_prep(&np->napi)) {
 		/* Disable interrupts and register for poll */
 		natsemi_irq_disable(dev);
-		__netif_rx_schedule(dev, &np->napi);
+		__netif_rx_schedule(&np->napi);
 	} else
 		printk(KERN_WARNING
 	       	       "%s: Ignoring interrupt, status %#08x, mask %#08x.\n",
@@ -2248,7 +2248,7 @@ static int natsemi_poll(struct napi_struct *napi, int budget)
 		np->intr_status = readl(ioaddr + IntrStatus);
 	} while (np->intr_status);
 
-	netif_rx_complete(dev, napi);
+	netif_rx_complete(napi);
 
 	/* Reenable interrupts providing nothing is trying to shut
 	 * the chip down. */
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 6876bfd4455a..ba01524b5531 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -1583,7 +1583,7 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget)
 	}
 
 	if ((work_done < budget) && tx_complete) {
-		netif_rx_complete(adapter->netdev, &adapter->napi);
+		netif_rx_complete(&adapter->napi);
 		netxen_nic_enable_int(adapter);
 	}
 
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index f219f16ec97a..5698c155bbf3 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -3669,7 +3669,7 @@ static int niu_poll(struct napi_struct *napi, int budget)
 	work_done = niu_poll_core(np, lp, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(np->dev, napi);
+		netif_rx_complete(napi);
 		niu_ldg_rearm(np, lp, 1);
 	}
 	return work_done;
@@ -4088,12 +4088,12 @@ static void __niu_fastpath_interrupt(struct niu *np, int ldg, u64 v0)
 static void niu_schedule_napi(struct niu *np, struct niu_ldg *lp,
 			      u64 v0, u64 v1, u64 v2)
 {
-	if (likely(netif_rx_schedule_prep(np->dev, &lp->napi))) {
+	if (likely(netif_rx_schedule_prep(&lp->napi))) {
 		lp->v0 = v0;
 		lp->v1 = v1;
 		lp->v2 = v2;
 		__niu_fastpath_interrupt(np, lp->ldg_num, v0);
-		__netif_rx_schedule(np->dev, &lp->napi);
+		__netif_rx_schedule(&lp->napi);
 	}
 }
 
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index fcbf6ccd0a85..dcd199045613 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -971,7 +971,7 @@ static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
 	if (*chan->status & PAS_STATUS_ERROR)
 		reg |= PAS_IOB_DMA_RXCH_RESET_DINTC;
 
-	netif_rx_schedule(dev, &mac->napi);
+	netif_rx_schedule(&mac->napi);
 
 	write_iob_reg(PAS_IOB_DMA_RXCH_RESET(chan->chno), reg);
 
@@ -1011,7 +1011,7 @@ static irqreturn_t pasemi_mac_tx_intr(int irq, void *data)
 
 	mod_timer(&txring->clean_timer, jiffies + (TX_CLEAN_INTERVAL)*2);
 
-	netif_rx_schedule(mac->netdev, &mac->napi);
+	netif_rx_schedule(&mac->napi);
 
 	if (reg)
 		write_iob_reg(PAS_IOB_DMA_TXCH_RESET(chan->chno), reg);
@@ -1641,7 +1641,7 @@ static int pasemi_mac_poll(struct napi_struct *napi, int budget)
 	pkts = pasemi_mac_clean_rx(rx_ring(mac), budget);
 	if (pkts < budget) {
 		/* all done, no more packets present */
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 		pasemi_mac_restart_rx_intr(mac);
 		pasemi_mac_restart_tx_intr(mac);
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index f2b192c80e17..044b7b07f5f4 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -1397,7 +1397,7 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 		spin_lock_irqsave(&lp->lock, flags);
 
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 
 		/* clear interrupt masks */
 		val = lp->a.read_csr(ioaddr, CSR3);
@@ -2586,14 +2586,14 @@ pcnet32_interrupt(int irq, void *dev_id)
 				       dev->name, csr0);
 			/* unlike for the lance, there is no restart needed */
 		}
-		if (netif_rx_schedule_prep(dev, &lp->napi)) {
+		if (netif_rx_schedule_prep(&lp->napi)) {
 			u16 val;
 			/* set interrupt masks */
 			val = lp->a.read_csr(ioaddr, CSR3);
 			val |= 0x5f00;
 			lp->a.write_csr(ioaddr, CSR3, val);
 			mmiowb();
-			__netif_rx_schedule(dev, &lp->napi);
+			__netif_rx_schedule(&lp->napi);
 			break;
 		}
 		csr0 = lp->a.read_csr(ioaddr, CSR0);
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 6b7ed1a5b3b7..33e8e62b4502 100644
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -2293,7 +2293,7 @@ static int ql_poll(struct napi_struct *napi, int budget)
 
 	if (tx_cleaned + rx_cleaned != budget) {
 		spin_lock_irqsave(&qdev->hw_lock, hw_flags);
-		__netif_rx_complete(ndev, napi);
+		__netif_rx_complete(napi);
 		ql_update_small_bufq_prod_index(qdev);
 		ql_update_lrg_bufq_prod_index(qdev);
 		writel(qdev->rsp_consumer_index,
@@ -2352,8 +2352,8 @@ static irqreturn_t ql3xxx_isr(int irq, void *dev_id)
 		spin_unlock(&qdev->adapter_lock);
 	} else if (value & ISP_IMR_DISABLE_CMPL_INT) {
 		ql_disable_interrupts(qdev);
-		if (likely(netif_rx_schedule_prep(ndev, &qdev->napi))) {
-			__netif_rx_schedule(ndev, &qdev->napi);
+		if (likely(netif_rx_schedule_prep(&qdev->napi))) {
+			__netif_rx_schedule(&qdev->napi);
 		}
 	} else {
 		return IRQ_NONE;
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 225930fda5af..02147082786d 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -1647,7 +1647,7 @@ static int ql_napi_poll_msix(struct napi_struct *napi, int budget)
 		rx_ring->cq_id);
 
 	if (work_done < budget) {
-		__netif_rx_complete(qdev->ndev, napi);
+		__netif_rx_complete(napi);
 		ql_enable_completion_interrupt(qdev, rx_ring->irq);
 	}
 	return work_done;
@@ -1733,7 +1733,7 @@ static irqreturn_t qlge_msix_rx_isr(int irq, void *dev_id)
 {
 	struct rx_ring *rx_ring = dev_id;
 	struct ql_adapter *qdev = rx_ring->qdev;
-	netif_rx_schedule(qdev->ndev, &rx_ring->napi);
+	netif_rx_schedule(&rx_ring->napi);
 	return IRQ_HANDLED;
 }
 
@@ -1819,8 +1819,7 @@ static irqreturn_t qlge_isr(int irq, void *dev_id)
 							      &rx_ring->rx_work,
 							      0);
 				else
-					netif_rx_schedule(qdev->ndev,
-							  &rx_ring->napi);
+					netif_rx_schedule(&rx_ring->napi);
 				work_done++;
 			}
 		}
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index aff1cc627c05..53bbddfc8c95 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -667,7 +667,7 @@ static int r6040_poll(struct napi_struct *napi, int budget)
 	work_done = r6040_rx(dev, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		/* Enable RX interrupt */
 		iowrite16(ioread16(ioaddr + MIER) | RX_INTS, ioaddr + MIER);
 	}
@@ -704,7 +704,7 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id)
 
 		/* Mask off RX interrupt */
 		misr &= ~RX_INTS;
-		netif_rx_schedule(dev, &lp->napi);
+		netif_rx_schedule(&lp->napi);
 	}
 
 	/* TX interrupt request */
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index dddf6aeff498..2c73ca606b35 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -3581,8 +3581,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
 		tp->intr_mask = ~tp->napi_event;
 
-		if (likely(netif_rx_schedule_prep(dev, &tp->napi)))
-			__netif_rx_schedule(dev, &tp->napi);
+		if (likely(netif_rx_schedule_prep(&tp->napi)))
+			__netif_rx_schedule(&tp->napi);
 		else if (netif_msg_intr(tp)) {
 			printk(KERN_INFO "%s: interrupt %04x in poll\n",
 			       dev->name, status);
@@ -3603,7 +3603,7 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 	rtl8169_tx_interrupt(dev, tp, ioaddr);
 
 	if (work_done < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		tp->intr_mask = 0xffff;
 		/*
 		 * 20040426: the barrier is not strictly required but the
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 1b489df80fa6..512861923c6b 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2852,7 +2852,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget)
 	s2io_chk_rx_buffers(nic, ring);
 
 	if (pkts_processed < budget_org) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		/*Re Enable MSI-Rx Vector*/
 		addr = (u8 __iomem *)&bar0->xmsi_mask_reg;
 		addr += 7 - ring->ring_no;
@@ -2890,7 +2890,7 @@ static int s2io_poll_inta(struct napi_struct *napi, int budget)
 			break;
 	}
 	if (pkts_processed < budget_org) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		/* Re enable the Rx interrupts for the ring */
 		writeq(0, &bar0->rx_traffic_mask);
 		readl(&bar0->rx_traffic_mask);
@@ -4344,7 +4344,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id)
 		val8 = (ring->ring_no == 0) ? 0x7f : 0xff;
 		writeb(val8, addr);
 		val8 = readb(addr);
-		netif_rx_schedule(dev, &ring->napi);
+		netif_rx_schedule(&ring->napi);
 	} else {
 		rx_intr_handler(ring, 0);
 		s2io_chk_rx_buffers(sp, ring);
@@ -4791,7 +4791,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id)
 
 		if (config->napi) {
 			if (reason & GEN_INTR_RXTRAFFIC) {
-				netif_rx_schedule(dev, &sp->napi);
+				netif_rx_schedule(&sp->napi);
 				writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_mask);
 				writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int);
 				readl(&bar0->rx_traffic_int);
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 480caec1e024..31e38fae017f 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -2039,9 +2039,9 @@ static irqreturn_t sbmac_intr(int irq,void *dev_instance)
 		sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
 
 	if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
-		if (netif_rx_schedule_prep(dev, &sc->napi)) {
+		if (netif_rx_schedule_prep(&sc->napi)) {
 			__raw_writeq(0, sc->sbm_imr);
-			__netif_rx_schedule(dev, &sc->napi);
+			__netif_rx_schedule(&sc->napi);
 			/* Depend on the exit from poll to reenable intr */
 		}
 		else {
@@ -2667,7 +2667,7 @@ static int sbmac_poll(struct napi_struct *napi, int budget)
 	sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
 
 	if (work_done < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 #ifdef CONFIG_SBMAC_COALESCE
 		__raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 086629c0fe57..42934ba2030d 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -230,7 +230,7 @@ static int efx_poll(struct napi_struct *napi, int budget)
 		 * since efx_channel_processed() will have no effect if
 		 * interrupts have already been disabled.
 		 */
-		netif_rx_complete(napi_dev, napi);
+		netif_rx_complete(napi);
 		efx_channel_processed(channel);
 	}
 
diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h
index dd0d45b9e71f..0dd7a532c78a 100644
--- a/drivers/net/sfc/efx.h
+++ b/drivers/net/sfc/efx.h
@@ -77,7 +77,7 @@ static inline void efx_schedule_channel(struct efx_channel *channel)
 		  channel->channel, raw_smp_processor_id());
 	channel->work_pending = true;
 
-	netif_rx_schedule(channel->napi_dev, &channel->napi_str);
+	netif_rx_schedule(&channel->napi_str);
 }
 
 #endif /* EFX_EFX_H */
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index f73ee7974003..c9dbb06f8c94 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3214,7 +3214,7 @@ static int skge_poll(struct napi_struct *napi, int to_do)
 		unsigned long flags;
 
 		spin_lock_irqsave(&hw->hw_lock, flags);
-		__netif_rx_complete(dev, napi);
+		__netif_rx_complete(napi);
 		hw->intr_mask |= napimask[skge->port];
 		skge_write32(hw, B0_IMSK, hw->intr_mask);
 		skge_read32(hw, B0_IMSK);
@@ -3377,7 +3377,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id)
 	if (status & (IS_XA1_F|IS_R1_F)) {
 		struct skge_port *skge = netdev_priv(hw->dev[0]);
 		hw->intr_mask &= ~(IS_XA1_F|IS_R1_F);
-		netif_rx_schedule(hw->dev[0], &skge->napi);
+		netif_rx_schedule(&skge->napi);
 	}
 
 	if (status & IS_PA_TO_TX1)
@@ -3397,7 +3397,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id)
 
 		if (status & (IS_XA2_F|IS_R2_F)) {
 			hw->intr_mask &= ~(IS_XA2_F|IS_R2_F);
-			netif_rx_schedule(hw->dev[1], &skge->napi);
+			netif_rx_schedule(&skge->napi);
 		}
 
 		if (status & IS_PA_TO_RX2) {
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index fa28542b47d5..ecdde03d4167 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -984,7 +984,7 @@ static int smsc911x_poll(struct napi_struct *napi, int budget)
 			/* We processed all packets available.  Tell NAPI it can
 			 * stop polling then re-enable rx interrupts */
 			smsc911x_reg_write(pdata, INT_STS, INT_STS_RSFL_);
-			netif_rx_complete(dev, napi);
+			netif_rx_complete(napi);
 			temp = smsc911x_reg_read(pdata, INT_EN);
 			temp |= INT_EN_RSFL_EN_;
 			smsc911x_reg_write(pdata, INT_EN, temp);
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index 940220f60921..27e017d96966 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -666,7 +666,7 @@ static irqreturn_t smsc9420_isr(int irq, void *dev_id)
 			smsc9420_pci_flush_write(pd);
 
 			ints_to_clear |= (DMAC_STS_RX_ | DMAC_STS_NIS_);
-			netif_rx_schedule(pd->dev, &pd->napi);
+			netif_rx_schedule(&pd->napi);
 		}
 
 		if (ints_to_clear)
@@ -889,7 +889,7 @@ static int smsc9420_rx_poll(struct napi_struct *napi, int budget)
 	smsc9420_pci_flush_write(pd);
 
 	if (work_done < budget) {
-		netif_rx_complete(dev, &pd->napi);
+		netif_rx_complete(&pd->napi);
 
 		/* re-enable RX DMA interrupts */
 		dma_intr_ena = smsc9420_reg_read(pd, DMAC_INTR_ENA);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 325fbc9612c9..c5c123d3af57 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1302,7 +1302,7 @@ static int spider_net_poll(struct napi_struct *napi, int budget)
 	/* if all packets are in the stack, enable interrupts and return 0 */
 	/* if not, return 1 */
 	if (packets_done < budget) {
-		netif_rx_complete(netdev, napi);
+		netif_rx_complete(napi);
 		spider_net_rx_irq_on(card);
 		card->ignore_rx_ramfull = 0;
 	}
@@ -1529,8 +1529,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 			spider_net_refill_rx_chain(card);
 			spider_net_enable_rxdmac(card);
 			card->num_rx_ints ++;
-			netif_rx_schedule(card->netdev,
-					  &card->napi);
+			netif_rx_schedule(&card->napi);
 		}
 		show_error = 0;
 		break;
@@ -1550,8 +1549,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 		spider_net_refill_rx_chain(card);
 		spider_net_enable_rxdmac(card);
 		card->num_rx_ints ++;
-		netif_rx_schedule(card->netdev,
-				  &card->napi);
+		netif_rx_schedule(&card->napi);
 		show_error = 0;
 		break;
 
@@ -1565,8 +1563,7 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 		spider_net_refill_rx_chain(card);
 		spider_net_enable_rxdmac(card);
 		card->num_rx_ints ++;
-		netif_rx_schedule(card->netdev,
-				  &card->napi);
+		netif_rx_schedule(&card->napi);
 		show_error = 0;
 		break;
 
@@ -1660,11 +1657,11 @@ spider_net_interrupt(int irq, void *ptr)
 
 	if (status_reg & SPIDER_NET_RXINT ) {
 		spider_net_rx_irq_off(card);
-		netif_rx_schedule(netdev, &card->napi);
+		netif_rx_schedule(&card->napi);
 		card->num_rx_ints ++;
 	}
 	if (status_reg & SPIDER_NET_TXINT)
-		netif_rx_schedule(netdev, &card->napi);
+		netif_rx_schedule(&card->napi);
 
 	if (status_reg & SPIDER_NET_LINKINT)
 		spider_net_link_reset(netdev);
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 0358809f409c..d5b9dd842c61 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1290,8 +1290,8 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 		if (intr_status & (IntrRxDone | IntrRxEmpty)) {
 			u32 enable;
 
-			if (likely(netif_rx_schedule_prep(dev, &np->napi))) {
-				__netif_rx_schedule(dev, &np->napi);
+			if (likely(netif_rx_schedule_prep(&np->napi))) {
+				__netif_rx_schedule(&np->napi);
 				enable = readl(ioaddr + IntrEnable);
 				enable &= ~(IntrRxDone | IntrRxEmpty);
 				writel(enable, ioaddr + IntrEnable);
@@ -1530,7 +1530,7 @@ static int netdev_poll(struct napi_struct *napi, int budget)
 		intr_status = readl(ioaddr + IntrStatus);
 	} while (intr_status & (IntrRxDone | IntrRxEmpty));
 
-	netif_rx_complete(dev, napi);
+	netif_rx_complete(napi);
 	intr_status = readl(ioaddr + IntrEnable);
 	intr_status |= IntrRxDone | IntrRxEmpty;
 	writel(intr_status, ioaddr + IntrEnable);
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index f4b0beec4d19..8a7460412482 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -921,7 +921,7 @@ static int gem_poll(struct napi_struct *napi, int budget)
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);
 
-	__netif_rx_complete(dev, napi);
+	__netif_rx_complete(napi);
 	gem_enable_ints(gp);
 
 	spin_unlock_irqrestore(&gp->lock, flags);
@@ -944,7 +944,7 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&gp->lock, flags);
 
-	if (netif_rx_schedule_prep(dev, &gp->napi)) {
+	if (netif_rx_schedule_prep(&gp->napi)) {
 		u32 gem_status = readl(gp->regs + GREG_STAT);
 
 		if (gem_status == 0) {
@@ -954,7 +954,7 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id)
 		}
 		gp->status = gem_status;
 		gem_disable_ints(gp);
-		__netif_rx_schedule(dev, &gp->napi);
+		__netif_rx_schedule(&gp->napi);
 	}
 
 	spin_unlock_irqrestore(&gp->lock, flags);
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index 308f365270e9..bcd0e60cbda9 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1609,8 +1609,8 @@ static irqreturn_t tc35815_interrupt(int irq, void *dev_id)
 	if (!(dmactl & DMA_IntMask)) {
 		/* disable interrupts */
 		tc_writel(dmactl | DMA_IntMask, &tr->DMA_Ctl);
-		if (netif_rx_schedule_prep(dev, &lp->napi))
-			__netif_rx_schedule(dev, &lp->napi);
+		if (netif_rx_schedule_prep(&lp->napi))
+			__netif_rx_schedule(&lp->napi);
 		else {
 			printk(KERN_ERR "%s: interrupt taken in poll\n",
 			       dev->name);
@@ -1919,7 +1919,7 @@ static int tc35815_poll(struct napi_struct *napi, int budget)
 	spin_unlock(&lp->lock);
 
 	if (received < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		/* enable interrupts */
 		tc_writel(tc_readl(&tr->DMA_Ctl) & ~DMA_IntMask, &tr->DMA_Ctl);
 	}
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 5b83fbb02013..a10a83a11d9f 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -265,8 +265,8 @@ static irqreturn_t bdx_isr_napi(int irq, void *dev)
 		bdx_isr_extra(priv, isr);
 
 	if (isr & (IR_RX_DESC_0 | IR_TX_FREE_0)) {
-		if (likely(netif_rx_schedule_prep(ndev, &priv->napi))) {
-			__netif_rx_schedule(ndev, &priv->napi);
+		if (likely(netif_rx_schedule_prep(&priv->napi))) {
+			__netif_rx_schedule(&priv->napi);
 			RET(IRQ_HANDLED);
 		} else {
 			/* NOTE: we get here if intr has slipped into window
@@ -289,7 +289,6 @@ static irqreturn_t bdx_isr_napi(int irq, void *dev)
 static int bdx_poll(struct napi_struct *napi, int budget)
 {
 	struct bdx_priv *priv = container_of(napi, struct bdx_priv, napi);
-	struct net_device *dev = priv->ndev;
 	int work_done;
 
 	ENTER;
@@ -303,7 +302,7 @@ static int bdx_poll(struct napi_struct *napi, int budget)
 		 * device lock and allow waiting tasks (eg rmmod) to advance) */
 		priv->napi_stop = 0;
 
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		bdx_enable_interrupts(priv);
 	}
 	return work_done;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7971d802508d..04ae1e86aeaa 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -4451,7 +4451,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
 			sblk->status &= ~SD_STATUS_UPDATED;
 
 		if (likely(!tg3_has_work(tp))) {
-			netif_rx_complete(tp->dev, napi);
+			netif_rx_complete(napi);
 			tg3_restart_ints(tp);
 			break;
 		}
@@ -4461,7 +4461,7 @@ static int tg3_poll(struct napi_struct *napi, int budget)
 
 tx_recovery:
 	/* work_done is guaranteed to be less than budget. */
-	netif_rx_complete(tp->dev, napi);
+	netif_rx_complete(napi);
 	schedule_work(&tp->reset_task);
 	return work_done;
 }
@@ -4510,7 +4510,7 @@ static irqreturn_t tg3_msi_1shot(int irq, void *dev_id)
 	prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
 
 	if (likely(!tg3_irq_sync(tp)))
-		netif_rx_schedule(dev, &tp->napi);
+		netif_rx_schedule(&tp->napi);
 
 	return IRQ_HANDLED;
 }
@@ -4535,7 +4535,7 @@ static irqreturn_t tg3_msi(int irq, void *dev_id)
 	 */
 	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
 	if (likely(!tg3_irq_sync(tp)))
-		netif_rx_schedule(dev, &tp->napi);
+		netif_rx_schedule(&tp->napi);
 
 	return IRQ_RETVAL(1);
 }
@@ -4577,7 +4577,7 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id)
 	sblk->status &= ~SD_STATUS_UPDATED;
 	if (likely(tg3_has_work(tp))) {
 		prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
-		netif_rx_schedule(dev, &tp->napi);
+		netif_rx_schedule(&tp->napi);
 	} else {
 		/* No work, shared interrupt perhaps?  re-enable
 		 * interrupts, and flush that PCI write
@@ -4623,7 +4623,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id)
 	tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
 	if (tg3_irq_sync(tp))
 		goto out;
-	if (netif_rx_schedule_prep(dev, &tp->napi)) {
+	if (netif_rx_schedule_prep(&tp->napi)) {
 		prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
 		/* Update last_tag to mark that this status has been
 		 * seen. Because interrupt may be shared, we may be
@@ -4631,7 +4631,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id)
 		 * if tg3_poll() is not scheduled.
 		 */
 		tp->last_tag = sblk->status_tag;
-		__netif_rx_schedule(dev, &tp->napi);
+		__netif_rx_schedule(&tp->napi);
 	}
 out:
 	return IRQ_RETVAL(handled);
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index 271bc230c8a9..75461dbd4876 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -888,7 +888,7 @@ static int tsi108_poll(struct napi_struct *napi, int budget)
 
 	if (num_received < budget) {
 		data->rxpending = 0;
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 		TSI_WRITE(TSI108_EC_INTMASK,
 				     TSI_READ(TSI108_EC_INTMASK)
@@ -919,7 +919,7 @@ static void tsi108_rx_int(struct net_device *dev)
 	 * from tsi108_check_rxring().
 	 */
 
-	if (netif_rx_schedule_prep(dev, &data->napi)) {
+	if (netif_rx_schedule_prep(&data->napi)) {
 		/* Mask, rather than ack, the receive interrupts.  The ack
 		 * will happen in tsi108_poll().
 		 */
@@ -930,7 +930,7 @@ static void tsi108_rx_int(struct net_device *dev)
 				     | TSI108_INT_RXTHRESH |
 				     TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR |
 				     TSI108_INT_RXWAIT);
-		__netif_rx_schedule(dev, &data->napi);
+		__netif_rx_schedule(&data->napi);
 	} else {
 		if (!netif_running(dev)) {
 			/* This can happen if an interrupt occurs while the
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index 739d610d18c5..6c3428a37c0b 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -103,7 +103,7 @@ void oom_timer(unsigned long data)
 {
         struct net_device *dev = (struct net_device *)data;
 	struct tulip_private *tp = netdev_priv(dev);
-	netif_rx_schedule(dev, &tp->napi);
+	netif_rx_schedule(&tp->napi);
 }
 
 int tulip_poll(struct napi_struct *napi, int budget)
@@ -300,7 +300,7 @@ int tulip_poll(struct napi_struct *napi, int budget)
 
          /* Remove us from polling list and enable RX intr. */
 
-         netif_rx_complete(dev, napi);
+         netif_rx_complete(napi);
          iowrite32(tulip_tbl[tp->chip_id].valid_intrs, tp->base_addr+CSR7);
 
          /* The last op happens after poll completion. Which means the following:
@@ -336,7 +336,7 @@ int tulip_poll(struct napi_struct *napi, int budget)
           * before we did netif_rx_complete(). See? We would lose it. */
 
          /* remove ourselves from the polling list */
-         netif_rx_complete(dev, napi);
+         netif_rx_complete(napi);
 
          return work_done;
 }
@@ -519,7 +519,7 @@ irqreturn_t tulip_interrupt(int irq, void *dev_instance)
 			rxd++;
 			/* Mask RX intrs and add the device to poll list. */
 			iowrite32(tulip_tbl[tp->chip_id].valid_intrs&~RxPollInt, ioaddr + CSR7);
-			netif_rx_schedule(dev, &tp->napi);
+			netif_rx_schedule(&tp->napi);
 
 			if (!(csr5&~(AbnormalIntr|NormalIntr|RxPollInt|TPLnkPass)))
                                break;
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 5386d9b73e6a..0009f4e34433 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1755,7 +1755,6 @@ static int
 typhoon_poll(struct napi_struct *napi, int budget)
 {
 	struct typhoon *tp = container_of(napi, struct typhoon, napi);
-	struct net_device *dev = tp->dev;
 	struct typhoon_indexes *indexes = tp->indexes;
 	int work_done;
 
@@ -1784,7 +1783,7 @@ typhoon_poll(struct napi_struct *napi, int budget)
 	}
 
 	if (work_done < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		iowrite32(TYPHOON_INTR_NONE,
 				tp->ioaddr + TYPHOON_REG_INTR_MASK);
 		typhoon_post_pci_writes(tp->ioaddr);
@@ -1807,10 +1806,10 @@ typhoon_interrupt(int irq, void *dev_instance)
 
 	iowrite32(intr_status, ioaddr + TYPHOON_REG_INTR_STATUS);
 
-	if (netif_rx_schedule_prep(dev, &tp->napi)) {
+	if (netif_rx_schedule_prep(&tp->napi)) {
 		iowrite32(TYPHOON_INTR_ALL, ioaddr + TYPHOON_REG_INTR_MASK);
 		typhoon_post_pci_writes(ioaddr);
-		__netif_rx_schedule(dev, &tp->napi);
+		__netif_rx_schedule(&tp->napi);
 	} else {
 		printk(KERN_ERR "%s: Error, poll already scheduled\n",
                        dev->name);
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 5c82f147f151..78a2ede19c5e 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3330,7 +3330,7 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)
 		struct ucc_fast_private *uccf;
 		u32 uccm;
 
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 		uccf = ugeth->uccf;
 		uccm = in_be32(uccf->p_uccm);
 		uccm |= UCCE_RX_EVENTS;
@@ -3364,10 +3364,10 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
 
 	/* check for receive events that require processing */
 	if (ucce & UCCE_RX_EVENTS) {
-		if (netif_rx_schedule_prep(dev, &ugeth->napi)) {
+		if (netif_rx_schedule_prep(&ugeth->napi)) {
 			uccm &= ~UCCE_RX_EVENTS;
 			out_be32(uccf->p_uccm, uccm);
-			__netif_rx_schedule(dev, &ugeth->napi);
+			__netif_rx_schedule(&ugeth->napi);
 		}
 	}
 
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index 8d405c83df8b..ac07cc6e3cb2 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -589,7 +589,7 @@ static int rhine_napipoll(struct napi_struct *napi, int budget)
 	work_done = rhine_rx(dev, budget);
 
 	if (work_done < budget) {
-		netif_rx_complete(dev, napi);
+		netif_rx_complete(napi);
 
 		iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
 			  IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
@@ -1318,7 +1318,7 @@ static irqreturn_t rhine_interrupt(int irq, void *dev_instance)
 				  IntrPCIErr | IntrStatsMax | IntrLinkChange,
 				  ioaddr + IntrEnable);
 
-			netif_rx_schedule(dev, &rp->napi);
+			netif_rx_schedule(&rp->napi);
 		}
 
 		if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 71ca29cc184d..b7004ff36451 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -374,9 +374,9 @@ static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
 	/* Schedule NAPI, Suppress further interrupts if successful. */
-	if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
+	if (netif_rx_schedule_prep(&vi->napi)) {
 		rvq->vq_ops->disable_cb(rvq);
-		__netif_rx_schedule(vi->dev, &vi->napi);
+		__netif_rx_schedule(&vi->napi);
 	}
 }
 
@@ -402,11 +402,11 @@ again:
 
 	/* Out of packets? */
 	if (received < budget) {
-		netif_rx_complete(vi->dev, napi);
+		netif_rx_complete(napi);
 		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
 		    && napi_schedule_prep(napi)) {
 			vi->rvq->vq_ops->disable_cb(vi->rvq);
-			__netif_rx_schedule(vi->dev, napi);
+			__netif_rx_schedule(napi);
 			goto again;
 		}
 	}
@@ -580,9 +580,9 @@ static int virtnet_open(struct net_device *dev)
 	 * won't get another interrupt, so process any outstanding packets
 	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
 	 * We synchronize against interrupts via NAPI_STATE_SCHED */
-	if (netif_rx_schedule_prep(dev, &vi->napi)) {
+	if (netif_rx_schedule_prep(&vi->napi)) {
 		vi->rvq->vq_ops->disable_cb(vi->rvq);
-		__netif_rx_schedule(dev, &vi->napi);
+		__netif_rx_schedule(&vi->napi);
 	}
 	return 0;
 }
diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index 0bcc0b5f22d7..08b3536944fe 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -341,7 +341,7 @@ static int sca_poll(struct napi_struct *napi, int budget)
 		received = sca_rx_done(port, budget);
 
 	if (received < budget) {
-		netif_rx_complete(port->netdev, napi);
+		netif_rx_complete(napi);
 		enable_intr(port);
 	}
 
@@ -359,7 +359,7 @@ static irqreturn_t sca_intr(int irq, void *dev_id)
 		if (port && (isr0 & (i ? 0x08002200 : 0x00080022))) {
 			handled = 1;
 			disable_intr(port);
-			netif_rx_schedule(port->netdev, &port->napi);
+			netif_rx_schedule(&port->napi);
 		}
 	}
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index fe376fde4e89..761635be9104 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -196,7 +196,7 @@ static void rx_refill_timeout(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *)data;
 	struct netfront_info *np = netdev_priv(dev);
-	netif_rx_schedule(dev, &np->napi);
+	netif_rx_schedule(&np->napi);
 }
 
 static int netfront_tx_slot_available(struct netfront_info *np)
@@ -328,7 +328,7 @@ static int xennet_open(struct net_device *dev)
 		xennet_alloc_rx_buffers(dev);
 		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
 		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			netif_rx_schedule(dev, &np->napi);
+			netif_rx_schedule(&np->napi);
 	}
 	spin_unlock_bh(&np->rx_lock);
 
@@ -979,7 +979,7 @@ err:
 
 		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
 		if (!more_to_do)
-			__netif_rx_complete(dev, napi);
+			__netif_rx_complete(napi);
 
 		local_irq_restore(flags);
 	}
@@ -1310,7 +1310,7 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
 		xennet_tx_buf_gc(dev);
 		/* Under tx_lock: protects access to rx shared-ring indexes. */
 		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			netif_rx_schedule(dev, &np->napi);
+			netif_rx_schedule(&np->napi);
 	}
 
 	spin_unlock_irqrestore(&np->tx_lock, flags);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 58856b6737fb..41e1224651cf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1555,8 +1555,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 }
 
 /* Test if receive needs to be scheduled but only if up */
-static inline int netif_rx_schedule_prep(struct net_device *dev,
-					 struct napi_struct *napi)
+static inline int netif_rx_schedule_prep(struct napi_struct *napi)
 {
 	return napi_schedule_prep(napi);
 }
@@ -1564,27 +1563,24 @@ static inline int netif_rx_schedule_prep(struct net_device *dev,
 /* Add interface to tail of rx poll list. This assumes that _prep has
  * already been called and returned 1.
  */
-static inline void __netif_rx_schedule(struct net_device *dev,
-				       struct napi_struct *napi)
+static inline void __netif_rx_schedule(struct napi_struct *napi)
 {
 	__napi_schedule(napi);
 }
 
 /* Try to reschedule poll. Called by irq handler. */
 
-static inline void netif_rx_schedule(struct net_device *dev,
-				     struct napi_struct *napi)
+static inline void netif_rx_schedule(struct napi_struct *napi)
 {
-	if (netif_rx_schedule_prep(dev, napi))
-		__netif_rx_schedule(dev, napi);
+	if (netif_rx_schedule_prep(napi))
+		__netif_rx_schedule(napi);
 }
 
 /* Try to reschedule poll. Called by dev->poll() after netif_rx_complete().  */
-static inline int netif_rx_reschedule(struct net_device *dev,
-				      struct napi_struct *napi)
+static inline int netif_rx_reschedule(struct napi_struct *napi)
 {
 	if (napi_schedule_prep(napi)) {
-		__netif_rx_schedule(dev, napi);
+		__netif_rx_schedule(napi);
 		return 1;
 	}
 	return 0;
@@ -1593,8 +1589,7 @@ static inline int netif_rx_reschedule(struct net_device *dev,
 /* same as netif_rx_complete, except that local_irq_save(flags)
  * has already been issued
  */
-static inline void __netif_rx_complete(struct net_device *dev,
-				       struct napi_struct *napi)
+static inline void __netif_rx_complete(struct napi_struct *napi)
 {
 	__napi_complete(napi);
 }
@@ -1604,8 +1599,7 @@ static inline void __netif_rx_complete(struct net_device *dev,
  * it completes the work. The device cannot be out of poll list at this
  * moment, it is BUG().
  */
-static inline void netif_rx_complete(struct net_device *dev,
-				     struct napi_struct *napi)
+static inline void netif_rx_complete(struct napi_struct *napi)
 {
 	napi_complete(napi);
 }
-- 
cgit v1.2.3


From 88a9fe8cae3bb52e82489447f45e8d7ba1409ca8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:31 -0500
Subject: SUNRPC: Remove the last remnant of the BKL...

Somehow, this escaped the previous purge. There should be no need to keep
any extra locks in the XDR callbacks.

The NFS client XDR code only writes into private objects, whereas all reads
of shared objects are confined to fields that do not change, such as
filehandles...

Ditto for lockd, the NFSv2/v3 client mount code, and rpcbind.

The nfsd XDR code may require the BKL, but since it does a synchronous RPC
call from a thread that already holds the lock, that issue is moot.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h     | 15 ---------------
 net/sunrpc/auth.c              |  4 ++--
 net/sunrpc/auth_gss/auth_gss.c | 10 +++++-----
 3 files changed, 7 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e4057d729f03..49e1eb454465 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -36,21 +36,6 @@ struct xdr_netobj {
  */
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
 
-/*
- * We're still requiring the BKL in the xdr code until it's been
- * more carefully audited, at which point this wrapper will become
- * unnecessary.
- */
-static inline int rpc_call_xdrproc(kxdrproc_t xdrproc, void *rqstp, __be32 *data, void *obj)
-{
-	int ret;
-
-	lock_kernel();
-	ret = xdrproc(rqstp, data, obj);
-	unlock_kernel();
-	return ret;
-}
-
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index cb216b2df666..6e28744b1709 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -513,7 +513,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 	if (cred->cr_ops->crwrap_req)
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 	/* By default, we encode the arguments normally. */
-	return rpc_call_xdrproc(encode, rqstp, data, obj);
+	return encode(rqstp, data, obj);
 }
 
 int
@@ -528,7 +528,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 						   data, obj);
 	/* By default, we decode the arguments normally. */
-	return rpc_call_xdrproc(decode, rqstp, data, obj);
+	return decode(rqstp, data, obj);
 }
 
 int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 853a4142cea1..b8561597f0c8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1017,7 +1017,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = rpc_call_xdrproc(encode, rqstp, p, obj);
+	status = encode(rqstp, p, obj);
 	if (status)
 		return status;
 
@@ -1111,7 +1111,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = rpc_call_xdrproc(encode, rqstp, p, obj);
+	status = encode(rqstp, p, obj);
 	if (status)
 		return status;
 
@@ -1170,12 +1170,12 @@ gss_wrap_req(struct rpc_task *task,
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		status = rpc_call_xdrproc(encode, rqstp, p, obj);
+		status = encode(rqstp, p, obj);
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 		case RPC_GSS_SVC_NONE:
-			status = rpc_call_xdrproc(encode, rqstp, p, obj);
+			status = encode(rqstp, p, obj);
 			break;
 		case RPC_GSS_SVC_INTEGRITY:
 			status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1291,7 +1291,7 @@ gss_unwrap_resp(struct rpc_task *task,
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 						+ (savedlen - head->iov_len);
 out_decode:
-	status = rpc_call_xdrproc(decode, rqstp, p, obj);
+	status = decode(rqstp, p, obj);
 out:
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
-- 
cgit v1.2.3


From 146ec944bbd31d241a44a00518b054fb01921d22 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 23 Dec 2008 15:21:34 -0500
Subject: NFS: Move declaration of nfs_mount() to fs/nfs/internal.h

Clean up:  The nfs_mount() function is not to be used outside of the
NFS client.  Move its public declaration to fs/nfs/internal.h.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h      | 4 ++++
 fs/nfs/nfsroot.c       | 2 ++
 include/linux/nfs_fs.h | 6 ------
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d212ee41caf2..7a38cc7b4137 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,10 @@ struct nfs_parsed_mount_data {
 	struct security_mnt_opts lsm_opts;
 };
 
+/* mount_clnt.c */
+extern int nfs_mount(struct sockaddr *, size_t, char *, char *,
+					int, int, struct nfs_fh *);
+
 /* client.c */
 extern struct rpc_program nfs_program;
 
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index f1c0a066439f..a96e5fdb38af 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,6 +86,8 @@
 #include <net/ipconfig.h>
 #include <linux/parser.h>
 
+#include "internal.h"
+
 /* Define this to allow debugging output */
 #undef NFSROOT_DEBUG
 #define NFSDBG_FACILITY NFSDBG_ROOT
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4eaa8347a0d9..f11077285a6c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -532,12 +532,6 @@ static inline void nfs3_forget_cached_acls(struct inode *inode)
 }
 #endif /* CONFIG_NFS_V3_ACL */
 
-/*
- * linux/fs/mount_clnt.c
- */
-extern int  nfs_mount(struct sockaddr *, size_t, char *, char *,
-		      int, int, struct nfs_fh *);
-
 /*
  * inline functions
  */
-- 
cgit v1.2.3


From d740351bf0960e89ce1aef45cfe00167cb0f9e5b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 23 Dec 2008 15:21:37 -0500
Subject: NFS: add "[no]resvport" mount option

The standard default security setting for NFS is AUTH_SYS.  An NFS
client connects to NFS servers via a privileged source port and a
fixed standard destination port (2049).  The client sends raw uid and
gid numbers to identify users making NFS requests, and the server
assumes an appropriate authority on the client has vetted these
values because the source port is privileged.

On Linux, by default in-kernel RPC services use a privileged port in
the range between 650 and 1023 to avoid using source ports of well-
known IP services.  Using such a small range limits the number of NFS
mount points and the number of unique NFS servers to which a client
can connect concurrently.

An NFS client can use unprivileged source ports to expand the range of
source port numbers, allowing more concurrent server connections and
more NFS mount points.  Servers must explicitly allow NFS connections
from unprivileged ports for this to work.

In the past, bumping the value of the sunrpc.max_resvport sysctl on
the client would permit the NFS client to use unprivileged ports.
Bumping this setting also changes the maximum port number used by
other in-kernel RPC services, some of which still required a port
number less than 1023.

This is exacerbated by the way source port numbers are chosen by the
Linux RPC client, which starts at the top of the range and works
downwards.  It means that bumping the maximum means all RPC services
requesting a source port will likely get an unprivileged port instead
of a privileged one.

Changing this setting effects all NFS mount points on a client.  A
sysadmin could not selectively choose which mount points would use
non-privileged ports and which could not.

Lastly, this mechanism of expanding the limit on the number of NFS
mount points was entirely undocumented.

To address the need for the NFS client to use a large range of source
ports without interfering with the activity of other in-kernel RPC
services, we introduce a new NFS mount option.  This option explicitly
tells only the NFS client to use a non-privileged source port when
communicating with the NFS server for one specific mount point.

This new mount option is called "resvport," like the similar NFS mount
option on FreeBSD and Mac OS X.  A sister patch for nfs-utils will be
submitted that documents this new option in nfs(5).

The default setting for this new mount option requires the NFS client
to use a privileged port, as before.  Explicitly specifying the
"noresvport" mount option allows the NFS client to use an unprivileged
source port for this mount point when connecting to the NFS server
port.

This mount option is supported only for text-based NFS mounts.

[ Sidebar: it is widely known that security mechanisms based on the
  use of privileged source ports are ineffective.  However, the NFS
  client can combine the use of unprivileged ports with the use of
  secure authentication mechanisms, such as Kerberos.  This allows a
  large number of connections and mount points while ensuring a useful
  level of security.

  Eventually we may change the default setting for this option
  depending on the security flavor used for the mount.  For example,
  if the mount is using only AUTH_SYS, then the default setting will
  be "resvport;" if the mount is using a strong security flavor such
  as krb5, the default setting will be "noresvport." ]

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond.Myklebust@netapp.com: Fixed a bug whereby nfs4_init_client()
was being called with incorrect arguments.]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 11 +++++++----
 fs/nfs/super.c            | 12 +++++++++++-
 include/linux/nfs_mount.h |  3 ++-
 3 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 27190337fc13..3a69cacc4fa4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -627,7 +627,8 @@ static int nfs_init_client(struct nfs_client *clp,
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
-	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0, 0);
+	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
+				      0, data->flags & NFS_MOUNT_NORESVPORT);
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -969,7 +970,8 @@ error:
 static int nfs4_init_client(struct nfs_client *clp,
 		const struct rpc_timeout *timeparms,
 		const char *ip_addr,
-		rpc_authflavor_t authflavour)
+		rpc_authflavor_t authflavour,
+		int flags)
 {
 	int error;
 
@@ -983,7 +985,7 @@ static int nfs4_init_client(struct nfs_client *clp,
 	clp->rpc_ops = &nfs_v4_clientops;
 
 	error = nfs_create_rpc_client(clp, timeparms, authflavour,
-				      1, 0);
+				      1, flags & NFS_MOUNT_NORESVPORT);
 	if (error < 0)
 		goto error;
 	memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1034,7 +1036,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		error = PTR_ERR(clp);
 		goto error;
 	}
-	error = nfs4_init_client(clp, timeparms, ip_addr, authflavour);
+	error = nfs4_init_client(clp, timeparms, ip_addr, authflavour,
+					server->flags);
 	if (error < 0)
 		goto error_put;
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2b0c8e132b54..e05a77be3068 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -75,6 +75,7 @@ enum {
 	Opt_acl, Opt_noacl,
 	Opt_rdirplus, Opt_nordirplus,
 	Opt_sharecache, Opt_nosharecache,
+	Opt_resvport, Opt_noresvport,
 
 	/* Mount options that take integer arguments */
 	Opt_port,
@@ -129,6 +130,8 @@ static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_nordirplus, "nordirplus" },
 	{ Opt_sharecache, "sharecache" },
 	{ Opt_nosharecache, "nosharecache" },
+	{ Opt_resvport, "resvport" },
+	{ Opt_noresvport, "noresvport" },
 
 	{ Opt_port, "port=%u" },
 	{ Opt_rsize, "rsize=%u" },
@@ -514,7 +517,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
-		{ NFS_MOUNT_UNSHARED, ",nosharecache", ""},
+		{ NFS_MOUNT_UNSHARED, ",nosharecache", "" },
+		{ NFS_MOUNT_NORESVPORT, ",noresvport", "" },
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
@@ -1035,6 +1039,12 @@ static int nfs_parse_mount_options(char *raw,
 		case Opt_nosharecache:
 			mnt->flags |= NFS_MOUNT_UNSHARED;
 			break;
+		case Opt_resvport:
+			mnt->flags &= ~NFS_MOUNT_NORESVPORT;
+			break;
+		case Opt_noresvport:
+			mnt->flags |= NFS_MOUNT_NORESVPORT;
+			break;
 
 		/*
 		 * options that take numeric values
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 6549a06ac16e..4499016e6d0d 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -45,7 +45,7 @@ struct nfs_mount_data {
 	char		context[NFS_MAX_CONTEXT_LEN + 1];	/* 6 */
 };
 
-/* bits in the flags field */
+/* bits in the flags field visible to user space */
 
 #define NFS_MOUNT_SOFT		0x0001	/* 1 */
 #define NFS_MOUNT_INTR		0x0002	/* 1 */ /* now unused, but ABI */
@@ -68,5 +68,6 @@ struct nfs_mount_data {
 /* The following are for internal use only */
 #define NFS_MOUNT_LOOKUP_CACHE_NONEG	0x10000
 #define NFS_MOUNT_LOOKUP_CACHE_NONE	0x20000
+#define NFS_MOUNT_NORESVPORT		0x40000
 
 #endif
-- 
cgit v1.2.3


From 0cb2659b818eca99235e17c04291cfa9985c14f7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 23 Dec 2008 15:21:38 -0500
Subject: NLM: allow lockd requests from an unprivileged port

If the admin has specified the "noresvport" option for an NFS mount
point, the kernel's NFS client uses an unprivileged source port for
the main NFS transport.  The kernel's lockd client should use an
unprivileged port in this case as well.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c         |  2 +-
 fs/lockd/host.c             | 10 +++++++++-
 fs/nfs/client.c             |  2 ++
 include/linux/lockd/bind.h  |  1 +
 include/linux/lockd/lockd.h |  4 +++-
 5 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 94d42cc4e393..1f3b0fc0d351 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -61,7 +61,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 
 	host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
 				   nlm_init->protocol, nlm_version,
-				   nlm_init->hostname);
+				   nlm_init->hostname, nlm_init->noresvport);
 	if (host == NULL) {
 		lockd_down();
 		return ERR_PTR(-ENOLCK);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 70fc63a1727b..acc2aa5021d1 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -48,6 +48,7 @@ struct nlm_lookup_host_info {
 	const size_t		hostname_len;	/* it's length */
 	const struct sockaddr	*src_sap;	/* our address (optional) */
 	const size_t		src_len;	/* it's length */
+	const int		noresvport;	/* use non-priv port */
 };
 
 /*
@@ -222,6 +223,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 	host->h_nsmstate   = 0;			/* real NSM state */
 	host->h_nsmhandle  = nsm;
 	host->h_server	   = ni->server;
+	host->h_noresvport = ni->noresvport;
 	hlist_add_head(&host->h_hash, chain);
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
@@ -272,6 +274,7 @@ nlm_destroy_host(struct nlm_host *host)
  * @protocol: transport protocol to use
  * @version: NLM protocol version
  * @hostname: '\0'-terminated hostname of server
+ * @noresvport: 1 if non-privileged port should be used
  *
  * Returns an nlm_host structure that matches the passed-in
  * [server address, transport protocol, NLM version, server hostname].
@@ -281,7 +284,9 @@ nlm_destroy_host(struct nlm_host *host)
 struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 				     const size_t salen,
 				     const unsigned short protocol,
-				     const u32 version, const char *hostname)
+				     const u32 version,
+				     const char *hostname,
+				     int noresvport)
 {
 	const struct sockaddr source = {
 		.sa_family	= AF_UNSPEC,
@@ -296,6 +301,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.src_sap	= &source,
 		.src_len	= sizeof(source),
+		.noresvport	= noresvport,
 	};
 
 	dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
@@ -417,6 +423,8 @@ nlm_bind_host(struct nlm_host *host)
 		 */
 		if (!host->h_server)
 			args.flags |= RPC_CLNT_CREATE_HARDRTRY;
+		if (host->h_noresvport)
+			args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 		clnt = rpc_create(&args);
 		if (!IS_ERR(clnt))
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3a69cacc4fa4..70b6d9e8517d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -526,6 +526,8 @@ static int nfs_start_lockd(struct nfs_server *server)
 		.protocol	= server->flags & NFS_MOUNT_TCP ?
 						IPPROTO_TCP : IPPROTO_UDP,
 		.nfs_version	= clp->rpc_ops->version,
+		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?
+					1 : 0,
 	};
 
 	if (nlm_init.nfs_version > 3)
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index e5872dc994c0..fbc48f898521 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -41,6 +41,7 @@ struct nlmclnt_initdata {
 	size_t			addrlen;
 	unsigned short		protocol;
 	u32			nfs_version;
+	int			noresvport;
 };
 
 /*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b56d5aa9b194..23da3fa69efa 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -49,6 +49,7 @@ struct nlm_host {
 	unsigned short		h_proto;	/* transport proto */
 	unsigned short		h_reclaiming : 1,
 				h_server     : 1, /* server side, not client side */
+				h_noresvport : 1,
 				h_inuse      : 1;
 	wait_queue_head_t	h_gracewait;	/* wait while reclaiming */
 	struct rw_semaphore	h_rwsem;	/* Reboot recovery lock */
@@ -220,7 +221,8 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
 					const size_t salen,
 					const unsigned short protocol,
 					const u32 version,
-					const char *hostname);
+					const char *hostname,
+					int noresvport);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 					const char *hostname,
 					const size_t hostname_len);
-- 
cgit v1.2.3


From 95d35cb4c473c754824967c0b069bbeb7efa4847 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:45 -0500
Subject: NFSv4: Remove nfs_client->cl_sem

Now that we're using the flags to indicate state that needs to be
recovered, as well as having implemented proper refcounting and spinlocking
on the state and open_owners, we can get rid of nfs_client->cl_sem. The
only remaining case that was dubious was the file locking, and that case is
now covered by the nfsi->rwsem.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  1 -
 fs/nfs/delegation.c       |  5 -----
 fs/nfs/nfs4proc.c         | 20 +-------------------
 fs/nfs/nfs4renewd.c       |  3 ---
 fs/nfs/nfs4state.c        | 17 -----------------
 include/linux/nfs_fs_sb.h |  6 ------
 6 files changed, 1 insertion(+), 51 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 70b6d9e8517d..b643f0ec5c21 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_proto = cl_init->proto;
 
 #ifdef CONFIG_NFS_V4
-	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 646ba3e75a1e..ebc06f2da31a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -243,16 +243,13 @@ static void nfs_msync_inode(struct inode *inode)
  */
 static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	nfs_msync_inode(inode);
-	down_read(&clp->cl_sem);
 	/* Guard against new delegated open calls */
 	down_write(&nfsi->rwsem);
 	nfs_delegation_claim_opens(inode, &delegation->stateid);
 	up_write(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
 	return nfs_do_return_delegation(inode, delegation, 1);
@@ -425,7 +422,6 @@ static int recall_thread(void *data)
 	daemonize("nfsv4-delegreturn");
 
 	nfs_msync_inode(inode);
-	down_read(&clp->cl_sem);
 	down_write(&nfsi->rwsem);
 	spin_lock(&clp->cl_lock);
 	delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
@@ -437,7 +433,6 @@ static int recall_thread(void *data)
 	complete(&args->started);
 	nfs_delegation_claim_opens(inode, args->stateid);
 	up_write(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
 	if (delegation != NULL)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index aec4e47c462d..26dcd77abb07 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -207,12 +207,8 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
 
 	might_sleep();
 
-	rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
-
 	res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
 			nfs4_wait_bit_killable, TASK_KILLABLE);
-
-	rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
 	return res;
 }
 
@@ -1135,7 +1131,6 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
 	int status;
 
@@ -1150,11 +1145,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 		goto err_put_state_owner;
 	if (path->dentry->d_inode != NULL)
 		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
-	down_read(&clp->cl_sem);
 	status = -ENOMEM;
 	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
 	if (opendata == NULL)
-		goto err_release_rwsem;
+		goto err_put_state_owner;
 
 	if (path->dentry->d_inode != NULL)
 		opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
@@ -1172,13 +1166,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 		goto err_opendata_put;
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
-	up_read(&clp->cl_sem);
 	*res = state;
 	return 0;
 err_opendata_put:
 	nfs4_opendata_put(opendata);
-err_release_rwsem:
-	up_read(&clp->cl_sem);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
 out_err:
@@ -3099,7 +3090,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	struct nfs4_lock_state *lsp;
 	int status;
 
-	down_read(&clp->cl_sem);
 	arg.lock_owner.clientid = clp->cl_clientid;
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
@@ -3116,7 +3106,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	}
 	request->fl_ops->fl_release_private(request);
 out:
-	up_read(&clp->cl_sem);
 	return status;
 }
 
@@ -3273,7 +3262,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs_client *clp = state->owner->so_client;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_seqid *seqid;
 	struct nfs4_lock_state *lsp;
@@ -3284,15 +3272,12 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	status = nfs4_set_lock_state(state, request);
 	/* Unlock _before_ we do the RPC call */
 	request->fl_flags |= FL_EXISTS;
-	down_read(&clp->cl_sem);
 	down_read(&nfsi->rwsem);
 	if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
 		up_read(&nfsi->rwsem);
-		up_read(&clp->cl_sem);
 		goto out;
 	}
 	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 	if (status != 0)
 		goto out;
 	/* Is this a delegated lock? */
@@ -3518,7 +3503,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs_client *clp = state->owner->so_client;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	unsigned char fl_flags = request->fl_flags;
 	int status;
@@ -3531,7 +3515,6 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	status = do_vfs_lock(request->fl_file, request);
 	if (status < 0)
 		goto out;
-	down_read(&clp->cl_sem);
 	down_read(&nfsi->rwsem);
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		/* Yes: cache locks! */
@@ -3549,7 +3532,6 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
 out_unlock:
 	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 out:
 	request->fl_flags = fl_flags;
 	return status;
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 9fe8640a88eb..6101f955f231 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -65,7 +65,6 @@ nfs4_renew_state(struct work_struct *work)
 	long lease, timeout;
 	unsigned long last, now;
 
-	down_read(&clp->cl_sem);
 	dprintk("%s: start\n", __func__);
 	/* Are there any active superblocks? */
 	if (list_empty(&clp->cl_superblocks))
@@ -101,11 +100,9 @@ nfs4_renew_state(struct work_struct *work)
 	schedule_delayed_work(&clp->cl_renewd, timeout);
 	spin_unlock(&clp->cl_lock);
 out:
-	up_read(&clp->cl_sem);
 	dprintk("%s: done\n", __func__);
 }
 
-/* Must be called with clp->cl_sem locked for writes */
 void
 nfs4_schedule_state_renewal(struct nfs_client *clp)
 {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 16c9fbdf97b4..ec0cbe00a804 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -305,10 +305,6 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 	}
 }
 
-/*
- * Note: must be called with clp->cl_sem held in order to prevent races
- *       with reboot recovery!
- */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
 	struct nfs_client *clp = server->nfs_client;
@@ -337,10 +333,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	return sp;
 }
 
-/*
- * Must be called with clp->cl_sem held in order to avoid races
- * with state recovery...
- */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
 	struct nfs_client *clp = sp->so_client;
@@ -442,10 +434,6 @@ out:
 	return state;
 }
 
-/*
- * Beware! Caller must be holding exactly one
- * reference to clp->cl_sem!
- */
 void nfs4_put_open_state(struct nfs4_state *state)
 {
 	struct inode *inode = state->inode;
@@ -578,7 +566,6 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding clp->cl_sem
  */
 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
@@ -1127,7 +1114,6 @@ static int reclaimer(void *ptr)
 	allow_signal(SIGKILL);
 
 	/* Ensure exclusive access to NFSv4 state */
-	down_write(&clp->cl_sem);
 	while (!list_empty(&clp->cl_superblocks)) {
 		if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
 			/* We're going to have to re-establish a clientid */
@@ -1149,7 +1135,6 @@ static int reclaimer(void *ptr)
 
 		/* First recover reboot state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
-			/* Note: list is protected by exclusive lock on cl->cl_sem */
 			status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
 			if (status == -NFS4ERR_STALE_CLIENTID)
 				continue;
@@ -1159,7 +1144,6 @@ static int reclaimer(void *ptr)
 
 		/* Now recover expired state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
-			/* Note: list is protected by exclusive lock on cl->cl_sem */
 			status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops);
 			if (status < 0) {
 				set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
@@ -1175,7 +1159,6 @@ static int reclaimer(void *ptr)
 		break;
 	}
 out:
-	up_write(&clp->cl_sem);
 	if (test_and_clear_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state))
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4e477ae58699..9bb81aec91cf 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -42,12 +42,6 @@ struct nfs_client {
 	struct rb_root		cl_openowner_id;
 	struct rb_root		cl_lockowner_id;
 
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
 	struct list_head	cl_delegations;
 	struct rb_root		cl_state_owners;
 	spinlock_t		cl_lock;
-- 
cgit v1.2.3


From bd7bf9d540c001055fba796ebf146d90e4dd2eb2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:53 -0500
Subject: NFSv4: Convert delegation->type field to fmode_t

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c     | 2 +-
 fs/nfs/delegation.h     | 6 +++---
 fs/nfs/nfs4xdr.c        | 4 ++--
 include/linux/nfs_fs.h  | 2 +-
 include/linux/nfs_xdr.h | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index e75f2f8c5245..968225a88015 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -48,7 +48,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
 	set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
 }
 
-int nfs_have_delegation(struct inode *inode, int flags)
+int nfs_have_delegation(struct inode *inode, fmode_t flags)
 {
 	struct nfs_delegation *delegation;
 	int ret = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2a74882e5d50..09f383795174 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -17,7 +17,7 @@ struct nfs_delegation {
 	struct rpc_cred *cred;
 	struct inode *inode;
 	nfs4_stateid stateid;
-	int type;
+	fmode_t type;
 	loff_t maxsize;
 	__u64 change_attr;
 	unsigned long flags;
@@ -54,10 +54,10 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
-int nfs_have_delegation(struct inode *inode, int flags);
+int nfs_have_delegation(struct inode *inode, fmode_t flags);
 
 #else
-static inline int nfs_have_delegation(struct inode *inode, int flags)
+static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
 {
 	return 0;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b916297d2334..879911c99030 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1024,7 +1024,7 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
 	}
 }
 
-static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type)
+static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
 {
 	__be32 *p;
 
@@ -1053,7 +1053,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
 	encode_string(xdr, name->len, name->name);
 }
 
-static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
+static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
 {
 	__be32 *p;
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f11077285a6c..8d71d7b7c78a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -180,7 +180,7 @@ struct nfs_inode {
         /* NFSv4 state */
 	struct list_head	open_states;
 	struct nfs_delegation	*delegation;
-	int			 delegation_state;
+	fmode_t			 delegation_state;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
 	struct inode		vfs_inode;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c1c31acb8a2b..32c1a0ecdbff 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -126,7 +126,7 @@ struct nfs_openargs {
 		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
 		nfs4_verifier   verifier; /* EXCLUSIVE */
 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
-		int		delegation_type;	/* CLAIM_PREVIOUS */
+		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
 	} u;
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
@@ -143,7 +143,7 @@ struct nfs_openres {
 	struct nfs_fattr *      dir_attr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	int			delegation_type;
+	fmode_t			delegation_type;
 	nfs4_stateid		delegation;
 	__u32			do_recall;
 	__u64			maxsize;
-- 
cgit v1.2.3


From dc0b027dfadfcb8a5504f7d8052754bf8d501ab9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:56 -0500
Subject: NFSv4: Convert the open and close ops to use fmode

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c          |   2 +-
 fs/nfs/nfs4_fs.h        |   8 +--
 fs/nfs/nfs4proc.c       | 126 ++++++++++++++++++++++++++----------------------
 fs/nfs/nfs4state.c      |  24 ++++-----
 fs/nfs/nfs4xdr.c        |  10 ++--
 include/linux/nfs_fs.h  |   4 +-
 include/linux/nfs_xdr.h |   3 +-
 7 files changed, 94 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d22eb383e1cf..1cf39202c3ea 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -592,7 +592,7 @@ static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context
 /*
  * Given an inode, search for an open context with the desired characteristics
  */
-struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode)
+struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *pos, *ctx = NULL;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ee5f51ef696d..d5f5efaf2b26 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -161,7 +161,7 @@ struct nfs4_state {
 	unsigned int n_rdonly;		/* Number of read-only references */
 	unsigned int n_wronly;		/* Number of write-only references */
 	unsigned int n_rdwr;		/* Number of read/write references */
-	int state;			/* State on the server (R,W, or RW) */
+	fmode_t state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
 };
 
@@ -223,9 +223,9 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
-extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t);
-extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
+extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
+extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fc0c9d255cf7..e8df52dc2bc5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -323,7 +323,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 }
 
 static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
-		struct nfs4_state_owner *sp, int flags,
+		struct nfs4_state_owner *sp, fmode_t fmode, int flags,
 		const struct iattr *attrs)
 {
 	struct dentry *parent = dget_parent(path->dentry);
@@ -343,7 +343,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
-	p->o_arg.open_flags = flags,
+	p->o_arg.open_flags = flags;
+	p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
 	p->o_arg.clientid = server->nfs_client->cl_clientid;
 	p->o_arg.id = sp->so_owner_id.id;
 	p->o_arg.name = &p->path.dentry->d_name;
@@ -399,10 +400,13 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
 	return ret;
 }
 
-static int can_open_cached(struct nfs4_state *state, int mode)
+static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
 {
 	int ret = 0;
-	switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+
+	if (open_mode & O_EXCL)
+		goto out;
+	switch (mode & (FMODE_READ|FMODE_WRITE)) {
 		case FMODE_READ:
 			ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
 			break;
@@ -412,12 +416,13 @@ static int can_open_cached(struct nfs4_state *state, int mode)
 		case FMODE_READ|FMODE_WRITE:
 			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
 	}
+out:
 	return ret;
 }
 
-static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
 {
-	if ((delegation->type & open_flags) != open_flags)
+	if ((delegation->type & fmode) != fmode)
 		return 0;
 	if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
 		return 0;
@@ -425,9 +430,9 @@ static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_fla
 	return 1;
 }
 
-static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
 {
-	switch (open_flags) {
+	switch (fmode) {
 		case FMODE_WRITE:
 			state->n_wronly++;
 			break;
@@ -437,15 +442,15 @@ static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr++;
 	}
-	nfs4_state_set_mode_locked(state, state->state | open_flags);
+	nfs4_state_set_mode_locked(state, state->state | fmode);
 }
 
-static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
 {
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
 		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
 	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
-	switch (open_flags) {
+	switch (fmode) {
 		case FMODE_READ:
 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
 			break;
@@ -457,14 +462,14 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
 	}
 }
 
-static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
 {
 	write_seqlock(&state->seqlock);
-	nfs_set_open_stateid_locked(state, stateid, open_flags);
+	nfs_set_open_stateid_locked(state, stateid, fmode);
 	write_sequnlock(&state->seqlock);
 }
 
-static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, int open_flags)
+static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
 {
 	/*
 	 * Protect the call to nfs4_state_set_mode_locked and
@@ -476,20 +481,20 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
 	if (open_stateid != NULL)
-		nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+		nfs_set_open_stateid_locked(state, open_stateid, fmode);
 	write_sequnlock(&state->seqlock);
 	spin_lock(&state->owner->so_lock);
-	update_open_stateflags(state, open_flags);
+	update_open_stateflags(state, fmode);
 	spin_unlock(&state->owner->so_lock);
 }
 
-static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, int open_flags)
+static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode)
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *deleg_cur;
 	int ret = 0;
 
-	open_flags &= (FMODE_READ|FMODE_WRITE);
+	fmode &= (FMODE_READ|FMODE_WRITE);
 
 	rcu_read_lock();
 	deleg_cur = rcu_dereference(nfsi->delegation);
@@ -498,7 +503,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 
 	spin_lock(&deleg_cur->lock);
 	if (nfsi->delegation != deleg_cur ||
-	    (deleg_cur->type & open_flags) != open_flags)
+	    (deleg_cur->type & fmode) != fmode)
 		goto no_delegation_unlock;
 
 	if (delegation == NULL)
@@ -507,7 +512,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 		goto no_delegation_unlock;
 
 	nfs_mark_delegation_referenced(deleg_cur);
-	__update_open_stateid(state, open_stateid, &deleg_cur->stateid, open_flags);
+	__update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode);
 	ret = 1;
 no_delegation_unlock:
 	spin_unlock(&deleg_cur->lock);
@@ -515,7 +520,7 @@ no_delegation:
 	rcu_read_unlock();
 
 	if (!ret && open_stateid != NULL) {
-		__update_open_stateid(state, open_stateid, NULL, open_flags);
+		__update_open_stateid(state, open_stateid, NULL, fmode);
 		ret = 1;
 	}
 
@@ -523,13 +528,13 @@ no_delegation:
 }
 
 
-static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode)
 {
 	struct nfs_delegation *delegation;
 
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(inode)->delegation);
-	if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
+	if (delegation == NULL || (delegation->type & fmode) == fmode) {
 		rcu_read_unlock();
 		return;
 	}
@@ -542,15 +547,16 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 	struct nfs4_state *state = opendata->state;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *delegation;
-	int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+	int open_mode = opendata->o_arg.open_flags & O_EXCL;
+	fmode_t fmode = opendata->o_arg.fmode;
 	nfs4_stateid stateid;
 	int ret = -EAGAIN;
 
 	for (;;) {
-		if (can_open_cached(state, open_mode)) {
+		if (can_open_cached(state, fmode, open_mode)) {
 			spin_lock(&state->owner->so_lock);
-			if (can_open_cached(state, open_mode)) {
-				update_open_stateflags(state, open_mode);
+			if (can_open_cached(state, fmode, open_mode)) {
+				update_open_stateflags(state, fmode);
 				spin_unlock(&state->owner->so_lock);
 				goto out_return_state;
 			}
@@ -559,7 +565,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		rcu_read_lock();
 		delegation = rcu_dereference(nfsi->delegation);
 		if (delegation == NULL ||
-		    !can_open_delegated(delegation, open_mode)) {
+		    !can_open_delegated(delegation, fmode)) {
 			rcu_read_unlock();
 			break;
 		}
@@ -572,7 +578,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		ret = -EAGAIN;
 
 		/* Try to update the stateid using the delegation */
-		if (update_open_stateid(state, NULL, &stateid, open_mode))
+		if (update_open_stateid(state, NULL, &stateid, fmode))
 			goto out_return_state;
 	}
 out:
@@ -624,7 +630,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
 	}
 
 	update_open_stateid(state, &data->o_res.stateid, NULL,
-			data->o_arg.open_flags);
+			data->o_arg.fmode);
 	iput(inode);
 out:
 	return state;
@@ -655,7 +661,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
 {
 	struct nfs4_opendata *opendata;
 
-	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
 	if (opendata == NULL)
 		return ERR_PTR(-ENOMEM);
 	opendata->state = state;
@@ -663,12 +669,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
 	return opendata;
 }
 
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res)
 {
 	struct nfs4_state *newstate;
 	int ret;
 
-	opendata->o_arg.open_flags = openflags;
+	opendata->o_arg.open_flags = 0;
+	opendata->o_arg.fmode = fmode;
 	memset(&opendata->o_res, 0, sizeof(opendata->o_res));
 	memset(&opendata->c_res, 0, sizeof(opendata->c_res));
 	nfs4_init_opendata_res(opendata);
@@ -678,7 +685,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openf
 	newstate = nfs4_opendata_to_nfs4_state(opendata);
 	if (IS_ERR(newstate))
 		return PTR_ERR(newstate);
-	nfs4_close_state(&opendata->path, newstate, openflags);
+	nfs4_close_state(&opendata->path, newstate, fmode);
 	*res = newstate;
 	return 0;
 }
@@ -734,7 +741,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
 {
 	struct nfs_delegation *delegation;
 	struct nfs4_opendata *opendata;
-	int delegation_type = 0;
+	fmode_t delegation_type = 0;
 	int status;
 
 	opendata = nfs4_open_recoverdata_alloc(ctx, state);
@@ -847,7 +854,7 @@ static void nfs4_open_confirm_release(void *calldata)
 		goto out_free;
 	state = nfs4_opendata_to_nfs4_state(data);
 	if (!IS_ERR(state))
-		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+		nfs4_close_state(&data->path, state, data->o_arg.fmode);
 out_free:
 	nfs4_opendata_put(data);
 }
@@ -911,7 +918,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	if (data->state != NULL) {
 		struct nfs_delegation *delegation;
 
-		if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+		if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
 			goto out_no_action;
 		rcu_read_lock();
 		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
@@ -980,7 +987,7 @@ static void nfs4_open_release(void *calldata)
 		goto out_free;
 	state = nfs4_opendata_to_nfs4_state(data);
 	if (!IS_ERR(state))
-		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+		nfs4_close_state(&data->path, state, data->o_arg.fmode);
 out_free:
 	nfs4_opendata_put(data);
 }
@@ -1135,7 +1142,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
@@ -1153,9 +1160,9 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 	if (status != 0)
 		goto err_put_state_owner;
 	if (path->dentry->d_inode != NULL)
-		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
+		nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
 	status = -ENOMEM;
-	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
+	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
 	if (opendata == NULL)
 		goto err_put_state_owner;
 
@@ -1187,14 +1194,14 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1332,7 +1339,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 		case -NFS4ERR_OLD_STATEID:
 		case -NFS4ERR_BAD_STATEID:
 		case -NFS4ERR_EXPIRED:
-			if (calldata->arg.open_flags == 0)
+			if (calldata->arg.fmode == 0)
 				break;
 		default:
 			if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
@@ -1374,10 +1381,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	nfs_fattr_init(calldata->res.fattr);
 	if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-		calldata->arg.open_flags = FMODE_READ;
+		calldata->arg.fmode = FMODE_READ;
 	} else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-		calldata->arg.open_flags = FMODE_WRITE;
+		calldata->arg.fmode = FMODE_WRITE;
 	}
 	calldata->timestamp = jiffies;
 	rpc_call_start(task);
@@ -1430,7 +1437,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
 		goto out_free_calldata;
-	calldata->arg.open_flags = 0;
+	calldata->arg.fmode = 0;
 	calldata->arg.bitmask = server->attr_bitmask;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
@@ -1457,13 +1464,13 @@ out:
 	return status;
 }
 
-static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode)
 {
 	struct file *filp;
 	int ret;
 
 	/* If the open_intent is for execute, we have an extra check to make */
-	if (nd->intent.open.flags & FMODE_EXEC) {
+	if (fmode & FMODE_EXEC) {
 		ret = nfs_may_open(state->inode,
 				state->owner->so_cred,
 				nd->intent.open.flags);
@@ -1479,7 +1486,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
 	}
 	ret = PTR_ERR(filp);
 out_close:
-	nfs4_close_sync(path, state, nd->intent.open.flags);
+	nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
 	return ret;
 }
 
@@ -1495,6 +1502,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 	struct dentry *res;
+	fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
 
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
@@ -1512,7 +1520,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	nfs_block_sillyrename(parent);
-	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
+	state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		if (PTR_ERR(state) == -ENOENT) {
@@ -1527,7 +1535,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		path.dentry = res;
 	nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
 	nfs_unblock_sillyrename(parent);
-	nfs4_intent_set_file(nd, &path, state);
+	nfs4_intent_set_file(nd, &path, state, fmode);
 	return res;
 }
 
@@ -1540,11 +1548,12 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	};
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
+	fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
 
 	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	state = nfs4_do_open(dir, &path, openflags, NULL, cred);
+	state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		switch (PTR_ERR(state)) {
@@ -1561,10 +1570,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	}
 	if (state->inode == dentry->d_inode) {
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-		nfs4_intent_set_file(nd, &path, state);
+		nfs4_intent_set_file(nd, &path, state, fmode);
 		return 1;
 	}
-	nfs4_close_sync(&path, state, openflags);
+	nfs4_close_sync(&path, state, fmode);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1990,6 +1999,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	};
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
+	fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE);
 	int status = 0;
 
 	cred = rpc_lookup_cred();
@@ -1997,7 +2007,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = PTR_ERR(cred);
 		goto out;
 	}
-	state = nfs4_do_open(dir, &path, flags, sattr, cred);
+	state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred);
 	d_drop(dentry);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -2013,9 +2023,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		nfs_post_op_update_inode(state->inode, &fattr);
 	}
 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
-		status = nfs4_intent_set_file(nd, &path, state);
+		status = nfs4_intent_set_file(nd, &path, state, fmode);
 	else
-		nfs4_close_sync(&path, state, flags);
+		nfs4_close_sync(&path, state, fmode);
 out_putcred:
 	put_rpccred(cred);
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cd16b301f13c..2022fe47966f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -363,18 +363,18 @@ nfs4_alloc_open_state(void)
 }
 
 void
-nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
+nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
 {
-	if (state->state == mode)
+	if (state->state == fmode)
 		return;
 	/* NB! List reordering - see the reclaim code for why.  */
-	if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
-		if (mode & FMODE_WRITE)
+	if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
+		if (fmode & FMODE_WRITE)
 			list_move(&state->open_states, &state->owner->so_states);
 		else
 			list_move_tail(&state->open_states, &state->owner->so_states);
 	}
-	state->state = mode;
+	state->state = fmode;
 }
 
 static struct nfs4_state *
@@ -454,16 +454,16 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
  * Close the current file.
  */
-static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait)
+static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
 {
 	struct nfs4_state_owner *owner = state->owner;
 	int call_close = 0;
-	int newstate;
+	fmode_t newstate;
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&owner->so_lock);
-	switch (mode & (FMODE_READ | FMODE_WRITE)) {
+	switch (fmode & (FMODE_READ | FMODE_WRITE)) {
 		case FMODE_READ:
 			state->n_rdonly--;
 			break;
@@ -498,14 +498,14 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mod
 		nfs4_do_close(path, state, wait);
 }
 
-void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
 {
-	__nfs4_close(path, state, mode, 0);
+	__nfs4_close(path, state, fmode, 0);
 }
 
-void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode)
+void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
 {
-	__nfs4_close(path, state, mode, 1);
+	__nfs4_close(path, state, fmode, 1);
 }
 
 /*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 879911c99030..d8ddfc5467d6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -953,12 +953,12 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
 	return 0;
 }
 
-static void encode_share_access(struct xdr_stream *xdr, int open_flags)
+static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
 {
 	__be32 *p;
 
 	RESERVE_SPACE(8);
-	switch (open_flags & (FMODE_READ|FMODE_WRITE)) {
+	switch (fmode & (FMODE_READ|FMODE_WRITE)) {
 		case FMODE_READ:
 			WRITE32(NFS4_SHARE_ACCESS_READ);
 			break;
@@ -969,7 +969,7 @@ static void encode_share_access(struct xdr_stream *xdr, int open_flags)
 			WRITE32(NFS4_SHARE_ACCESS_BOTH);
 			break;
 		default:
-			BUG();
+			WRITE32(0);
 	}
 	WRITE32(0);		/* for linux, share_deny = 0 always */
 }
@@ -984,7 +984,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 	RESERVE_SPACE(8);
 	WRITE32(OP_OPEN);
 	WRITE32(arg->seqid->sequence->counter);
-	encode_share_access(xdr, arg->open_flags);
+	encode_share_access(xdr, arg->fmode);
 	RESERVE_SPACE(28);
 	WRITE64(arg->clientid);
 	WRITE32(16);
@@ -1112,7 +1112,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
 	WRITE32(OP_OPEN_DOWNGRADE);
 	WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
 	WRITE32(arg->seqid->sequence->counter);
-	encode_share_access(xdr, arg->open_flags);
+	encode_share_access(xdr, arg->fmode);
 	return 0;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8d71d7b7c78a..b8d9c6dd4f63 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -83,7 +83,7 @@ struct nfs_open_context {
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 	fl_owner_t lockowner;
-	int mode;
+	fmode_t mode;
 
 	unsigned long flags;
 #define NFS_CONTEXT_ERROR_WRITE		(0)
@@ -342,7 +342,7 @@ extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
-extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
+extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 32c1a0ecdbff..a550b528319f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -120,6 +120,7 @@ struct nfs_openargs {
 	const struct nfs_fh *	fh;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
+	fmode_t			fmode;
 	__u64                   clientid;
 	__u64                   id;
 	union {
@@ -171,7 +172,7 @@ struct nfs_closeargs {
 	struct nfs_fh *         fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
-	int			open_flags;
+	fmode_t			fmode;
 	const u32 *		bitmask;
 };
 
-- 
cgit v1.2.3


From 64672d55d93c26fb4035fd1a84a803cbc09cb058 Mon Sep 17 00:00:00 2001
From: Peter Staubach <staubach@redhat.com>
Date: Tue, 23 Dec 2008 15:21:56 -0500
Subject: optimize attribute timeouts for "noac" and "actimeo=0"

Hi.

I've been looking at a bugzilla which describes a problem where
a customer was advised to use either the "noac" or "actimeo=0"
mount options to solve a consistency problem that they were
seeing in the file attributes.  It turned out that this solution
did not work reliably for them because sometimes, the local
attribute cache was believed to be valid and not timed out.
(With an attribute cache timeout of 0, the cache should always
appear to be timed out.)

In looking at this situation, it appears to me that the problem
is that the attribute cache timeout code has an off-by-one
error in it.  It is assuming that the cache is valid in the
region, [read_cache_jiffies, read_cache_jiffies + attrtimeo].  The
cache should be considered valid only in the region,
[read_cache_jiffies, read_cache_jiffies + attrtimeo).  With this
change, the options, "noac" and "actimeo=0", work as originally
expected.

This problem was previously addressed by special casing the
attrtimeo == 0 case.  However, since the problem is only an off-
by-one error, the cleaner solution is address the off-by-one
error and thus, not require the special case.

    Thanx...

        ps

Signed-off-by: Peter Staubach <staubach@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            |  2 +-
 fs/nfs/inode.c          | 11 ++---------
 include/linux/jiffies.h | 10 ++++++++++
 include/linux/nfs_fs.h  |  5 ++++-
 net/sunrpc/auth.c       |  2 +-
 5 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ed7024c34885..e35c8199f82f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1798,7 +1798,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
 	if (cache == NULL)
 		goto out;
 	if (!nfs_have_delegation(inode, FMODE_READ) &&
-	    !time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
 		goto out_stale;
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cf39202c3ea..0c381686171e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -712,14 +712,7 @@ int nfs_attribute_timeout(struct inode *inode)
 
 	if (nfs_have_delegation(inode, FMODE_READ))
 		return 0;
-	/*
-	 * Special case: if the attribute timeout is set to 0, then always
-	 * 		 treat the cache as having expired (unless holding
-	 * 		 a delegation).
-	 */
-	if (nfsi->attrtimeo == 0)
-		return 1;
-	return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
+	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
 }
 
 /**
@@ -1182,7 +1175,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfsi->attrtimeo_timestamp = now;
 		nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	} else {
-		if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+		if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
 			if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
 				nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
 			nfsi->attrtimeo_timestamp = now;
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index abb6ac639e8e..1a9cf78bfce5 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -115,10 +115,20 @@ static inline u64 get_jiffies_64(void)
 	 ((long)(a) - (long)(b) >= 0))
 #define time_before_eq(a,b)	time_after_eq(b,a)
 
+/*
+ * Calculate whether a is in the range of [b, c].
+ */
 #define time_in_range(a,b,c) \
 	(time_after_eq(a,b) && \
 	 time_before_eq(a,c))
 
+/*
+ * Calculate whether a is in the range of [b, c).
+ */
+#define time_in_range_open(a,b,c) \
+	(time_after_eq(a,b) && \
+	 time_before(a,c))
+
 /* Same as above, but does so with platform independent 64bit types.
  * These must be used when utilizing jiffies_64 (i.e. return value of
  * get_jiffies_64() */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b8d9c6dd4f63..db867b04ac3c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -130,7 +130,10 @@ struct nfs_inode {
 	 *
 	 * We need to revalidate the cached attrs for this inode if
 	 *
-	 *	jiffies - read_cache_jiffies > attrtimeo
+	 *	jiffies - read_cache_jiffies >= attrtimeo
+	 *
+	 * Please note the comparison is greater than or equal
+	 * so that zero timeout values can be specified.
 	 */
 	unsigned long		read_cache_jiffies;
 	unsigned long		attrtimeo;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 6e28744b1709..050e4e84d9e3 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -234,7 +234,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 	list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
 
 		/* Enforce a 60 second garbage collection moratorium */
-		if (time_in_range(cred->cr_expire, expired, jiffies) &&
+		if (time_in_range_open(cred->cr_expire, expired, jiffies) &&
 		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
 			continue;
 
-- 
cgit v1.2.3


From c977a2ef40a38c45537ad03823d0a004f06373f0 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Tue, 23 Dec 2008 16:06:13 -0500
Subject: sunrpc: get rid of rpc_rqst.rq_bufsize

rq_bufsize is not used.

Signed-off-by: Mike Sager <Mike.Sager@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 4d80a118d538..11fc71d50c1e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -76,8 +76,7 @@ struct rpc_rqst {
 	struct list_head	rq_list;
 
 	__u32 *			rq_buffer;	/* XDR encode buffer */
-	size_t			rq_bufsize,
-				rq_callsize,
+	size_t			rq_callsize,
 				rq_rcvsize;
 
 	struct xdr_buf		rq_private_buf;		/* The receive buffer
-- 
cgit v1.2.3


From c381060869317b3c84430d4f54965d409cbfe65f Mon Sep 17 00:00:00 2001
From: "\\\"J. Bruce Fields\\" <bfields@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:08:32 -0500
Subject: rpc: add an rpc_pipe_open method

We want to transition to a new gssd upcall which is text-based and more
easily extensible.

To simplify upgrades, as well as testing and debugging, it will help if
we can upgrade gssd (to a version which understands the new upcall)
without having to choose at boot (or module-load) time whether we want
the new or the old upcall.

We will do this by providing two different pipes: one named, as
currently, after the mechanism (normally "krb5"), and supporting the
old upcall.  One named "gssd" and supporting the new upcall version.

We allow gssd to indicate which version it supports by its choice of
which pipe to open.

As we have no interest in supporting *simultaneous* use of both
versions, we'll forbid opening both pipes at the same time.

So, add a new pipe_open callback to the rpc_pipefs api, which the gss
code can use to track which pipes have been open, and to refuse opens of
incompatible pipes.

We only need this to be called on the first open of a given pipe.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/rpc_pipe_fs.h |  1 +
 net/sunrpc/rpc_pipe.c              | 22 +++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 51b977a4ca20..cea764c2359f 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -15,6 +15,7 @@ struct rpc_pipe_ops {
 	ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t);
 	ssize_t (*downcall)(struct file *, const char __user *, size_t);
 	void (*release_pipe)(struct inode *);
+	int (*open_pipe)(struct inode *);
 	void (*destroy_msg)(struct rpc_pipe_msg *);
 };
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 55b2049834c4..c9b57f47108c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -169,16 +169,24 @@ static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
 	struct rpc_inode *rpci = RPC_I(inode);
+	int first_open;
 	int res = -ENXIO;
 
 	mutex_lock(&inode->i_mutex);
-	if (rpci->ops != NULL) {
-		if (filp->f_mode & FMODE_READ)
-			rpci->nreaders ++;
-		if (filp->f_mode & FMODE_WRITE)
-			rpci->nwriters ++;
-		res = 0;
+	if (rpci->ops == NULL)
+		goto out;
+	first_open = rpci->nreaders == 0 && rpci->nwriters == 0;
+	if (first_open && rpci->ops->open_pipe) {
+		res = rpci->ops->open_pipe(inode);
+		if (res)
+			goto out;
 	}
+	if (filp->f_mode & FMODE_READ)
+		rpci->nreaders++;
+	if (filp->f_mode & FMODE_WRITE)
+		rpci->nwriters++;
+	res = 0;
+out:
 	mutex_unlock(&inode->i_mutex);
 	return res;
 }
@@ -748,7 +756,7 @@ rpc_rmdir(struct dentry *dentry)
  * @name: name of pipe
  * @private: private data to associate with the pipe, for the caller's use
  * @ops: operations defining the behavior of the pipe: upcall, downcall,
- *	release_pipe, and destroy_msg.
+ *	release_pipe, open_pipe, and destroy_msg.
  * @flags: rpc_inode flags
  *
  * Data is made available for userspace to read by calls to
-- 
cgit v1.2.3


From 68e76ad0baf8f5d5060377c2423ee6eed5c63057 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:17:15 -0500
Subject: nfsd: pass client principal name in rsc downcall

Two principals are involved in krb5 authentication: the target, who we
authenticate *to* (normally the name of the server, like
nfs/server.citi.umich.edu@CITI.UMICH.EDU), and the source, we we
authenticate *as* (normally a user, like bfields@UMICH.EDU)

In the case of NFSv4 callbacks, the target of the callback should be the
source of the client's setclientid call, and the source should be the
nfs server's own principal.

Therefore we allow svcgssd to pass down the name of the principal that
just authenticated, so that on setclientid we can store that principal
name with the new client, to be used later on callbacks.

Signed-off-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4state.c                | 11 +++++++++++
 include/linux/nfsd/state.h         |  1 +
 include/linux/sunrpc/svcauth_gss.h |  1 +
 net/sunrpc/auth_gss/svcauth_gss.c  | 23 +++++++++++++++++++++++
 4 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a052ac2bde9..f3b9a8d064f3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,6 +54,7 @@
 #include <linux/mutex.h>
 #include <linux/lockd/bind.h>
 #include <linux/module.h>
+#include <linux/sunrpc/svcauth_gss.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -377,6 +378,7 @@ free_client(struct nfs4_client *clp)
 	shutdown_callback_client(clp);
 	if (clp->cl_cred.cr_group_info)
 		put_group_info(clp->cl_cred.cr_group_info);
+	kfree(clp->cl_principal);
 	kfree(clp->cl_name.data);
 	kfree(clp);
 }
@@ -696,6 +698,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	unsigned int 		strhashval;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
+	char			*princ;
 	char                    dname[HEXDIR_LEN];
 	
 	if (!check_name(clname))
@@ -783,6 +786,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	copy_verf(new, &clverifier);
 	new->cl_addr = sin->sin_addr.s_addr;
+	princ = svc_gss_principal(rqstp);
+	if (princ) {
+		new->cl_principal = kstrdup(princ, GFP_KERNEL);
+		if (new->cl_principal == NULL) {
+			free_client(new);
+			goto out;
+		}
+	}
 	copy_cred(&new->cl_cred, &rqstp->rq_cred);
 	gen_confirm(new);
 	gen_callback(new, setclid);
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index d0fe2e378452..ce7cbf4b7c93 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -124,6 +124,7 @@ struct nfs4_client {
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	__be32			cl_addr; 	/* client ipaddress */
+	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index c9165d9771a8..ca7d725861fc 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -20,6 +20,7 @@ int gss_svc_init(void);
 void gss_svc_shutdown(void);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
 u32 svcauth_gss_flavor(struct auth_domain *dom);
+char *svc_gss_principal(struct svc_rqst *);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 12803da95dc4..e9baa6ebb1dd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -332,6 +332,7 @@ struct rsc {
 	struct svc_cred		cred;
 	struct gss_svc_seq_data	seqdata;
 	struct gss_ctx		*mechctx;
+	char			*client_name;
 };
 
 static struct cache_head *rsc_table[RSC_HASHMAX];
@@ -346,6 +347,7 @@ static void rsc_free(struct rsc *rsci)
 		gss_delete_sec_context(&rsci->mechctx);
 	if (rsci->cred.cr_group_info)
 		put_group_info(rsci->cred.cr_group_info);
+	kfree(rsci->client_name);
 }
 
 static void rsc_put(struct kref *ref)
@@ -383,6 +385,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
 	tmp->handle.data = NULL;
 	new->mechctx = NULL;
 	new->cred.cr_group_info = NULL;
+	new->client_name = NULL;
 }
 
 static void
@@ -397,6 +400,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 	spin_lock_init(&new->seqdata.sd_lock);
 	new->cred = tmp->cred;
 	tmp->cred.cr_group_info = NULL;
+	new->client_name = tmp->client_name;
+	tmp->client_name = NULL;
 }
 
 static struct cache_head *
@@ -486,6 +491,15 @@ static int rsc_parse(struct cache_detail *cd,
 		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
 		if (status)
 			goto out;
+
+		/* get client name */
+		len = qword_get(&mesg, buf, mlen);
+		if (len > 0) {
+			rsci.client_name = kstrdup(buf, GFP_KERNEL);
+			if (!rsci.client_name)
+				goto out;
+		}
+
 	}
 	rsci.h.expiry_time = expiry;
 	rscp = rsc_update(&rsci, rscp);
@@ -913,6 +927,15 @@ struct gss_svc_data {
 	struct rsc			*rsci;
 };
 
+char *svc_gss_principal(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data;
+
+	if (gd && gd->rsci)
+		return gd->rsci->client_name;
+	return NULL;
+}
+
 static int
 svcauth_gss_set_client(struct svc_rqst *rqstp)
 {
-- 
cgit v1.2.3


From 608207e8884e083ad8b8d33eda868da70f0d63e8 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:17:40 -0500
Subject: rpc: pass target name down to rpc level on callbacks

The rpc client needs to know the principal that the setclientid was done
as, so it can tell gssd who to authenticate to.

Signed-off-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c      |  6 ++++++
 include/linux/sunrpc/clnt.h |  2 ++
 net/sunrpc/clnt.c           | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 094747a1227c..3ca141782145 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -384,6 +384,7 @@ static int do_probe_callback(void *data)
 		.version	= nfs_cb_version[1]->number,
 		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+		.client_name    = clp->cl_principal,
 	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
@@ -392,6 +393,11 @@ static int do_probe_callback(void *data)
 	struct rpc_clnt *client;
 	int status;
 
+	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) {
+		status = nfserr_cb_path_down;
+		goto out_err;
+	}
+
 	/* Initialize address */
 	memset(&addr, 0, sizeof(addr));
 	addr.sin_family = AF_INET;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6f0ee1b84a4f..c39a21040dcb 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -58,6 +58,7 @@ struct rpc_clnt {
 	struct rpc_timeout	cl_timeout_default;
 	struct rpc_program *	cl_program;
 	char			cl_inline_name[32];
+	char			*cl_principal;	/* target to authenticate to */
 };
 
 /*
@@ -108,6 +109,7 @@ struct rpc_create_args {
 	u32			version;
 	rpc_authflavor_t	authflavor;
 	unsigned long		flags;
+	char			*client_name;
 };
 
 /* Values for "flags" field */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4895c341e46d..347f2a25abb6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -197,6 +197,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 
 	clnt->cl_rtt = &clnt->cl_rtt_default;
 	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
+	clnt->cl_principal = NULL;
+	if (args->client_name) {
+		clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
+		if (!clnt->cl_principal)
+			goto out_no_principal;
+	}
 
 	kref_init(&clnt->cl_kref);
 
@@ -226,6 +232,8 @@ out_no_auth:
 		rpc_put_mount();
 	}
 out_no_path:
+	kfree(clnt->cl_principal);
+out_no_principal:
 	rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
 	if (clnt->cl_server != clnt->cl_inline_name)
@@ -354,6 +362,11 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_metrics = rpc_alloc_iostats(clnt);
 	if (new->cl_metrics == NULL)
 		goto out_no_stats;
+	if (clnt->cl_principal) {
+		new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL);
+		if (new->cl_principal == NULL)
+			goto out_no_principal;
+	}
 	kref_init(&new->cl_kref);
 	err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
 	if (err != 0)
@@ -366,6 +379,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	rpciod_up();
 	return new;
 out_no_path:
+	kfree(new->cl_principal);
+out_no_principal:
 	rpc_free_iostats(new->cl_metrics);
 out_no_stats:
 	kfree(new);
@@ -417,6 +432,7 @@ rpc_free_client(struct kref *kref)
 out_free:
 	rpc_unregister_client(clnt);
 	rpc_free_iostats(clnt->cl_metrics);
+	kfree(clnt->cl_principal);
 	clnt->cl_metrics = NULL;
 	xprt_put(clnt->cl_xprt);
 	rpciod_down();
-- 
cgit v1.2.3


From 61054b14d545e257b9415d5ca0cd5f43762b4d0c Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:19:00 -0500
Subject: nfsd: support callbacks with gss flavors

This patch adds server-side support for callbacks other than AUTH_SYS.

Signed-off-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c     | 3 ++-
 fs/nfsd/nfs4state.c        | 1 +
 include/linux/nfsd/state.h | 1 +
 net/sunrpc/rpc_pipe.c      | 5 +++++
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3ca141782145..6d7d8c02c197 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -358,6 +358,7 @@ static struct rpc_program cb_program = {
 		.nrvers		= ARRAY_SIZE(nfs_cb_version),
 		.version	= nfs_cb_version,
 		.stats		= &cb_stats,
+		.pipe_dir_name  = "/nfsd4_cb",
 };
 
 /* Reference counting, callback cleanup, etc., all look racy as heck.
@@ -382,7 +383,7 @@ static int do_probe_callback(void *data)
 		.program	= &cb_program,
 		.prognumber	= cb->cb_prog,
 		.version	= nfs_cb_version[1]->number,
-		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
+		.authflavor	= clp->cl_flavor,
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 		.client_name    = clp->cl_principal,
 	};
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f3b9a8d064f3..07db31568ac9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -786,6 +786,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	copy_verf(new, &clverifier);
 	new->cl_addr = sin->sin_addr.s_addr;
+	new->cl_flavor = rqstp->rq_flavor;
 	princ = svc_gss_principal(rqstp);
 	if (princ) {
 		new->cl_principal = kstrdup(princ, GFP_KERNEL);
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index ce7cbf4b7c93..128298c0362d 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -124,6 +124,7 @@ struct nfs4_client {
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	__be32			cl_addr; 	/* client ipaddress */
+	u32			cl_flavor;	/* setclientid pseudoflavor */
 	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 3105efbb182d..192453248870 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -407,6 +407,7 @@ enum {
 	RPCAUTH_nfs,
 	RPCAUTH_portmap,
 	RPCAUTH_statd,
+	RPCAUTH_nfsd4_cb,
 	RPCAUTH_RootEOF
 };
 
@@ -440,6 +441,10 @@ static struct rpc_filelist files[] = {
 		.name = "statd",
 		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
 	},
+	[RPCAUTH_nfsd4_cb] = {
+		.name = "nfsd4_cb",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
 };
 
 enum {
-- 
cgit v1.2.3


From 4a7794860ba2b56693b1d89fd485fd08cdc763e3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 13 Sep 2008 18:19:03 -0700
Subject: crypto: api - Move type exit function into crypto_tfm

The type exit function needs to undo any allocations done by the type
init function.  However, the type init function may differ depending
on the upper-level type of the transform (e.g., a crypto_blkcipher
instantiated as a crypto_ablkcipher).

So we need to move the exit function out of the lower-level
structure and into crypto_tfm itself.

As it stands this is a no-op since nobody uses exit functions at
all.  However, all cases where a lower-level type is instantiated
as a different upper-level type (such as blkcipher as ablkcipher)
will be converted such that they allocate the underlying transform
and use that instead of casting (e.g., crypto_ablkcipher casted
into crypto_blkcipher).  That will need to use a different exit
function depending on the upper-level type.

This patch also allows the type init/exit functions to call (or not)
cra_init/cra_exit instead of always calling them from the top level.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/api.c            | 13 ++++++-------
 include/crypto/algapi.h |  1 -
 include/linux/crypto.h  |  2 ++
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/crypto/api.c b/crypto/api.c
index 0444d242e985..cbaaf346ad13 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -300,8 +300,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 	const struct crypto_type *type = tfm->__crt_alg->cra_type;
 
 	if (type) {
-		if (type->exit)
-			type->exit(tfm);
+		if (tfm->exit)
+			tfm->exit(tfm);
 		return;
 	}
 
@@ -379,17 +379,16 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
 	if (err)
 		goto out_free_tfm;
 
-	if (alg->cra_init && (err = alg->cra_init(tfm))) {
-		if (err == -EAGAIN)
-			crypto_shoot_alg(alg);
+	if (!tfm->exit && alg->cra_init && (err = alg->cra_init(tfm)))
 		goto cra_init_failed;
-	}
 
 	goto out;
 
 cra_init_failed:
 	crypto_exit_ops(tfm);
 out_free_tfm:
+	if (err == -EAGAIN)
+		crypto_shoot_alg(alg);
 	kfree(tfm);
 out_err:
 	tfm = ERR_PTR(err);
@@ -469,7 +468,7 @@ void crypto_free_tfm(struct crypto_tfm *tfm)
 	alg = tfm->__crt_alg;
 	size = sizeof(*tfm) + alg->cra_ctxsize;
 
-	if (alg->cra_exit)
+	if (!tfm->exit && alg->cra_exit)
 		alg->cra_exit(tfm);
 	crypto_exit_ops(tfm);
 	crypto_mod_put(alg);
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 60d06e784be3..5fb6d8618d4d 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -23,7 +23,6 @@ struct seq_file;
 struct crypto_type {
 	unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask);
 	int (*init)(struct crypto_tfm *tfm, u32 type, u32 mask);
-	void (*exit)(struct crypto_tfm *tfm);
 	void (*show)(struct seq_file *m, struct crypto_alg *alg);
 };
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 3d2317e4af2e..ea52cd944fd9 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -480,6 +480,8 @@ struct crypto_tfm {
 		struct compress_tfm compress;
 		struct rng_tfm rng;
 	} crt_u;
+
+	void (*exit)(struct crypto_tfm *tfm);
 	
 	struct crypto_alg *__crt_alg;
 
-- 
cgit v1.2.3


From 7b0bac64cd5b74d6f1147524c26216de13a501fd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Sep 2008 06:52:53 +0900
Subject: crypto: api - Rebirth of crypto_alloc_tfm

This patch reintroduces a completely revamped crypto_alloc_tfm.
The biggest change is that we now take two crypto_type objects
when allocating a tfm, a frontend and a backend.  In fact this
simply formalises what we've been doing behind the API's back.

For example, as it stands crypto_alloc_ahash may use an
actual ahash algorithm or a crypto_hash algorithm.  Putting
this in the API allows us to do this much more cleanly.

The existing types will be converted across gradually.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/api.c            | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
 crypto/internal.h       |   2 +
 include/crypto/algapi.h |  10 +++++
 include/linux/crypto.h  |   4 +-
 4 files changed, 123 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/crypto/api.c b/crypto/api.c
index cbaaf346ad13..9975a7bd246c 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -403,6 +403,9 @@ EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
  *	@type: Type of algorithm
  *	@mask: Mask for type comparison
  *
+ *	This function should not be used by new algorithm types.
+ *	Plesae use crypto_alloc_tfm instead.
+ *
  *	crypto_alloc_base() will first attempt to locate an already loaded
  *	algorithm.  If that fails and the kernel supports dynamically loadable
  *	modules, it will then attempt to load a module of the same name or
@@ -449,6 +452,111 @@ err:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
+
+struct crypto_tfm *crypto_create_tfm(struct crypto_alg *alg,
+				     const struct crypto_type *frontend)
+{
+	char *mem;
+	struct crypto_tfm *tfm = NULL;
+	unsigned int tfmsize;
+	unsigned int total;
+	int err = -ENOMEM;
+
+	tfmsize = frontend->tfmsize;
+	total = tfmsize + sizeof(*tfm) + frontend->extsize(alg, frontend);
+
+	mem = kzalloc(total, GFP_KERNEL);
+	if (mem == NULL)
+		goto out_err;
+
+	tfm = (struct crypto_tfm *)(mem + tfmsize);
+	tfm->__crt_alg = alg;
+
+	err = frontend->init_tfm(tfm, frontend);
+	if (err)
+		goto out_free_tfm;
+
+	if (!tfm->exit && alg->cra_init && (err = alg->cra_init(tfm)))
+		goto cra_init_failed;
+
+	goto out;
+
+cra_init_failed:
+	crypto_exit_ops(tfm);
+out_free_tfm:
+	if (err == -EAGAIN)
+		crypto_shoot_alg(alg);
+	kfree(mem);
+out_err:
+	tfm = ERR_PTR(err);
+out:
+	return tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_create_tfm);
+
+/*
+ *	crypto_alloc_tfm - Locate algorithm and allocate transform
+ *	@alg_name: Name of algorithm
+ *	@frontend: Frontend algorithm type
+ *	@type: Type of algorithm
+ *	@mask: Mask for type comparison
+ *
+ *	crypto_alloc_tfm() will first attempt to locate an already loaded
+ *	algorithm.  If that fails and the kernel supports dynamically loadable
+ *	modules, it will then attempt to load a module of the same name or
+ *	alias.  If that fails it will send a query to any loaded crypto manager
+ *	to construct an algorithm on the fly.  A refcount is grabbed on the
+ *	algorithm which is then associated with the new transform.
+ *
+ *	The returned transform is of a non-determinate type.  Most people
+ *	should use one of the more specific allocation functions such as
+ *	crypto_alloc_blkcipher.
+ *
+ *	In case of error the return value is an error pointer.
+ */
+struct crypto_tfm *crypto_alloc_tfm(const char *alg_name,
+				    const struct crypto_type *frontend,
+				    u32 type, u32 mask)
+{
+	struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask);
+	struct crypto_tfm *tfm;
+	int err;
+
+	type &= frontend->maskclear;
+	mask &= frontend->maskclear;
+	type |= frontend->type;
+	mask |= frontend->maskset;
+
+	lookup = frontend->lookup ?: crypto_alg_mod_lookup;
+
+	for (;;) {
+		struct crypto_alg *alg;
+
+		alg = lookup(alg_name, type, mask);
+		if (IS_ERR(alg)) {
+			err = PTR_ERR(alg);
+			goto err;
+		}
+
+		tfm = crypto_create_tfm(alg, frontend);
+		if (!IS_ERR(tfm))
+			return tfm;
+
+		crypto_mod_put(alg);
+		err = PTR_ERR(tfm);
+
+err:
+		if (err != -EAGAIN)
+			break;
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+	}
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
  
 /*
  *	crypto_free_tfm - Free crypto transform
diff --git a/crypto/internal.h b/crypto/internal.h
index 8ef72d76092e..3c19a27a7563 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -109,6 +109,8 @@ void crypto_alg_tested(const char *name, int err);
 void crypto_shoot_alg(struct crypto_alg *alg);
 struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
 				      u32 mask);
+struct crypto_tfm *crypto_create_tfm(struct crypto_alg *alg,
+				     const struct crypto_type *frontend);
 
 int crypto_register_instance(struct crypto_template *tmpl,
 			     struct crypto_instance *inst);
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 5fb6d8618d4d..986db68548f6 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -22,8 +22,18 @@ struct seq_file;
 
 struct crypto_type {
 	unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask);
+	unsigned int (*extsize)(struct crypto_alg *alg,
+				const struct crypto_type *frontend);
 	int (*init)(struct crypto_tfm *tfm, u32 type, u32 mask);
+	int (*init_tfm)(struct crypto_tfm *tfm,
+		        const struct crypto_type *frontend);
 	void (*show)(struct seq_file *m, struct crypto_alg *alg);
+	struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask);
+
+	unsigned int type;
+	unsigned int maskclear;
+	unsigned int maskset;
+	unsigned int tfmsize;
 };
 
 struct crypto_instance {
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index ea52cd944fd9..ffaaa418cf59 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -546,7 +546,9 @@ struct crypto_attr_u32 {
  * Transform user interface.
  */
  
-struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags);
+struct crypto_tfm *crypto_alloc_tfm(const char *alg_name,
+				    const struct crypto_type *frontend,
+				    u32 type, u32 mask);
 struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask);
 void crypto_free_tfm(struct crypto_tfm *tfm);
 
-- 
cgit v1.2.3


From 7b5a080b3c46f0cac71c0d0262634c6517d4ee4f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 31 Aug 2008 15:47:27 +1000
Subject: crypto: hash - Add shash interface

The shash interface replaces the current synchronous hash interface.
It improves over hash in two ways.  Firstly shash is reentrant,
meaning that the same tfm may be used by two threads simultaneously
as all hashing state is stored in a local descriptor.

The other enhancement is that shash no longer takes scatter list
entries.  This is because shash is specifically designed for
synchronous algorithms and as such scatter lists are unnecessary.

All existing hash users will be converted to shash once the
algorithms have been completely converted.

There is also a new finup function that combines update with final.
This will be extended to ahash once the algorithm conversion is
done.

This is also the first time that an algorithm type has their own
registration function.  Existing algorithm types will be converted
to this way in due course.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Makefile                |   1 +
 crypto/shash.c                 | 239 +++++++++++++++++++++++++++++++++++++++++
 include/crypto/hash.h          | 104 ++++++++++++++++++
 include/crypto/internal/hash.h |   8 ++
 include/linux/crypto.h         |   1 +
 5 files changed, 353 insertions(+)
 create mode 100644 crypto/shash.c

(limited to 'include')

diff --git a/crypto/Makefile b/crypto/Makefile
index cd4a4ed078ff..46b08bf2035f 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
 
 crypto_hash-objs := hash.o
 crypto_hash-objs += ahash.o
+crypto_hash-objs += shash.o
 obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
 
 cryptomgr-objs := algboss.o testmgr.o
diff --git a/crypto/shash.c b/crypto/shash.c
new file mode 100644
index 000000000000..82ec4bd8d2f5
--- /dev/null
+++ b/crypto/shash.c
@@ -0,0 +1,239 @@
+/*
+ * Synchronous Cryptographic Hash operations.
+ *
+ * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+
+static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_shash, base);
+}
+
+static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
+				  unsigned int keylen)
+{
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+	unsigned long absize;
+	u8 *buffer, *alignbuffer;
+	int err;
+
+	absize = keylen + (alignmask & ~(CRYPTO_MINALIGN - 1));
+	buffer = kmalloc(absize, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+	memcpy(alignbuffer, key, keylen);
+	err = shash->setkey(tfm, alignbuffer, keylen);
+	memset(alignbuffer, 0, keylen);
+	kfree(buffer);
+	return err;
+}
+
+int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+
+	if ((unsigned long)key & alignmask)
+		return shash_setkey_unaligned(tfm, key, keylen);
+
+	return shash->setkey(tfm, key, keylen);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_setkey);
+
+static inline unsigned int shash_align_buffer_size(unsigned len,
+						   unsigned long mask)
+{
+	return len + (mask & ~(__alignof__(u8 __attribute__ ((aligned))) - 1));
+}
+
+static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
+				  unsigned int len)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+	unsigned int unaligned_len = alignmask + 1 -
+				     ((unsigned long)data & alignmask);
+	u8 buf[shash_align_buffer_size(unaligned_len, alignmask)]
+		__attribute__ ((aligned));
+
+	memcpy(buf, data, unaligned_len);
+
+	return shash->update(desc, buf, unaligned_len) ?:
+	       shash->update(desc, data + unaligned_len, len - unaligned_len);
+}
+
+int crypto_shash_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+
+	if ((unsigned long)data & alignmask)
+		return shash_update_unaligned(desc, data, len);
+
+	return shash->update(desc, data, len);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_update);
+
+static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned int ds = crypto_shash_digestsize(tfm);
+	u8 buf[shash_align_buffer_size(ds, alignmask)]
+		__attribute__ ((aligned));
+	int err;
+
+	err = shash->final(desc, buf);
+	memcpy(out, buf, ds);
+	return err;
+}
+
+int crypto_shash_final(struct shash_desc *desc, u8 *out)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+
+	if ((unsigned long)out & alignmask)
+		return shash_final_unaligned(desc, out);
+
+	return shash->final(desc, out);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_final);
+
+static int shash_finup_unaligned(struct shash_desc *desc, const u8 *data,
+				 unsigned int len, u8 *out)
+{
+	return crypto_shash_update(desc, data, len) ?:
+	       crypto_shash_final(desc, out);
+}
+
+int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
+		       unsigned int len, u8 *out)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+
+	if (((unsigned long)data | (unsigned long)out) & alignmask ||
+	    !shash->finup)
+		return shash_finup_unaligned(desc, data, len, out);
+
+	return shash->finup(desc, data, len, out);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_finup);
+
+static int shash_digest_unaligned(struct shash_desc *desc, const u8 *data,
+				  unsigned int len, u8 *out)
+{
+	return crypto_shash_init(desc) ?:
+	       crypto_shash_update(desc, data, len) ?:
+	       crypto_shash_final(desc, out);
+}
+
+int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *shash = crypto_shash_alg(tfm);
+	unsigned long alignmask = crypto_shash_alignmask(tfm);
+
+	if (((unsigned long)data | (unsigned long)out) & alignmask ||
+	    !shash->digest)
+		return shash_digest_unaligned(desc, data, len, out);
+
+	return shash->digest(desc, data, len, out);
+}
+EXPORT_SYMBOL_GPL(crypto_shash_digest);
+
+static int crypto_shash_init_tfm(struct crypto_tfm *tfm,
+				 const struct crypto_type *frontend)
+{
+	if (frontend->type != CRYPTO_ALG_TYPE_SHASH)
+		return -EINVAL;
+	return 0;
+}
+
+static unsigned int crypto_shash_extsize(struct crypto_alg *alg,
+					 const struct crypto_type *frontend)
+{
+	return alg->cra_ctxsize;
+}
+
+static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	struct shash_alg *salg = __crypto_shash_alg(alg);
+
+	seq_printf(m, "type         : shash\n");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "digestsize   : %u\n", salg->digestsize);
+	seq_printf(m, "descsize     : %u\n", salg->descsize);
+}
+
+static const struct crypto_type crypto_shash_type = {
+	.extsize = crypto_shash_extsize,
+	.init_tfm = crypto_shash_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_shash_show,
+#endif
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_SHASH,
+	.tfmsize = offsetof(struct crypto_shash, base),
+};
+
+struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
+					u32 mask)
+{
+	return __crypto_shash_cast(
+		crypto_alloc_tfm(alg_name, &crypto_shash_type, type, mask));
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_shash);
+
+int crypto_register_shash(struct shash_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+
+	if (alg->digestsize > PAGE_SIZE / 8 ||
+	    alg->descsize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	base->cra_type = &crypto_shash_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_SHASH;
+
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_shash);
+
+int crypto_unregister_shash(struct shash_alg *alg)
+{
+	return crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_shash);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Synchronous cryptographic hash type");
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index ee48ef8fb2ea..f9b51d408953 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -15,10 +15,39 @@
 
 #include <linux/crypto.h>
 
+struct shash_desc {
+	struct crypto_shash *tfm;
+	u32 flags;
+
+	void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+struct shash_alg {
+	int (*init)(struct shash_desc *desc);
+	int (*update)(struct shash_desc *desc, const u8 *data,
+		      unsigned int len);
+	int (*final)(struct shash_desc *desc, u8 *out);
+	int (*finup)(struct shash_desc *desc, const u8 *data,
+		     unsigned int len, u8 *out);
+	int (*digest)(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out);
+	int (*setkey)(struct crypto_shash *tfm, const u8 *key,
+		      unsigned int keylen);
+
+	unsigned int descsize;
+	unsigned int digestsize;
+
+	struct crypto_alg base;
+};
+
 struct crypto_ahash {
 	struct crypto_tfm base;
 };
 
+struct crypto_shash {
+	struct crypto_tfm base;
+};
+
 static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm)
 {
 	return (struct crypto_ahash *)tfm;
@@ -169,4 +198,79 @@ static inline void ahash_request_set_crypt(struct ahash_request *req,
 	req->result = result;
 }
 
+struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
+					u32 mask);
+
+static inline struct crypto_tfm *crypto_shash_tfm(struct crypto_shash *tfm)
+{
+	return &tfm->base;
+}
+
+static inline void crypto_free_shash(struct crypto_shash *tfm)
+{
+	crypto_free_tfm(crypto_shash_tfm(tfm));
+}
+
+static inline unsigned int crypto_shash_alignmask(
+	struct crypto_shash *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_shash_tfm(tfm));
+}
+
+static inline struct shash_alg *__crypto_shash_alg(struct crypto_alg *alg)
+{
+	return container_of(alg, struct shash_alg, base);
+}
+
+static inline struct shash_alg *crypto_shash_alg(struct crypto_shash *tfm)
+{
+	return __crypto_shash_alg(crypto_shash_tfm(tfm)->__crt_alg);
+}
+
+static inline unsigned int crypto_shash_digestsize(struct crypto_shash *tfm)
+{
+	return crypto_shash_alg(tfm)->digestsize;
+}
+
+static inline u32 crypto_shash_get_flags(struct crypto_shash *tfm)
+{
+	return crypto_tfm_get_flags(crypto_shash_tfm(tfm));
+}
+
+static inline void crypto_shash_set_flags(struct crypto_shash *tfm, u32 flags)
+{
+	crypto_tfm_set_flags(crypto_shash_tfm(tfm), flags);
+}
+
+static inline void crypto_shash_clear_flags(struct crypto_shash *tfm, u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_shash_tfm(tfm), flags);
+}
+
+static inline unsigned int crypto_shash_descsize(struct crypto_shash *tfm)
+{
+	return crypto_shash_alg(tfm)->descsize;
+}
+
+static inline void *shash_desc_ctx(struct shash_desc *desc)
+{
+	return desc->__ctx;
+}
+
+int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
+			unsigned int keylen);
+int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out);
+
+static inline int crypto_shash_init(struct shash_desc *desc)
+{
+	return crypto_shash_alg(desc->tfm)->init(desc);
+}
+
+int crypto_shash_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len);
+int crypto_shash_final(struct shash_desc *desc, u8 *out);
+int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
+		       unsigned int len, u8 *out);
+
 #endif	/* _CRYPTO_HASH_H */
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 917ae57bad4a..32d3a8ed06de 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -40,6 +40,9 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err);
 int crypto_hash_walk_first(struct ahash_request *req,
 			   struct crypto_hash_walk *walk);
 
+int crypto_register_shash(struct shash_alg *alg);
+int crypto_unregister_shash(struct shash_alg *alg);
+
 static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm)
 {
 	return crypto_tfm_ctx(&tfm->base);
@@ -74,5 +77,10 @@ static inline int ahash_tfm_in_queue(struct crypto_queue *queue,
 	return crypto_tfm_in_queue(queue, crypto_ahash_tfm(tfm));
 }
 
+static inline void *crypto_shash_ctx(struct crypto_shash *tfm)
+{
+	return crypto_tfm_ctx(&tfm->base);
+}
+
 #endif	/* _CRYPTO_INTERNAL_HASH_H */
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index ffaaa418cf59..ee95c748695c 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -39,6 +39,7 @@
 #define CRYPTO_ALG_TYPE_HASH		0x00000009
 #define CRYPTO_ALG_TYPE_AHASH		0x0000000a
 #define CRYPTO_ALG_TYPE_RNG		0x0000000c
+#define CRYPTO_ALG_TYPE_SHASH		0x0000000d
 
 #define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
 #define CRYPTO_ALG_TYPE_AHASH_MASK	0x0000000c
-- 
cgit v1.2.3


From 3b2f6df08258e2875f42bd630eece7e7241a053b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 31 Aug 2008 18:52:18 +1000
Subject: crypto: hash - Export shash through ahash

This patch allows shash algorithms to be used through the ahash
interface.  This is required before we can convert digest algorithms
over to shash.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/shash.c         | 143 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/crypto.h |   2 +-
 2 files changed, 144 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/crypto/shash.c b/crypto/shash.c
index 82ec4bd8d2f5..3f4c713a21ea 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <crypto/scatterwalk.h>
 #include <crypto/internal/hash.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -17,11 +18,15 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 
+static const struct crypto_type crypto_shash_type;
+
 static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm)
 {
 	return container_of(tfm, struct crypto_shash, base);
 }
 
+#include "internal.h"
+
 static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
 				  unsigned int keylen)
 {
@@ -167,6 +172,142 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
 }
 EXPORT_SYMBOL_GPL(crypto_shash_digest);
 
+static int shash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct crypto_shash **ctx = crypto_ahash_ctx(tfm);
+
+	return crypto_shash_setkey(*ctx, key, keylen);
+}
+
+static int shash_async_init(struct ahash_request *req)
+{
+	struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+	struct shash_desc *desc = ahash_request_ctx(req);
+
+	desc->tfm = *ctx;
+	desc->flags = req->base.flags;
+
+	return crypto_shash_init(desc);
+}
+
+static int shash_async_update(struct ahash_request *req)
+{
+	struct shash_desc *desc = ahash_request_ctx(req);
+	struct crypto_hash_walk walk;
+	int nbytes;
+
+	for (nbytes = crypto_hash_walk_first(req, &walk); nbytes > 0;
+	     nbytes = crypto_hash_walk_done(&walk, nbytes))
+		nbytes = crypto_shash_update(desc, walk.data, nbytes);
+
+	return nbytes;
+}
+
+static int shash_async_final(struct ahash_request *req)
+{
+	return crypto_shash_final(ahash_request_ctx(req), req->result);
+}
+
+static int shash_async_digest(struct ahash_request *req)
+{
+	struct scatterlist *sg = req->src;
+	unsigned int offset = sg->offset;
+	unsigned int nbytes = req->nbytes;
+	int err;
+
+	if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) {
+		struct crypto_shash **ctx =
+			crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+		struct shash_desc *desc = ahash_request_ctx(req);
+		void *data;
+
+		desc->tfm = *ctx;
+		desc->flags = req->base.flags;
+
+		data = crypto_kmap(sg_page(sg), 0);
+		err = crypto_shash_digest(desc, data + offset, nbytes,
+					  req->result);
+		crypto_kunmap(data, 0);
+		crypto_yield(desc->flags);
+		goto out;
+	}
+
+	err = shash_async_init(req);
+	if (err)
+		goto out;
+
+	err = shash_async_update(req);
+	if (err)
+		goto out;
+
+	err = shash_async_final(req);
+
+out:
+	return err;
+}
+
+static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm)
+{
+	struct crypto_shash **ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(*ctx);
+}
+
+static int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *calg = tfm->__crt_alg;
+	struct shash_alg *alg = __crypto_shash_alg(calg);
+	struct ahash_tfm *crt = &tfm->crt_ahash;
+	struct crypto_shash **ctx = crypto_tfm_ctx(tfm);
+	struct crypto_shash *shash;
+
+	if (!crypto_mod_get(calg))
+		return -EAGAIN;
+
+	shash = __crypto_shash_cast(crypto_create_tfm(
+		calg, &crypto_shash_type));
+	if (IS_ERR(shash)) {
+		crypto_mod_put(calg);
+		return PTR_ERR(shash);
+	}
+
+	*ctx = shash;
+	tfm->exit = crypto_exit_shash_ops_async;
+
+	crt->init = shash_async_init;
+	crt->update = shash_async_update;
+	crt->final  = shash_async_final;
+	crt->digest = shash_async_digest;
+	crt->setkey = shash_async_setkey;
+
+	crt->digestsize = alg->digestsize;
+	crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash);
+
+	return 0;
+}
+
+static int crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+{
+	switch (mask & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_AHASH_MASK:
+		return crypto_init_shash_ops_async(tfm);
+	}
+
+	return -EINVAL;
+}
+
+static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type,
+					 u32 mask)
+{
+	switch (mask & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_AHASH_MASK:
+		return sizeof(struct crypto_shash *);
+	}
+
+	return 0;
+}
+
 static int crypto_shash_init_tfm(struct crypto_tfm *tfm,
 				 const struct crypto_type *frontend)
 {
@@ -194,7 +335,9 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
 }
 
 static const struct crypto_type crypto_shash_type = {
+	.ctxsize = crypto_shash_ctxsize,
 	.extsize = crypto_shash_extsize,
+	.init = crypto_init_shash_ops,
 	.init_tfm = crypto_shash_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_shash_show,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index ee95c748695c..44c72f0f9b05 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -38,8 +38,8 @@
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000008
 #define CRYPTO_ALG_TYPE_HASH		0x00000009
 #define CRYPTO_ALG_TYPE_AHASH		0x0000000a
+#define CRYPTO_ALG_TYPE_SHASH		0x0000000b
 #define CRYPTO_ALG_TYPE_RNG		0x0000000c
-#define CRYPTO_ALG_TYPE_SHASH		0x0000000d
 
 #define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
 #define CRYPTO_ALG_TYPE_AHASH_MASK	0x0000000c
-- 
cgit v1.2.3


From dec8b78606ebd5f309c38f2fb10196ce996dd18d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 2 Nov 2008 21:38:11 +0800
Subject: crypto: hash - Add import/export interface

It is often useful to save the partial state of a hash function
so that it can be used as a base for two or more computations.

The most prominent example is HMAC where all hashes start from
a base determined by the key.  Having an import/export interface
means that we only have to compute that base once rather than
for each message.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ahash.c                 | 14 ++++++++++++++
 crypto/shash.c                 | 14 ++++++++++++++
 include/crypto/hash.h          | 21 +++++++++++++++++++++
 include/crypto/internal/hash.h |  5 -----
 include/linux/crypto.h         |  1 +
 5 files changed, 50 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/crypto/ahash.c b/crypto/ahash.c
index 27128f2c687a..7d4e33dfe212 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -146,6 +146,20 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 	return ahash->setkey(tfm, key, keylen);
 }
 
+int crypto_ahash_import(struct ahash_request *req, const u8 *in)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
+
+	memcpy(ahash_request_ctx(req), in, crypto_ahash_reqsize(tfm));
+
+	if (alg->reinit)
+		alg->reinit(req);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_ahash_import);
+
 static unsigned int crypto_ahash_ctxsize(struct crypto_alg *alg, u32 type,
 					u32 mask)
 {
diff --git a/crypto/shash.c b/crypto/shash.c
index 3f4c713a21ea..26aff3feefc0 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -172,6 +172,20 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
 }
 EXPORT_SYMBOL_GPL(crypto_shash_digest);
 
+int crypto_shash_import(struct shash_desc *desc, const u8 *in)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	struct shash_alg *alg = crypto_shash_alg(tfm);
+
+	memcpy(shash_desc_ctx(desc), in, crypto_shash_descsize(tfm));
+
+	if (alg->reinit)
+		alg->reinit(desc);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_shash_import);
+
 static int shash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
 			      unsigned int keylen)
 {
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index f9b51d408953..cd16d6e668ce 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -24,6 +24,7 @@ struct shash_desc {
 
 struct shash_alg {
 	int (*init)(struct shash_desc *desc);
+	int (*reinit)(struct shash_desc *desc);
 	int (*update)(struct shash_desc *desc, const u8 *data,
 		      unsigned int len);
 	int (*final)(struct shash_desc *desc, u8 *out);
@@ -116,6 +117,11 @@ static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm)
 	return crypto_ahash_crt(tfm)->reqsize;
 }
 
+static inline void *ahash_request_ctx(struct ahash_request *req)
+{
+	return req->__ctx;
+}
+
 static inline int crypto_ahash_setkey(struct crypto_ahash *tfm,
 				      const u8 *key, unsigned int keylen)
 {
@@ -130,6 +136,14 @@ static inline int crypto_ahash_digest(struct ahash_request *req)
 	return crt->digest(req);
 }
 
+static inline void crypto_ahash_export(struct ahash_request *req, u8 *out)
+{
+	memcpy(out, ahash_request_ctx(req),
+	       crypto_ahash_reqsize(crypto_ahash_reqtfm(req)));
+}
+
+int crypto_ahash_import(struct ahash_request *req, const u8 *in);
+
 static inline int crypto_ahash_init(struct ahash_request *req)
 {
 	struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req));
@@ -262,6 +276,13 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *out);
 
+static inline void crypto_shash_export(struct shash_desc *desc, u8 *out)
+{
+	memcpy(out, shash_desc_ctx(desc), crypto_shash_descsize(desc->tfm));
+}
+
+int crypto_shash_import(struct shash_desc *desc, const u8 *in);
+
 static inline int crypto_shash_init(struct shash_desc *desc)
 {
 	return crypto_shash_alg(desc->tfm)->init(desc);
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 32d3a8ed06de..92fbe7385856 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -66,11 +66,6 @@ static inline struct ahash_request *ahash_dequeue_request(
 	return ahash_request_cast(crypto_dequeue_request(queue));
 }
 
-static inline void *ahash_request_ctx(struct ahash_request *req)
-{
-	return req->__ctx;
-}
-
 static inline int ahash_tfm_in_queue(struct crypto_queue *queue,
 					  struct crypto_ahash *tfm)
 {
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 44c72f0f9b05..77a1f3d9416d 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -221,6 +221,7 @@ struct ablkcipher_alg {
 
 struct ahash_alg {
 	int (*init)(struct ahash_request *req);
+	int (*reinit)(struct ahash_request *req);
 	int (*update)(struct ahash_request *req);
 	int (*final)(struct ahash_request *req);
 	int (*digest)(struct ahash_request *req);
-- 
cgit v1.2.3


From 5f7082ed4f482f05db01d84dbf58190492ebf0ad Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 31 Aug 2008 22:21:09 +1000
Subject: crypto: hash - Export shash through hash

This patch allows shash algorithms to be used through the old hash
interface.  This is a transitional measure so we can convert the
underlying algorithms to shash before converting the users across.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ahash.c                 |  16 ++++++
 crypto/authenc.c               |   3 ++
 crypto/hmac.c                  |  10 ++--
 crypto/shash.c                 | 109 +++++++++++++++++++++++++++++++++++++++++
 include/crypto/algapi.h        |   5 ++
 include/crypto/internal/hash.h |   3 ++
 include/linux/crypto.h         |   4 +-
 7 files changed, 144 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/crypto/ahash.c b/crypto/ahash.c
index 7d4e33dfe212..9f98956b17fc 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -112,6 +112,22 @@ int crypto_hash_walk_first(struct ahash_request *req,
 }
 EXPORT_SYMBOL_GPL(crypto_hash_walk_first);
 
+int crypto_hash_walk_first_compat(struct hash_desc *hdesc,
+				  struct crypto_hash_walk *walk,
+				  struct scatterlist *sg, unsigned int len)
+{
+	walk->total = len;
+
+	if (!walk->total)
+		return 0;
+
+	walk->alignmask = crypto_hash_alignmask(hdesc->tfm);
+	walk->sg = sg;
+	walk->flags = hdesc->flags;
+
+	return hash_walk_new_entry(walk);
+}
+
 static int ahash_setkey_unaligned(struct crypto_ahash *tfm, const u8 *key,
 				unsigned int keylen)
 {
diff --git a/crypto/authenc.c b/crypto/authenc.c
index fd9f06c63d76..40b6e9ec9e3a 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -11,6 +11,7 @@
  */
 
 #include <crypto/aead.h>
+#include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
@@ -431,6 +432,8 @@ static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb)
 	inst->alg.cra_aead.ivsize = enc->cra_ablkcipher.ivsize;
 	inst->alg.cra_aead.maxauthsize = auth->cra_type == &crypto_hash_type ?
 					 auth->cra_hash.digestsize :
+					 auth->cra_type ?
+					 __crypto_shash_alg(auth)->digestsize :
 					 auth->cra_digest.dia_digestsize;
 
 	inst->alg.cra_ctxsize = sizeof(struct crypto_authenc_ctx);
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 7ff2d6a8c7d0..0ad39c374963 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -16,7 +16,7 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -238,9 +238,11 @@ static struct crypto_instance *hmac_alloc(struct rtattr **tb)
 		return ERR_CAST(alg);
 
 	inst = ERR_PTR(-EINVAL);
-	ds = (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
-	     CRYPTO_ALG_TYPE_HASH ? alg->cra_hash.digestsize :
-				    alg->cra_digest.dia_digestsize;
+	ds = alg->cra_type == &crypto_hash_type ?
+	     alg->cra_hash.digestsize :
+	     alg->cra_type ?
+	     __crypto_shash_alg(alg)->digestsize :
+	     alg->cra_digest.dia_digestsize;
 	if (ds > alg->cra_blocksize)
 		goto out_put_alg;
 
diff --git a/crypto/shash.c b/crypto/shash.c
index 26aff3feefc0..50d69a4e4b61 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -301,9 +301,114 @@ static int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int shash_compat_setkey(struct crypto_hash *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct shash_desc *desc = crypto_hash_ctx(tfm);
+
+	return crypto_shash_setkey(desc->tfm, key, keylen);
+}
+
+static int shash_compat_init(struct hash_desc *hdesc)
+{
+	struct shash_desc *desc = crypto_hash_ctx(hdesc->tfm);
+
+	desc->flags = hdesc->flags;
+
+	return crypto_shash_init(desc);
+}
+
+static int shash_compat_update(struct hash_desc *hdesc, struct scatterlist *sg,
+			       unsigned int len)
+{
+	struct shash_desc *desc = crypto_hash_ctx(hdesc->tfm);
+	struct crypto_hash_walk walk;
+	int nbytes;
+
+	for (nbytes = crypto_hash_walk_first_compat(hdesc, &walk, sg, len);
+	     nbytes > 0; nbytes = crypto_hash_walk_done(&walk, nbytes))
+		nbytes = crypto_shash_update(desc, walk.data, nbytes);
+
+	return nbytes;
+}
+
+static int shash_compat_final(struct hash_desc *hdesc, u8 *out)
+{
+	return crypto_shash_final(crypto_hash_ctx(hdesc->tfm), out);
+}
+
+static int shash_compat_digest(struct hash_desc *hdesc, struct scatterlist *sg,
+			       unsigned int nbytes, u8 *out)
+{
+	unsigned int offset = sg->offset;
+	int err;
+
+	if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) {
+		struct shash_desc *desc = crypto_hash_ctx(hdesc->tfm);
+		void *data;
+
+		desc->flags = hdesc->flags;
+
+		data = crypto_kmap(sg_page(sg), 0);
+		err = crypto_shash_digest(desc, data + offset, nbytes, out);
+		crypto_kunmap(data, 0);
+		crypto_yield(desc->flags);
+		goto out;
+	}
+
+	err = shash_compat_init(hdesc);
+	if (err)
+		goto out;
+
+	err = shash_compat_update(hdesc, sg, nbytes);
+	if (err)
+		goto out;
+
+	err = shash_compat_final(hdesc, out);
+
+out:
+	return err;
+}
+
+static void crypto_exit_shash_ops_compat(struct crypto_tfm *tfm)
+{
+	struct shash_desc *desc= crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(desc->tfm);
+}
+
+static int crypto_init_shash_ops_compat(struct crypto_tfm *tfm)
+{
+	struct hash_tfm *crt = &tfm->crt_hash;
+	struct crypto_alg *calg = tfm->__crt_alg;
+	struct shash_alg *alg = __crypto_shash_alg(calg);
+	struct shash_desc *desc = crypto_tfm_ctx(tfm);
+	struct crypto_shash *shash;
+
+	shash = __crypto_shash_cast(crypto_create_tfm(
+		calg, &crypto_shash_type));
+	if (IS_ERR(shash))
+		return PTR_ERR(shash);
+
+	desc->tfm = shash;
+	tfm->exit = crypto_exit_shash_ops_compat;
+
+	crt->init = shash_compat_init;
+	crt->update = shash_compat_update;
+	crt->final  = shash_compat_final;
+	crt->digest = shash_compat_digest;
+	crt->setkey = shash_compat_setkey;
+
+	crt->digestsize = alg->digestsize;
+
+	return 0;
+}
+
 static int crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 {
 	switch (mask & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_HASH_MASK:
+		return crypto_init_shash_ops_compat(tfm);
 	case CRYPTO_ALG_TYPE_AHASH_MASK:
 		return crypto_init_shash_ops_async(tfm);
 	}
@@ -314,7 +419,11 @@ static int crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
 static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type,
 					 u32 mask)
 {
+	struct shash_alg *salg = __crypto_shash_alg(alg);
+
 	switch (mask & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_HASH_MASK:
+		return sizeof(struct shash_desc) + salg->descsize;
 	case CRYPTO_ALG_TYPE_AHASH_MASK:
 		return sizeof(struct crypto_shash *);
 	}
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 986db68548f6..010545436efa 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -248,6 +248,11 @@ static inline struct crypto_hash *crypto_spawn_hash(struct crypto_spawn *spawn)
 	return __crypto_hash_cast(crypto_spawn_tfm(spawn, type, mask));
 }
 
+static inline void *crypto_hash_ctx(struct crypto_hash *tfm)
+{
+	return crypto_tfm_ctx(&tfm->base);
+}
+
 static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm)
 {
 	return crypto_tfm_ctx_aligned(&tfm->base);
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 92fbe7385856..82b70564bcab 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -39,6 +39,9 @@ extern const struct crypto_type crypto_ahash_type;
 int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err);
 int crypto_hash_walk_first(struct ahash_request *req,
 			   struct crypto_hash_walk *walk);
+int crypto_hash_walk_first_compat(struct hash_desc *hdesc,
+				  struct crypto_hash_walk *walk,
+				  struct scatterlist *sg, unsigned int len);
 
 int crypto_register_shash(struct shash_alg *alg);
 int crypto_unregister_shash(struct shash_alg *alg);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 77a1f3d9416d..3bacd71509fb 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -36,9 +36,9 @@
 #define CRYPTO_ALG_TYPE_ABLKCIPHER	0x00000005
 #define CRYPTO_ALG_TYPE_GIVCIPHER	0x00000006
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000008
-#define CRYPTO_ALG_TYPE_HASH		0x00000009
+#define CRYPTO_ALG_TYPE_HASH		0x00000008
+#define CRYPTO_ALG_TYPE_SHASH		0x00000009
 #define CRYPTO_ALG_TYPE_AHASH		0x0000000a
-#define CRYPTO_ALG_TYPE_SHASH		0x0000000b
 #define CRYPTO_ALG_TYPE_RNG		0x0000000c
 
 #define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
-- 
cgit v1.2.3


From 69c35efcf1576ab5f00cba83e8ca740923afb6c9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 7 Nov 2008 15:11:47 +0800
Subject: libcrc32c: Move implementation to crypto crc32c

This patch swaps the role of libcrc32c and crc32c.  Previously
the implementation was in libcrc32c and crc32c was a wrapper.
Now the code is in crc32c and libcrc32c just calls the crypto
layer.

The reason for the change is to tap into the algorithm selection
capability of the crypto API so that optimised implementations
such as the one utilising Intel's CRC32C instruction can be
used where available.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig         |   4 +-
 crypto/crc32c.c        | 113 +++++++++++++++++++++++++++++-
 include/linux/crc32c.h |   5 +-
 lib/Kconfig            |   1 +
 lib/libcrc32c.c        | 182 +++++++++----------------------------------------
 5 files changed, 146 insertions(+), 159 deletions(-)

(limited to 'include')

diff --git a/crypto/Kconfig b/crypto/Kconfig
index dc20a34ba5ef..aede80246df2 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -256,12 +256,10 @@ comment "Digest"
 config CRYPTO_CRC32C
 	tristate "CRC32c CRC algorithm"
 	select CRYPTO_HASH
-	select LIBCRC32C
 	help
 	  Castagnoli, et al Cyclic Redundancy-Check Algorithm.  Used
 	  by iSCSI for header and data digests and by others.
-	  See Castagnoli93.  This implementation uses lib/libcrc32c.
-	  Module will be crc32c.
+	  See Castagnoli93.  Module will be crc32c.
 
 config CRYPTO_CRC32C_INTEL
 	tristate "CRC32c INTEL hardware acceleration"
diff --git a/crypto/crc32c.c b/crypto/crc32c.c
index b21b93f2bb90..973bc2cfab2e 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c.c
@@ -3,8 +3,29 @@
  *
  * CRC32C chksum
  *
- * This module file is a wrapper to invoke the lib/crc32c routines.
+ *@Article{castagnoli-crc,
+ * author =       { Guy Castagnoli and Stefan Braeuer and Martin Herrman},
+ * title =        {{Optimization of Cyclic Redundancy-Check Codes with 24
+ *                 and 32 Parity Bits}},
+ * journal =      IEEE Transactions on Communication,
+ * year =         {1993},
+ * volume =       {41},
+ * number =       {6},
+ * pages =        {},
+ * month =        {June},
+ *}
+ * Used by the iSCSI driver, possibly others, and derived from the
+ * the iscsi-crc.c module of the linux-iscsi driver at
+ * http://linux-iscsi.sourceforge.net.
  *
+ * Following the example of lib/crc32, this function is intended to be
+ * flexible and useful for all users.  Modules that currently have their
+ * own crc32c, but hopefully may be able to use this one are:
+ *  net/sctp (please add all your doco to here if you change to
+ *            use this one!)
+ *  <endoflist>
+ *
+ * Copyright (c) 2004 Cisco Systems, Inc.
  * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -18,7 +39,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/string.h>
-#include <linux/crc32c.h>
 #include <linux/kernel.h>
 
 #define CHKSUM_BLOCK_SIZE	1
@@ -32,6 +52,95 @@ struct chksum_desc_ctx {
 	u32 crc;
 };
 
+/*
+ * This is the CRC-32C table
+ * Generated with:
+ * width = 32 bits
+ * poly = 0x1EDC6F41
+ * reflect input bytes = true
+ * reflect output bytes = true
+ */
+
+static const u32 crc32c_table[256] = {
+	0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+	0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+	0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+	0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+	0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+	0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+	0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+	0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+	0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+	0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+	0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+	0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+	0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+	0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+	0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+	0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+	0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+	0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+	0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+	0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+	0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+	0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+	0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+	0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+	0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+	0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+	0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+	0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+	0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+	0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+	0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+	0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+	0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+	0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+	0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+	0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+	0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+	0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+	0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+	0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+	0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+	0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+	0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+	0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+	0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+	0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+	0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+	0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+	0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+	0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+	0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+	0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+	0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+	0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+	0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+	0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+	0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+	0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+	0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+	0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+	0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+	0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+	0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+	0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+/*
+ * Steps through buffer one byte at at time, calculates reflected
+ * crc using table.
+ */
+
+static u32 crc32c(u32 crc, const u8 *data, unsigned int length)
+{
+	while (length--)
+		crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
+
+	return crc;
+}
+
 /*
  * Steps through buffer one byte at at time, calculates reflected 
  * crc using table.
diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h
index 508f512e5a2f..66fa8ff795ec 100644
--- a/include/linux/crc32c.h
+++ b/include/linux/crc32c.h
@@ -3,9 +3,6 @@
 
 #include <linux/types.h>
 
-extern u32 crc32c_le(u32 crc, unsigned char const *address, size_t length);
-extern u32 crc32c_be(u32 crc, unsigned char const *address, size_t length);
-
-#define crc32c(seed, data, length)  crc32c_le(seed, (unsigned char const *)data, length)
+extern u32 crc32c(u32 crc, const void *address, unsigned int length);
 
 #endif	/* _LINUX_CRC32C_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 85cf7ea978aa..ce303f13ed92 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -64,6 +64,7 @@ config CRC7
 
 config LIBCRC32C
 	tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
+	select CRYPTO_CRC32C
 	help
 	  This option is provided for the case where no in-kernel-tree
 	  modules require CRC32c functions, but a module built outside the
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index b5c3287d8ea4..38b17ab52ff9 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -30,168 +30,50 @@
  * any later version.
  *
  */
-#include <linux/crc32c.h>
-#include <linux/compiler.h>
-#include <linux/module.h>
-
-MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
-MODULE_LICENSE("GPL");
-
-#define CRC32C_POLY_BE 0x1EDC6F41
-#define CRC32C_POLY_LE 0x82F63B78
 
-#ifndef CRC_LE_BITS 
-# define CRC_LE_BITS 8
-#endif
-
-
-/*
- * Haven't generated a big-endian table yet, but the bit-wise version
- * should at least work.
- */
-#if defined CRC_BE_BITS && CRC_BE_BITS != 1
-#undef CRC_BE_BITS
-#endif
-#ifndef CRC_BE_BITS
-# define CRC_BE_BITS 1
-#endif
+#include <crypto/hash.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 
-EXPORT_SYMBOL(crc32c_le);
+static struct crypto_shash *tfm;
 
-#if CRC_LE_BITS == 1
-/*
- * Compute things bit-wise, as done in crc32.c.  We could share the tight 
- * loop below with crc32 and vary the POLY if we don't find value in terms
- * of space and maintainability in keeping the two modules separate.
- */
-u32 __pure
-crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 crc32c(u32 crc, const void *address, unsigned int length)
 {
-	int i;
-	while (len--) {
-		crc ^= *p++;
-		for (i = 0; i < 8; i++)
-			crc = (crc >> 1) ^ ((crc & 1) ? CRC32C_POLY_LE : 0);
-	}
-	return crc;
-}
-#else
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(tfm)];
+	} desc;
+	int err;
 
-/*
- * This is the CRC-32C table
- * Generated with:
- * width = 32 bits
- * poly = 0x1EDC6F41
- * reflect input bytes = true
- * reflect output bytes = true
- */
+	desc.shash.tfm = tfm;
+	desc.shash.flags = 0;
+	*(u32 *)desc.ctx = crc;
 
-static const u32 crc32c_table[256] = {
-	0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
-	0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
-	0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
-	0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
-	0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
-	0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
-	0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
-	0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
-	0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
-	0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
-	0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
-	0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
-	0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
-	0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
-	0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
-	0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
-	0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
-	0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
-	0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
-	0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
-	0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
-	0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
-	0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
-	0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
-	0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
-	0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
-	0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
-	0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
-	0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
-	0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
-	0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
-	0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
-	0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
-	0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
-	0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
-	0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
-	0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
-	0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
-	0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
-	0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
-	0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
-	0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
-	0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
-	0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
-	0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
-	0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
-	0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
-	0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
-	0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
-	0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
-	0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
-	0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
-	0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
-	0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
-	0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
-	0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
-	0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
-	0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
-	0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
-	0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
-	0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
-	0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
-	0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
-	0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
-};
+	err = crypto_shash_update(&desc.shash, address, length);
+	BUG_ON(err);
 
-/*
- * Steps through buffer one byte at at time, calculates reflected 
- * crc using table.
- */
+	return *(u32 *)desc.ctx;
+}
 
-u32 __pure
-crc32c_le(u32 crc, unsigned char const *data, size_t length)
+static int __init libcrc32c_mod_init(void)
 {
-	while (length--)
-		crc =
-		    crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
+	tfm = crypto_alloc_shash("crc32c", 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
 
-	return crc;
+	return 0;
 }
 
-#endif	/* CRC_LE_BITS == 8 */
-
-EXPORT_SYMBOL(crc32c_be);
-
-#if CRC_BE_BITS == 1
-u32 __pure
-crc32c_be(u32 crc, unsigned char const *p, size_t len)
+static void __exit libcrc32c_mod_fini(void)
 {
-	int i;
-	while (len--) {
-		crc ^= *p++ << 24;
-		for (i = 0; i < 8; i++)
-			crc =
-			    (crc << 1) ^ ((crc & 0x80000000) ? CRC32C_POLY_BE :
-					  0);
-	}
-	return crc;
+	crypto_free_shash(tfm);
 }
-#endif
 
-/*
- * Unit test
- *
- * A small unit test suite is implemented as part of the crypto suite.
- * Select CRYPTO_CRC32C and use the tcrypt module to run the tests.
- */
+module_init(libcrc32c_mod_init);
+module_exit(libcrc32c_mod_fini);
+
+MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
+MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 0426c166424ea6d3d0412f47879c8ba268f874c4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 11 Nov 2008 12:20:06 +0800
Subject: libcrc32c: Add crc32c_le macro

The bnx2x driver actually uses the crc32c_le name so this patch
restores the crc32c_le symbol through a macro.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crc32c.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h
index 66fa8ff795ec..bd8b44d96bdc 100644
--- a/include/linux/crc32c.h
+++ b/include/linux/crc32c.h
@@ -5,4 +5,7 @@
 
 extern u32 crc32c(u32 crc, const void *address, unsigned int length);
 
+/* This macro exists for backwards-compatibility. */
+#define crc32c_le crc32c
+
 #endif	/* _LINUX_CRC32C_H */
-- 
cgit v1.2.3


From 0ee4a96902dd7858e65f378c86f428a0355bd841 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 25 Dec 2008 11:05:13 +1100
Subject: crypto: aes - Precompute tables

The tables used by the various AES algorithms are currently
computed at run-time.  This has created an init ordering problem
because some AES algorithms may be registered before the tables
have been initialised.

This patch gets around this whole thing by precomputing the tables.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/aes_generic.c | 1145 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/crypto/aes.h |    8 +-
 2 files changed, 1059 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 136dc98d8a03..b8b66ec3883b 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -60,102 +60,1068 @@ static inline u8 byte(const u32 x, const unsigned n)
 	return x >> (n << 3);
 }
 
-static u8 pow_tab[256] __initdata;
-static u8 log_tab[256] __initdata;
-static u8 sbx_tab[256] __initdata;
-static u8 isb_tab[256] __initdata;
-static u32 rco_tab[10];
-
-u32 crypto_ft_tab[4][256];
-u32 crypto_fl_tab[4][256];
-u32 crypto_it_tab[4][256];
-u32 crypto_il_tab[4][256];
-
-EXPORT_SYMBOL_GPL(crypto_ft_tab);
-EXPORT_SYMBOL_GPL(crypto_fl_tab);
-EXPORT_SYMBOL_GPL(crypto_it_tab);
-EXPORT_SYMBOL_GPL(crypto_il_tab);
-
-static inline u8 __init f_mult(u8 a, u8 b)
-{
-	u8 aa = log_tab[a], cc = aa + log_tab[b];
-
-	return pow_tab[cc + (cc < aa ? 1 : 0)];
-}
-
-#define ff_mult(a, b)	(a && b ? f_mult(a, b) : 0)
-
-static void __init gen_tabs(void)
-{
-	u32 i, t;
-	u8 p, q;
-
-	/*
-	 * log and power tables for GF(2**8) finite field with
-	 * 0x011b as modular polynomial - the simplest primitive
-	 * root is 0x03, used here to generate the tables
-	 */
-
-	for (i = 0, p = 1; i < 256; ++i) {
-		pow_tab[i] = (u8) p;
-		log_tab[p] = (u8) i;
-
-		p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
+static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
+
+const u32 crypto_ft_tab[4][256] = {
+	{
+		0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
+		0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
+		0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+		0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
+		0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa,
+		0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+		0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45,
+		0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
+		0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+		0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
+		0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9,
+		0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+		0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d,
+		0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
+		0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+		0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
+		0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34,
+		0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+		0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d,
+		0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
+		0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+		0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
+		0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972,
+		0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+		0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed,
+		0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
+		0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+		0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
+		0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05,
+		0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+		0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142,
+		0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
+		0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+		0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
+		0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a,
+		0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+		0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3,
+		0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
+		0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+		0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
+		0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14,
+		0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+		0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4,
+		0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
+		0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+		0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
+		0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf,
+		0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+		0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c,
+		0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
+		0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+		0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
+		0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc,
+		0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+		0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969,
+		0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
+		0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+		0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
+		0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9,
+		0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+		0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a,
+		0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
+		0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+		0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
+	}, {
+		0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d,
+		0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154,
+		0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d,
+		0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a,
+		0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87,
+		0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
+		0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea,
+		0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b,
+		0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a,
+		0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f,
+		0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908,
+		0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
+		0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e,
+		0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5,
+		0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d,
+		0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f,
+		0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e,
+		0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
+		0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce,
+		0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397,
+		0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c,
+		0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed,
+		0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b,
+		0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
+		0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16,
+		0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194,
+		0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81,
+		0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3,
+		0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a,
+		0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
+		0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263,
+		0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d,
+		0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f,
+		0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39,
+		0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47,
+		0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
+		0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f,
+		0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83,
+		0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c,
+		0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76,
+		0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e,
+		0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
+		0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6,
+		0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b,
+		0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7,
+		0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0,
+		0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25,
+		0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
+		0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72,
+		0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751,
+		0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21,
+		0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85,
+		0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa,
+		0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
+		0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0,
+		0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9,
+		0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233,
+		0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7,
+		0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920,
+		0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
+		0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17,
+		0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8,
+		0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11,
+		0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a,
+	}, {
+		0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b,
+		0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5,
+		0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b,
+		0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76,
+		0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d,
+		0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
+		0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf,
+		0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0,
+		0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26,
+		0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc,
+		0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1,
+		0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
+		0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3,
+		0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a,
+		0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2,
+		0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75,
+		0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a,
+		0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
+		0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3,
+		0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784,
+		0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced,
+		0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b,
+		0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39,
+		0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
+		0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb,
+		0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485,
+		0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f,
+		0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8,
+		0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f,
+		0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
+		0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321,
+		0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2,
+		0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec,
+		0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917,
+		0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d,
+		0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
+		0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc,
+		0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388,
+		0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14,
+		0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db,
+		0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a,
+		0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
+		0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662,
+		0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79,
+		0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d,
+		0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9,
+		0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea,
+		0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
+		0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e,
+		0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6,
+		0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f,
+		0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a,
+		0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66,
+		0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
+		0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9,
+		0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e,
+		0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311,
+		0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794,
+		0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9,
+		0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
+		0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d,
+		0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868,
+		0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f,
+		0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16,
+	}, {
+		0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b,
+		0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5,
+		0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b,
+		0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676,
+		0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d,
+		0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
+		0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf,
+		0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0,
+		0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626,
+		0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc,
+		0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1,
+		0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
+		0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3,
+		0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a,
+		0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2,
+		0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575,
+		0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a,
+		0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
+		0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3,
+		0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484,
+		0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded,
+		0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b,
+		0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939,
+		0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
+		0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb,
+		0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585,
+		0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f,
+		0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8,
+		0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f,
+		0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
+		0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121,
+		0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2,
+		0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec,
+		0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717,
+		0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d,
+		0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
+		0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc,
+		0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888,
+		0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414,
+		0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb,
+		0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a,
+		0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
+		0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262,
+		0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979,
+		0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d,
+		0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9,
+		0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea,
+		0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
+		0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e,
+		0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6,
+		0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f,
+		0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a,
+		0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666,
+		0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
+		0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9,
+		0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e,
+		0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111,
+		0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494,
+		0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9,
+		0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
+		0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d,
+		0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868,
+		0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f,
+		0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616,
 	}
+};
 
-	log_tab[1] = 0;
-
-	for (i = 0, p = 1; i < 10; ++i) {
-		rco_tab[i] = p;
-
-		p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
+const u32 crypto_fl_tab[4][256] = {
+	{
+		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
+		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
+		0x00000030, 0x00000001, 0x00000067, 0x0000002b,
+		0x000000fe, 0x000000d7, 0x000000ab, 0x00000076,
+		0x000000ca, 0x00000082, 0x000000c9, 0x0000007d,
+		0x000000fa, 0x00000059, 0x00000047, 0x000000f0,
+		0x000000ad, 0x000000d4, 0x000000a2, 0x000000af,
+		0x0000009c, 0x000000a4, 0x00000072, 0x000000c0,
+		0x000000b7, 0x000000fd, 0x00000093, 0x00000026,
+		0x00000036, 0x0000003f, 0x000000f7, 0x000000cc,
+		0x00000034, 0x000000a5, 0x000000e5, 0x000000f1,
+		0x00000071, 0x000000d8, 0x00000031, 0x00000015,
+		0x00000004, 0x000000c7, 0x00000023, 0x000000c3,
+		0x00000018, 0x00000096, 0x00000005, 0x0000009a,
+		0x00000007, 0x00000012, 0x00000080, 0x000000e2,
+		0x000000eb, 0x00000027, 0x000000b2, 0x00000075,
+		0x00000009, 0x00000083, 0x0000002c, 0x0000001a,
+		0x0000001b, 0x0000006e, 0x0000005a, 0x000000a0,
+		0x00000052, 0x0000003b, 0x000000d6, 0x000000b3,
+		0x00000029, 0x000000e3, 0x0000002f, 0x00000084,
+		0x00000053, 0x000000d1, 0x00000000, 0x000000ed,
+		0x00000020, 0x000000fc, 0x000000b1, 0x0000005b,
+		0x0000006a, 0x000000cb, 0x000000be, 0x00000039,
+		0x0000004a, 0x0000004c, 0x00000058, 0x000000cf,
+		0x000000d0, 0x000000ef, 0x000000aa, 0x000000fb,
+		0x00000043, 0x0000004d, 0x00000033, 0x00000085,
+		0x00000045, 0x000000f9, 0x00000002, 0x0000007f,
+		0x00000050, 0x0000003c, 0x0000009f, 0x000000a8,
+		0x00000051, 0x000000a3, 0x00000040, 0x0000008f,
+		0x00000092, 0x0000009d, 0x00000038, 0x000000f5,
+		0x000000bc, 0x000000b6, 0x000000da, 0x00000021,
+		0x00000010, 0x000000ff, 0x000000f3, 0x000000d2,
+		0x000000cd, 0x0000000c, 0x00000013, 0x000000ec,
+		0x0000005f, 0x00000097, 0x00000044, 0x00000017,
+		0x000000c4, 0x000000a7, 0x0000007e, 0x0000003d,
+		0x00000064, 0x0000005d, 0x00000019, 0x00000073,
+		0x00000060, 0x00000081, 0x0000004f, 0x000000dc,
+		0x00000022, 0x0000002a, 0x00000090, 0x00000088,
+		0x00000046, 0x000000ee, 0x000000b8, 0x00000014,
+		0x000000de, 0x0000005e, 0x0000000b, 0x000000db,
+		0x000000e0, 0x00000032, 0x0000003a, 0x0000000a,
+		0x00000049, 0x00000006, 0x00000024, 0x0000005c,
+		0x000000c2, 0x000000d3, 0x000000ac, 0x00000062,
+		0x00000091, 0x00000095, 0x000000e4, 0x00000079,
+		0x000000e7, 0x000000c8, 0x00000037, 0x0000006d,
+		0x0000008d, 0x000000d5, 0x0000004e, 0x000000a9,
+		0x0000006c, 0x00000056, 0x000000f4, 0x000000ea,
+		0x00000065, 0x0000007a, 0x000000ae, 0x00000008,
+		0x000000ba, 0x00000078, 0x00000025, 0x0000002e,
+		0x0000001c, 0x000000a6, 0x000000b4, 0x000000c6,
+		0x000000e8, 0x000000dd, 0x00000074, 0x0000001f,
+		0x0000004b, 0x000000bd, 0x0000008b, 0x0000008a,
+		0x00000070, 0x0000003e, 0x000000b5, 0x00000066,
+		0x00000048, 0x00000003, 0x000000f6, 0x0000000e,
+		0x00000061, 0x00000035, 0x00000057, 0x000000b9,
+		0x00000086, 0x000000c1, 0x0000001d, 0x0000009e,
+		0x000000e1, 0x000000f8, 0x00000098, 0x00000011,
+		0x00000069, 0x000000d9, 0x0000008e, 0x00000094,
+		0x0000009b, 0x0000001e, 0x00000087, 0x000000e9,
+		0x000000ce, 0x00000055, 0x00000028, 0x000000df,
+		0x0000008c, 0x000000a1, 0x00000089, 0x0000000d,
+		0x000000bf, 0x000000e6, 0x00000042, 0x00000068,
+		0x00000041, 0x00000099, 0x0000002d, 0x0000000f,
+		0x000000b0, 0x00000054, 0x000000bb, 0x00000016,
+	}, {
+		0x00006300, 0x00007c00, 0x00007700, 0x00007b00,
+		0x0000f200, 0x00006b00, 0x00006f00, 0x0000c500,
+		0x00003000, 0x00000100, 0x00006700, 0x00002b00,
+		0x0000fe00, 0x0000d700, 0x0000ab00, 0x00007600,
+		0x0000ca00, 0x00008200, 0x0000c900, 0x00007d00,
+		0x0000fa00, 0x00005900, 0x00004700, 0x0000f000,
+		0x0000ad00, 0x0000d400, 0x0000a200, 0x0000af00,
+		0x00009c00, 0x0000a400, 0x00007200, 0x0000c000,
+		0x0000b700, 0x0000fd00, 0x00009300, 0x00002600,
+		0x00003600, 0x00003f00, 0x0000f700, 0x0000cc00,
+		0x00003400, 0x0000a500, 0x0000e500, 0x0000f100,
+		0x00007100, 0x0000d800, 0x00003100, 0x00001500,
+		0x00000400, 0x0000c700, 0x00002300, 0x0000c300,
+		0x00001800, 0x00009600, 0x00000500, 0x00009a00,
+		0x00000700, 0x00001200, 0x00008000, 0x0000e200,
+		0x0000eb00, 0x00002700, 0x0000b200, 0x00007500,
+		0x00000900, 0x00008300, 0x00002c00, 0x00001a00,
+		0x00001b00, 0x00006e00, 0x00005a00, 0x0000a000,
+		0x00005200, 0x00003b00, 0x0000d600, 0x0000b300,
+		0x00002900, 0x0000e300, 0x00002f00, 0x00008400,
+		0x00005300, 0x0000d100, 0x00000000, 0x0000ed00,
+		0x00002000, 0x0000fc00, 0x0000b100, 0x00005b00,
+		0x00006a00, 0x0000cb00, 0x0000be00, 0x00003900,
+		0x00004a00, 0x00004c00, 0x00005800, 0x0000cf00,
+		0x0000d000, 0x0000ef00, 0x0000aa00, 0x0000fb00,
+		0x00004300, 0x00004d00, 0x00003300, 0x00008500,
+		0x00004500, 0x0000f900, 0x00000200, 0x00007f00,
+		0x00005000, 0x00003c00, 0x00009f00, 0x0000a800,
+		0x00005100, 0x0000a300, 0x00004000, 0x00008f00,
+		0x00009200, 0x00009d00, 0x00003800, 0x0000f500,
+		0x0000bc00, 0x0000b600, 0x0000da00, 0x00002100,
+		0x00001000, 0x0000ff00, 0x0000f300, 0x0000d200,
+		0x0000cd00, 0x00000c00, 0x00001300, 0x0000ec00,
+		0x00005f00, 0x00009700, 0x00004400, 0x00001700,
+		0x0000c400, 0x0000a700, 0x00007e00, 0x00003d00,
+		0x00006400, 0x00005d00, 0x00001900, 0x00007300,
+		0x00006000, 0x00008100, 0x00004f00, 0x0000dc00,
+		0x00002200, 0x00002a00, 0x00009000, 0x00008800,
+		0x00004600, 0x0000ee00, 0x0000b800, 0x00001400,
+		0x0000de00, 0x00005e00, 0x00000b00, 0x0000db00,
+		0x0000e000, 0x00003200, 0x00003a00, 0x00000a00,
+		0x00004900, 0x00000600, 0x00002400, 0x00005c00,
+		0x0000c200, 0x0000d300, 0x0000ac00, 0x00006200,
+		0x00009100, 0x00009500, 0x0000e400, 0x00007900,
+		0x0000e700, 0x0000c800, 0x00003700, 0x00006d00,
+		0x00008d00, 0x0000d500, 0x00004e00, 0x0000a900,
+		0x00006c00, 0x00005600, 0x0000f400, 0x0000ea00,
+		0x00006500, 0x00007a00, 0x0000ae00, 0x00000800,
+		0x0000ba00, 0x00007800, 0x00002500, 0x00002e00,
+		0x00001c00, 0x0000a600, 0x0000b400, 0x0000c600,
+		0x0000e800, 0x0000dd00, 0x00007400, 0x00001f00,
+		0x00004b00, 0x0000bd00, 0x00008b00, 0x00008a00,
+		0x00007000, 0x00003e00, 0x0000b500, 0x00006600,
+		0x00004800, 0x00000300, 0x0000f600, 0x00000e00,
+		0x00006100, 0x00003500, 0x00005700, 0x0000b900,
+		0x00008600, 0x0000c100, 0x00001d00, 0x00009e00,
+		0x0000e100, 0x0000f800, 0x00009800, 0x00001100,
+		0x00006900, 0x0000d900, 0x00008e00, 0x00009400,
+		0x00009b00, 0x00001e00, 0x00008700, 0x0000e900,
+		0x0000ce00, 0x00005500, 0x00002800, 0x0000df00,
+		0x00008c00, 0x0000a100, 0x00008900, 0x00000d00,
+		0x0000bf00, 0x0000e600, 0x00004200, 0x00006800,
+		0x00004100, 0x00009900, 0x00002d00, 0x00000f00,
+		0x0000b000, 0x00005400, 0x0000bb00, 0x00001600,
+	}, {
+		0x00630000, 0x007c0000, 0x00770000, 0x007b0000,
+		0x00f20000, 0x006b0000, 0x006f0000, 0x00c50000,
+		0x00300000, 0x00010000, 0x00670000, 0x002b0000,
+		0x00fe0000, 0x00d70000, 0x00ab0000, 0x00760000,
+		0x00ca0000, 0x00820000, 0x00c90000, 0x007d0000,
+		0x00fa0000, 0x00590000, 0x00470000, 0x00f00000,
+		0x00ad0000, 0x00d40000, 0x00a20000, 0x00af0000,
+		0x009c0000, 0x00a40000, 0x00720000, 0x00c00000,
+		0x00b70000, 0x00fd0000, 0x00930000, 0x00260000,
+		0x00360000, 0x003f0000, 0x00f70000, 0x00cc0000,
+		0x00340000, 0x00a50000, 0x00e50000, 0x00f10000,
+		0x00710000, 0x00d80000, 0x00310000, 0x00150000,
+		0x00040000, 0x00c70000, 0x00230000, 0x00c30000,
+		0x00180000, 0x00960000, 0x00050000, 0x009a0000,
+		0x00070000, 0x00120000, 0x00800000, 0x00e20000,
+		0x00eb0000, 0x00270000, 0x00b20000, 0x00750000,
+		0x00090000, 0x00830000, 0x002c0000, 0x001a0000,
+		0x001b0000, 0x006e0000, 0x005a0000, 0x00a00000,
+		0x00520000, 0x003b0000, 0x00d60000, 0x00b30000,
+		0x00290000, 0x00e30000, 0x002f0000, 0x00840000,
+		0x00530000, 0x00d10000, 0x00000000, 0x00ed0000,
+		0x00200000, 0x00fc0000, 0x00b10000, 0x005b0000,
+		0x006a0000, 0x00cb0000, 0x00be0000, 0x00390000,
+		0x004a0000, 0x004c0000, 0x00580000, 0x00cf0000,
+		0x00d00000, 0x00ef0000, 0x00aa0000, 0x00fb0000,
+		0x00430000, 0x004d0000, 0x00330000, 0x00850000,
+		0x00450000, 0x00f90000, 0x00020000, 0x007f0000,
+		0x00500000, 0x003c0000, 0x009f0000, 0x00a80000,
+		0x00510000, 0x00a30000, 0x00400000, 0x008f0000,
+		0x00920000, 0x009d0000, 0x00380000, 0x00f50000,
+		0x00bc0000, 0x00b60000, 0x00da0000, 0x00210000,
+		0x00100000, 0x00ff0000, 0x00f30000, 0x00d20000,
+		0x00cd0000, 0x000c0000, 0x00130000, 0x00ec0000,
+		0x005f0000, 0x00970000, 0x00440000, 0x00170000,
+		0x00c40000, 0x00a70000, 0x007e0000, 0x003d0000,
+		0x00640000, 0x005d0000, 0x00190000, 0x00730000,
+		0x00600000, 0x00810000, 0x004f0000, 0x00dc0000,
+		0x00220000, 0x002a0000, 0x00900000, 0x00880000,
+		0x00460000, 0x00ee0000, 0x00b80000, 0x00140000,
+		0x00de0000, 0x005e0000, 0x000b0000, 0x00db0000,
+		0x00e00000, 0x00320000, 0x003a0000, 0x000a0000,
+		0x00490000, 0x00060000, 0x00240000, 0x005c0000,
+		0x00c20000, 0x00d30000, 0x00ac0000, 0x00620000,
+		0x00910000, 0x00950000, 0x00e40000, 0x00790000,
+		0x00e70000, 0x00c80000, 0x00370000, 0x006d0000,
+		0x008d0000, 0x00d50000, 0x004e0000, 0x00a90000,
+		0x006c0000, 0x00560000, 0x00f40000, 0x00ea0000,
+		0x00650000, 0x007a0000, 0x00ae0000, 0x00080000,
+		0x00ba0000, 0x00780000, 0x00250000, 0x002e0000,
+		0x001c0000, 0x00a60000, 0x00b40000, 0x00c60000,
+		0x00e80000, 0x00dd0000, 0x00740000, 0x001f0000,
+		0x004b0000, 0x00bd0000, 0x008b0000, 0x008a0000,
+		0x00700000, 0x003e0000, 0x00b50000, 0x00660000,
+		0x00480000, 0x00030000, 0x00f60000, 0x000e0000,
+		0x00610000, 0x00350000, 0x00570000, 0x00b90000,
+		0x00860000, 0x00c10000, 0x001d0000, 0x009e0000,
+		0x00e10000, 0x00f80000, 0x00980000, 0x00110000,
+		0x00690000, 0x00d90000, 0x008e0000, 0x00940000,
+		0x009b0000, 0x001e0000, 0x00870000, 0x00e90000,
+		0x00ce0000, 0x00550000, 0x00280000, 0x00df0000,
+		0x008c0000, 0x00a10000, 0x00890000, 0x000d0000,
+		0x00bf0000, 0x00e60000, 0x00420000, 0x00680000,
+		0x00410000, 0x00990000, 0x002d0000, 0x000f0000,
+		0x00b00000, 0x00540000, 0x00bb0000, 0x00160000,
+	}, {
+		0x63000000, 0x7c000000, 0x77000000, 0x7b000000,
+		0xf2000000, 0x6b000000, 0x6f000000, 0xc5000000,
+		0x30000000, 0x01000000, 0x67000000, 0x2b000000,
+		0xfe000000, 0xd7000000, 0xab000000, 0x76000000,
+		0xca000000, 0x82000000, 0xc9000000, 0x7d000000,
+		0xfa000000, 0x59000000, 0x47000000, 0xf0000000,
+		0xad000000, 0xd4000000, 0xa2000000, 0xaf000000,
+		0x9c000000, 0xa4000000, 0x72000000, 0xc0000000,
+		0xb7000000, 0xfd000000, 0x93000000, 0x26000000,
+		0x36000000, 0x3f000000, 0xf7000000, 0xcc000000,
+		0x34000000, 0xa5000000, 0xe5000000, 0xf1000000,
+		0x71000000, 0xd8000000, 0x31000000, 0x15000000,
+		0x04000000, 0xc7000000, 0x23000000, 0xc3000000,
+		0x18000000, 0x96000000, 0x05000000, 0x9a000000,
+		0x07000000, 0x12000000, 0x80000000, 0xe2000000,
+		0xeb000000, 0x27000000, 0xb2000000, 0x75000000,
+		0x09000000, 0x83000000, 0x2c000000, 0x1a000000,
+		0x1b000000, 0x6e000000, 0x5a000000, 0xa0000000,
+		0x52000000, 0x3b000000, 0xd6000000, 0xb3000000,
+		0x29000000, 0xe3000000, 0x2f000000, 0x84000000,
+		0x53000000, 0xd1000000, 0x00000000, 0xed000000,
+		0x20000000, 0xfc000000, 0xb1000000, 0x5b000000,
+		0x6a000000, 0xcb000000, 0xbe000000, 0x39000000,
+		0x4a000000, 0x4c000000, 0x58000000, 0xcf000000,
+		0xd0000000, 0xef000000, 0xaa000000, 0xfb000000,
+		0x43000000, 0x4d000000, 0x33000000, 0x85000000,
+		0x45000000, 0xf9000000, 0x02000000, 0x7f000000,
+		0x50000000, 0x3c000000, 0x9f000000, 0xa8000000,
+		0x51000000, 0xa3000000, 0x40000000, 0x8f000000,
+		0x92000000, 0x9d000000, 0x38000000, 0xf5000000,
+		0xbc000000, 0xb6000000, 0xda000000, 0x21000000,
+		0x10000000, 0xff000000, 0xf3000000, 0xd2000000,
+		0xcd000000, 0x0c000000, 0x13000000, 0xec000000,
+		0x5f000000, 0x97000000, 0x44000000, 0x17000000,
+		0xc4000000, 0xa7000000, 0x7e000000, 0x3d000000,
+		0x64000000, 0x5d000000, 0x19000000, 0x73000000,
+		0x60000000, 0x81000000, 0x4f000000, 0xdc000000,
+		0x22000000, 0x2a000000, 0x90000000, 0x88000000,
+		0x46000000, 0xee000000, 0xb8000000, 0x14000000,
+		0xde000000, 0x5e000000, 0x0b000000, 0xdb000000,
+		0xe0000000, 0x32000000, 0x3a000000, 0x0a000000,
+		0x49000000, 0x06000000, 0x24000000, 0x5c000000,
+		0xc2000000, 0xd3000000, 0xac000000, 0x62000000,
+		0x91000000, 0x95000000, 0xe4000000, 0x79000000,
+		0xe7000000, 0xc8000000, 0x37000000, 0x6d000000,
+		0x8d000000, 0xd5000000, 0x4e000000, 0xa9000000,
+		0x6c000000, 0x56000000, 0xf4000000, 0xea000000,
+		0x65000000, 0x7a000000, 0xae000000, 0x08000000,
+		0xba000000, 0x78000000, 0x25000000, 0x2e000000,
+		0x1c000000, 0xa6000000, 0xb4000000, 0xc6000000,
+		0xe8000000, 0xdd000000, 0x74000000, 0x1f000000,
+		0x4b000000, 0xbd000000, 0x8b000000, 0x8a000000,
+		0x70000000, 0x3e000000, 0xb5000000, 0x66000000,
+		0x48000000, 0x03000000, 0xf6000000, 0x0e000000,
+		0x61000000, 0x35000000, 0x57000000, 0xb9000000,
+		0x86000000, 0xc1000000, 0x1d000000, 0x9e000000,
+		0xe1000000, 0xf8000000, 0x98000000, 0x11000000,
+		0x69000000, 0xd9000000, 0x8e000000, 0x94000000,
+		0x9b000000, 0x1e000000, 0x87000000, 0xe9000000,
+		0xce000000, 0x55000000, 0x28000000, 0xdf000000,
+		0x8c000000, 0xa1000000, 0x89000000, 0x0d000000,
+		0xbf000000, 0xe6000000, 0x42000000, 0x68000000,
+		0x41000000, 0x99000000, 0x2d000000, 0x0f000000,
+		0xb0000000, 0x54000000, 0xbb000000, 0x16000000,
 	}
+};
 
-	for (i = 0; i < 256; ++i) {
-		p = (i ? pow_tab[255 - log_tab[i]] : 0);
-		q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
-		p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
-		sbx_tab[i] = p;
-		isb_tab[p] = (u8) i;
+const u32 crypto_it_tab[4][256] = {
+	{
+		0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
+		0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
+		0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
+		0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5,
+		0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d,
+		0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+		0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295,
+		0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e,
+		0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
+		0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d,
+		0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362,
+		0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+		0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52,
+		0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566,
+		0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
+		0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed,
+		0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e,
+		0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+		0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4,
+		0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd,
+		0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
+		0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060,
+		0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967,
+		0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+		0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000,
+		0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c,
+		0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
+		0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624,
+		0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b,
+		0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+		0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12,
+		0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14,
+		0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
+		0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b,
+		0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8,
+		0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+		0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7,
+		0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177,
+		0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
+		0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322,
+		0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498,
+		0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+		0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54,
+		0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382,
+		0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
+		0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb,
+		0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83,
+		0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+		0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029,
+		0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235,
+		0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
+		0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117,
+		0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4,
+		0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+		0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb,
+		0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d,
+		0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
+		0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a,
+		0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773,
+		0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+		0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2,
+		0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff,
+		0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
+		0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
+	}, {
+		0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96,
+		0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93,
+		0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525,
+		0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f,
+		0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1,
+		0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
+		0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da,
+		0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44,
+		0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd,
+		0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4,
+		0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245,
+		0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
+		0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7,
+		0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a,
+		0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5,
+		0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c,
+		0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1,
+		0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
+		0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475,
+		0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51,
+		0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46,
+		0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff,
+		0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777,
+		0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
+		0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000,
+		0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e,
+		0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627,
+		0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a,
+		0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e,
+		0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
+		0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d,
+		0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8,
+		0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd,
+		0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34,
+		0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863,
+		0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
+		0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d,
+		0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0,
+		0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722,
+		0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef,
+		0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836,
+		0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
+		0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462,
+		0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5,
+		0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3,
+		0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b,
+		0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8,
+		0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
+		0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6,
+		0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0,
+		0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315,
+		0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f,
+		0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df,
+		0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
+		0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e,
+		0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13,
+		0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89,
+		0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c,
+		0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf,
+		0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
+		0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f,
+		0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41,
+		0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490,
+		0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042,
+	}, {
+		0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e,
+		0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303,
+		0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c,
+		0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3,
+		0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0,
+		0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
+		0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59,
+		0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8,
+		0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71,
+		0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a,
+		0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f,
+		0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
+		0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8,
+		0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab,
+		0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508,
+		0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82,
+		0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2,
+		0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
+		0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb,
+		0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110,
+		0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd,
+		0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15,
+		0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e,
+		0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
+		0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000,
+		0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72,
+		0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739,
+		0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e,
+		0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91,
+		0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
+		0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17,
+		0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9,
+		0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60,
+		0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e,
+		0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1,
+		0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
+		0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1,
+		0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3,
+		0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264,
+		0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90,
+		0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b,
+		0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
+		0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246,
+		0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af,
+		0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312,
+		0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb,
+		0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a,
+		0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
+		0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c,
+		0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066,
+		0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8,
+		0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6,
+		0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04,
+		0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
+		0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41,
+		0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347,
+		0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c,
+		0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1,
+		0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37,
+		0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
+		0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40,
+		0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195,
+		0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1,
+		0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257,
+	}, {
+		0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27,
+		0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3,
+		0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02,
+		0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362,
+		0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe,
+		0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
+		0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952,
+		0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9,
+		0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9,
+		0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace,
+		0x63184adf, 0xe582311a, 0x97603351, 0x62457f53,
+		0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
+		0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b,
+		0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55,
+		0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837,
+		0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216,
+		0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269,
+		0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
+		0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6,
+		0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e,
+		0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6,
+		0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550,
+		0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9,
+		0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
+		0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000,
+		0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a,
+		0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d,
+		0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36,
+		0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b,
+		0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
+		0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b,
+		0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e,
+		0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f,
+		0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb,
+		0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4,
+		0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
+		0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129,
+		0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1,
+		0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9,
+		0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033,
+		0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4,
+		0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
+		0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e,
+		0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3,
+		0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225,
+		0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b,
+		0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f,
+		0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
+		0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0,
+		0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2,
+		0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7,
+		0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691,
+		0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496,
+		0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
+		0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b,
+		0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6,
+		0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13,
+		0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147,
+		0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7,
+		0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
+		0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3,
+		0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d,
+		0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156,
+		0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8,
 	}
+};
 
-	for (i = 0; i < 256; ++i) {
-		p = sbx_tab[i];
-
-		t = p;
-		crypto_fl_tab[0][i] = t;
-		crypto_fl_tab[1][i] = rol32(t, 8);
-		crypto_fl_tab[2][i] = rol32(t, 16);
-		crypto_fl_tab[3][i] = rol32(t, 24);
-
-		t = ((u32) ff_mult(2, p)) |
-		    ((u32) p << 8) |
-		    ((u32) p << 16) | ((u32) ff_mult(3, p) << 24);
-
-		crypto_ft_tab[0][i] = t;
-		crypto_ft_tab[1][i] = rol32(t, 8);
-		crypto_ft_tab[2][i] = rol32(t, 16);
-		crypto_ft_tab[3][i] = rol32(t, 24);
-
-		p = isb_tab[i];
-
-		t = p;
-		crypto_il_tab[0][i] = t;
-		crypto_il_tab[1][i] = rol32(t, 8);
-		crypto_il_tab[2][i] = rol32(t, 16);
-		crypto_il_tab[3][i] = rol32(t, 24);
-
-		t = ((u32) ff_mult(14, p)) |
-		    ((u32) ff_mult(9, p) << 8) |
-		    ((u32) ff_mult(13, p) << 16) |
-		    ((u32) ff_mult(11, p) << 24);
-
-		crypto_it_tab[0][i] = t;
-		crypto_it_tab[1][i] = rol32(t, 8);
-		crypto_it_tab[2][i] = rol32(t, 16);
-		crypto_it_tab[3][i] = rol32(t, 24);
+const u32 crypto_il_tab[4][256] = {
+	{
+		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
+		0x00000030, 0x00000036, 0x000000a5, 0x00000038,
+		0x000000bf, 0x00000040, 0x000000a3, 0x0000009e,
+		0x00000081, 0x000000f3, 0x000000d7, 0x000000fb,
+		0x0000007c, 0x000000e3, 0x00000039, 0x00000082,
+		0x0000009b, 0x0000002f, 0x000000ff, 0x00000087,
+		0x00000034, 0x0000008e, 0x00000043, 0x00000044,
+		0x000000c4, 0x000000de, 0x000000e9, 0x000000cb,
+		0x00000054, 0x0000007b, 0x00000094, 0x00000032,
+		0x000000a6, 0x000000c2, 0x00000023, 0x0000003d,
+		0x000000ee, 0x0000004c, 0x00000095, 0x0000000b,
+		0x00000042, 0x000000fa, 0x000000c3, 0x0000004e,
+		0x00000008, 0x0000002e, 0x000000a1, 0x00000066,
+		0x00000028, 0x000000d9, 0x00000024, 0x000000b2,
+		0x00000076, 0x0000005b, 0x000000a2, 0x00000049,
+		0x0000006d, 0x0000008b, 0x000000d1, 0x00000025,
+		0x00000072, 0x000000f8, 0x000000f6, 0x00000064,
+		0x00000086, 0x00000068, 0x00000098, 0x00000016,
+		0x000000d4, 0x000000a4, 0x0000005c, 0x000000cc,
+		0x0000005d, 0x00000065, 0x000000b6, 0x00000092,
+		0x0000006c, 0x00000070, 0x00000048, 0x00000050,
+		0x000000fd, 0x000000ed, 0x000000b9, 0x000000da,
+		0x0000005e, 0x00000015, 0x00000046, 0x00000057,
+		0x000000a7, 0x0000008d, 0x0000009d, 0x00000084,
+		0x00000090, 0x000000d8, 0x000000ab, 0x00000000,
+		0x0000008c, 0x000000bc, 0x000000d3, 0x0000000a,
+		0x000000f7, 0x000000e4, 0x00000058, 0x00000005,
+		0x000000b8, 0x000000b3, 0x00000045, 0x00000006,
+		0x000000d0, 0x0000002c, 0x0000001e, 0x0000008f,
+		0x000000ca, 0x0000003f, 0x0000000f, 0x00000002,
+		0x000000c1, 0x000000af, 0x000000bd, 0x00000003,
+		0x00000001, 0x00000013, 0x0000008a, 0x0000006b,
+		0x0000003a, 0x00000091, 0x00000011, 0x00000041,
+		0x0000004f, 0x00000067, 0x000000dc, 0x000000ea,
+		0x00000097, 0x000000f2, 0x000000cf, 0x000000ce,
+		0x000000f0, 0x000000b4, 0x000000e6, 0x00000073,
+		0x00000096, 0x000000ac, 0x00000074, 0x00000022,
+		0x000000e7, 0x000000ad, 0x00000035, 0x00000085,
+		0x000000e2, 0x000000f9, 0x00000037, 0x000000e8,
+		0x0000001c, 0x00000075, 0x000000df, 0x0000006e,
+		0x00000047, 0x000000f1, 0x0000001a, 0x00000071,
+		0x0000001d, 0x00000029, 0x000000c5, 0x00000089,
+		0x0000006f, 0x000000b7, 0x00000062, 0x0000000e,
+		0x000000aa, 0x00000018, 0x000000be, 0x0000001b,
+		0x000000fc, 0x00000056, 0x0000003e, 0x0000004b,
+		0x000000c6, 0x000000d2, 0x00000079, 0x00000020,
+		0x0000009a, 0x000000db, 0x000000c0, 0x000000fe,
+		0x00000078, 0x000000cd, 0x0000005a, 0x000000f4,
+		0x0000001f, 0x000000dd, 0x000000a8, 0x00000033,
+		0x00000088, 0x00000007, 0x000000c7, 0x00000031,
+		0x000000b1, 0x00000012, 0x00000010, 0x00000059,
+		0x00000027, 0x00000080, 0x000000ec, 0x0000005f,
+		0x00000060, 0x00000051, 0x0000007f, 0x000000a9,
+		0x00000019, 0x000000b5, 0x0000004a, 0x0000000d,
+		0x0000002d, 0x000000e5, 0x0000007a, 0x0000009f,
+		0x00000093, 0x000000c9, 0x0000009c, 0x000000ef,
+		0x000000a0, 0x000000e0, 0x0000003b, 0x0000004d,
+		0x000000ae, 0x0000002a, 0x000000f5, 0x000000b0,
+		0x000000c8, 0x000000eb, 0x000000bb, 0x0000003c,
+		0x00000083, 0x00000053, 0x00000099, 0x00000061,
+		0x00000017, 0x0000002b, 0x00000004, 0x0000007e,
+		0x000000ba, 0x00000077, 0x000000d6, 0x00000026,
+		0x000000e1, 0x00000069, 0x00000014, 0x00000063,
+		0x00000055, 0x00000021, 0x0000000c, 0x0000007d,
+	}, {
+		0x00005200, 0x00000900, 0x00006a00, 0x0000d500,
+		0x00003000, 0x00003600, 0x0000a500, 0x00003800,
+		0x0000bf00, 0x00004000, 0x0000a300, 0x00009e00,
+		0x00008100, 0x0000f300, 0x0000d700, 0x0000fb00,
+		0x00007c00, 0x0000e300, 0x00003900, 0x00008200,
+		0x00009b00, 0x00002f00, 0x0000ff00, 0x00008700,
+		0x00003400, 0x00008e00, 0x00004300, 0x00004400,
+		0x0000c400, 0x0000de00, 0x0000e900, 0x0000cb00,
+		0x00005400, 0x00007b00, 0x00009400, 0x00003200,
+		0x0000a600, 0x0000c200, 0x00002300, 0x00003d00,
+		0x0000ee00, 0x00004c00, 0x00009500, 0x00000b00,
+		0x00004200, 0x0000fa00, 0x0000c300, 0x00004e00,
+		0x00000800, 0x00002e00, 0x0000a100, 0x00006600,
+		0x00002800, 0x0000d900, 0x00002400, 0x0000b200,
+		0x00007600, 0x00005b00, 0x0000a200, 0x00004900,
+		0x00006d00, 0x00008b00, 0x0000d100, 0x00002500,
+		0x00007200, 0x0000f800, 0x0000f600, 0x00006400,
+		0x00008600, 0x00006800, 0x00009800, 0x00001600,
+		0x0000d400, 0x0000a400, 0x00005c00, 0x0000cc00,
+		0x00005d00, 0x00006500, 0x0000b600, 0x00009200,
+		0x00006c00, 0x00007000, 0x00004800, 0x00005000,
+		0x0000fd00, 0x0000ed00, 0x0000b900, 0x0000da00,
+		0x00005e00, 0x00001500, 0x00004600, 0x00005700,
+		0x0000a700, 0x00008d00, 0x00009d00, 0x00008400,
+		0x00009000, 0x0000d800, 0x0000ab00, 0x00000000,
+		0x00008c00, 0x0000bc00, 0x0000d300, 0x00000a00,
+		0x0000f700, 0x0000e400, 0x00005800, 0x00000500,
+		0x0000b800, 0x0000b300, 0x00004500, 0x00000600,
+		0x0000d000, 0x00002c00, 0x00001e00, 0x00008f00,
+		0x0000ca00, 0x00003f00, 0x00000f00, 0x00000200,
+		0x0000c100, 0x0000af00, 0x0000bd00, 0x00000300,
+		0x00000100, 0x00001300, 0x00008a00, 0x00006b00,
+		0x00003a00, 0x00009100, 0x00001100, 0x00004100,
+		0x00004f00, 0x00006700, 0x0000dc00, 0x0000ea00,
+		0x00009700, 0x0000f200, 0x0000cf00, 0x0000ce00,
+		0x0000f000, 0x0000b400, 0x0000e600, 0x00007300,
+		0x00009600, 0x0000ac00, 0x00007400, 0x00002200,
+		0x0000e700, 0x0000ad00, 0x00003500, 0x00008500,
+		0x0000e200, 0x0000f900, 0x00003700, 0x0000e800,
+		0x00001c00, 0x00007500, 0x0000df00, 0x00006e00,
+		0x00004700, 0x0000f100, 0x00001a00, 0x00007100,
+		0x00001d00, 0x00002900, 0x0000c500, 0x00008900,
+		0x00006f00, 0x0000b700, 0x00006200, 0x00000e00,
+		0x0000aa00, 0x00001800, 0x0000be00, 0x00001b00,
+		0x0000fc00, 0x00005600, 0x00003e00, 0x00004b00,
+		0x0000c600, 0x0000d200, 0x00007900, 0x00002000,
+		0x00009a00, 0x0000db00, 0x0000c000, 0x0000fe00,
+		0x00007800, 0x0000cd00, 0x00005a00, 0x0000f400,
+		0x00001f00, 0x0000dd00, 0x0000a800, 0x00003300,
+		0x00008800, 0x00000700, 0x0000c700, 0x00003100,
+		0x0000b100, 0x00001200, 0x00001000, 0x00005900,
+		0x00002700, 0x00008000, 0x0000ec00, 0x00005f00,
+		0x00006000, 0x00005100, 0x00007f00, 0x0000a900,
+		0x00001900, 0x0000b500, 0x00004a00, 0x00000d00,
+		0x00002d00, 0x0000e500, 0x00007a00, 0x00009f00,
+		0x00009300, 0x0000c900, 0x00009c00, 0x0000ef00,
+		0x0000a000, 0x0000e000, 0x00003b00, 0x00004d00,
+		0x0000ae00, 0x00002a00, 0x0000f500, 0x0000b000,
+		0x0000c800, 0x0000eb00, 0x0000bb00, 0x00003c00,
+		0x00008300, 0x00005300, 0x00009900, 0x00006100,
+		0x00001700, 0x00002b00, 0x00000400, 0x00007e00,
+		0x0000ba00, 0x00007700, 0x0000d600, 0x00002600,
+		0x0000e100, 0x00006900, 0x00001400, 0x00006300,
+		0x00005500, 0x00002100, 0x00000c00, 0x00007d00,
+	}, {
+		0x00520000, 0x00090000, 0x006a0000, 0x00d50000,
+		0x00300000, 0x00360000, 0x00a50000, 0x00380000,
+		0x00bf0000, 0x00400000, 0x00a30000, 0x009e0000,
+		0x00810000, 0x00f30000, 0x00d70000, 0x00fb0000,
+		0x007c0000, 0x00e30000, 0x00390000, 0x00820000,
+		0x009b0000, 0x002f0000, 0x00ff0000, 0x00870000,
+		0x00340000, 0x008e0000, 0x00430000, 0x00440000,
+		0x00c40000, 0x00de0000, 0x00e90000, 0x00cb0000,
+		0x00540000, 0x007b0000, 0x00940000, 0x00320000,
+		0x00a60000, 0x00c20000, 0x00230000, 0x003d0000,
+		0x00ee0000, 0x004c0000, 0x00950000, 0x000b0000,
+		0x00420000, 0x00fa0000, 0x00c30000, 0x004e0000,
+		0x00080000, 0x002e0000, 0x00a10000, 0x00660000,
+		0x00280000, 0x00d90000, 0x00240000, 0x00b20000,
+		0x00760000, 0x005b0000, 0x00a20000, 0x00490000,
+		0x006d0000, 0x008b0000, 0x00d10000, 0x00250000,
+		0x00720000, 0x00f80000, 0x00f60000, 0x00640000,
+		0x00860000, 0x00680000, 0x00980000, 0x00160000,
+		0x00d40000, 0x00a40000, 0x005c0000, 0x00cc0000,
+		0x005d0000, 0x00650000, 0x00b60000, 0x00920000,
+		0x006c0000, 0x00700000, 0x00480000, 0x00500000,
+		0x00fd0000, 0x00ed0000, 0x00b90000, 0x00da0000,
+		0x005e0000, 0x00150000, 0x00460000, 0x00570000,
+		0x00a70000, 0x008d0000, 0x009d0000, 0x00840000,
+		0x00900000, 0x00d80000, 0x00ab0000, 0x00000000,
+		0x008c0000, 0x00bc0000, 0x00d30000, 0x000a0000,
+		0x00f70000, 0x00e40000, 0x00580000, 0x00050000,
+		0x00b80000, 0x00b30000, 0x00450000, 0x00060000,
+		0x00d00000, 0x002c0000, 0x001e0000, 0x008f0000,
+		0x00ca0000, 0x003f0000, 0x000f0000, 0x00020000,
+		0x00c10000, 0x00af0000, 0x00bd0000, 0x00030000,
+		0x00010000, 0x00130000, 0x008a0000, 0x006b0000,
+		0x003a0000, 0x00910000, 0x00110000, 0x00410000,
+		0x004f0000, 0x00670000, 0x00dc0000, 0x00ea0000,
+		0x00970000, 0x00f20000, 0x00cf0000, 0x00ce0000,
+		0x00f00000, 0x00b40000, 0x00e60000, 0x00730000,
+		0x00960000, 0x00ac0000, 0x00740000, 0x00220000,
+		0x00e70000, 0x00ad0000, 0x00350000, 0x00850000,
+		0x00e20000, 0x00f90000, 0x00370000, 0x00e80000,
+		0x001c0000, 0x00750000, 0x00df0000, 0x006e0000,
+		0x00470000, 0x00f10000, 0x001a0000, 0x00710000,
+		0x001d0000, 0x00290000, 0x00c50000, 0x00890000,
+		0x006f0000, 0x00b70000, 0x00620000, 0x000e0000,
+		0x00aa0000, 0x00180000, 0x00be0000, 0x001b0000,
+		0x00fc0000, 0x00560000, 0x003e0000, 0x004b0000,
+		0x00c60000, 0x00d20000, 0x00790000, 0x00200000,
+		0x009a0000, 0x00db0000, 0x00c00000, 0x00fe0000,
+		0x00780000, 0x00cd0000, 0x005a0000, 0x00f40000,
+		0x001f0000, 0x00dd0000, 0x00a80000, 0x00330000,
+		0x00880000, 0x00070000, 0x00c70000, 0x00310000,
+		0x00b10000, 0x00120000, 0x00100000, 0x00590000,
+		0x00270000, 0x00800000, 0x00ec0000, 0x005f0000,
+		0x00600000, 0x00510000, 0x007f0000, 0x00a90000,
+		0x00190000, 0x00b50000, 0x004a0000, 0x000d0000,
+		0x002d0000, 0x00e50000, 0x007a0000, 0x009f0000,
+		0x00930000, 0x00c90000, 0x009c0000, 0x00ef0000,
+		0x00a00000, 0x00e00000, 0x003b0000, 0x004d0000,
+		0x00ae0000, 0x002a0000, 0x00f50000, 0x00b00000,
+		0x00c80000, 0x00eb0000, 0x00bb0000, 0x003c0000,
+		0x00830000, 0x00530000, 0x00990000, 0x00610000,
+		0x00170000, 0x002b0000, 0x00040000, 0x007e0000,
+		0x00ba0000, 0x00770000, 0x00d60000, 0x00260000,
+		0x00e10000, 0x00690000, 0x00140000, 0x00630000,
+		0x00550000, 0x00210000, 0x000c0000, 0x007d0000,
+	}, {
+		0x52000000, 0x09000000, 0x6a000000, 0xd5000000,
+		0x30000000, 0x36000000, 0xa5000000, 0x38000000,
+		0xbf000000, 0x40000000, 0xa3000000, 0x9e000000,
+		0x81000000, 0xf3000000, 0xd7000000, 0xfb000000,
+		0x7c000000, 0xe3000000, 0x39000000, 0x82000000,
+		0x9b000000, 0x2f000000, 0xff000000, 0x87000000,
+		0x34000000, 0x8e000000, 0x43000000, 0x44000000,
+		0xc4000000, 0xde000000, 0xe9000000, 0xcb000000,
+		0x54000000, 0x7b000000, 0x94000000, 0x32000000,
+		0xa6000000, 0xc2000000, 0x23000000, 0x3d000000,
+		0xee000000, 0x4c000000, 0x95000000, 0x0b000000,
+		0x42000000, 0xfa000000, 0xc3000000, 0x4e000000,
+		0x08000000, 0x2e000000, 0xa1000000, 0x66000000,
+		0x28000000, 0xd9000000, 0x24000000, 0xb2000000,
+		0x76000000, 0x5b000000, 0xa2000000, 0x49000000,
+		0x6d000000, 0x8b000000, 0xd1000000, 0x25000000,
+		0x72000000, 0xf8000000, 0xf6000000, 0x64000000,
+		0x86000000, 0x68000000, 0x98000000, 0x16000000,
+		0xd4000000, 0xa4000000, 0x5c000000, 0xcc000000,
+		0x5d000000, 0x65000000, 0xb6000000, 0x92000000,
+		0x6c000000, 0x70000000, 0x48000000, 0x50000000,
+		0xfd000000, 0xed000000, 0xb9000000, 0xda000000,
+		0x5e000000, 0x15000000, 0x46000000, 0x57000000,
+		0xa7000000, 0x8d000000, 0x9d000000, 0x84000000,
+		0x90000000, 0xd8000000, 0xab000000, 0x00000000,
+		0x8c000000, 0xbc000000, 0xd3000000, 0x0a000000,
+		0xf7000000, 0xe4000000, 0x58000000, 0x05000000,
+		0xb8000000, 0xb3000000, 0x45000000, 0x06000000,
+		0xd0000000, 0x2c000000, 0x1e000000, 0x8f000000,
+		0xca000000, 0x3f000000, 0x0f000000, 0x02000000,
+		0xc1000000, 0xaf000000, 0xbd000000, 0x03000000,
+		0x01000000, 0x13000000, 0x8a000000, 0x6b000000,
+		0x3a000000, 0x91000000, 0x11000000, 0x41000000,
+		0x4f000000, 0x67000000, 0xdc000000, 0xea000000,
+		0x97000000, 0xf2000000, 0xcf000000, 0xce000000,
+		0xf0000000, 0xb4000000, 0xe6000000, 0x73000000,
+		0x96000000, 0xac000000, 0x74000000, 0x22000000,
+		0xe7000000, 0xad000000, 0x35000000, 0x85000000,
+		0xe2000000, 0xf9000000, 0x37000000, 0xe8000000,
+		0x1c000000, 0x75000000, 0xdf000000, 0x6e000000,
+		0x47000000, 0xf1000000, 0x1a000000, 0x71000000,
+		0x1d000000, 0x29000000, 0xc5000000, 0x89000000,
+		0x6f000000, 0xb7000000, 0x62000000, 0x0e000000,
+		0xaa000000, 0x18000000, 0xbe000000, 0x1b000000,
+		0xfc000000, 0x56000000, 0x3e000000, 0x4b000000,
+		0xc6000000, 0xd2000000, 0x79000000, 0x20000000,
+		0x9a000000, 0xdb000000, 0xc0000000, 0xfe000000,
+		0x78000000, 0xcd000000, 0x5a000000, 0xf4000000,
+		0x1f000000, 0xdd000000, 0xa8000000, 0x33000000,
+		0x88000000, 0x07000000, 0xc7000000, 0x31000000,
+		0xb1000000, 0x12000000, 0x10000000, 0x59000000,
+		0x27000000, 0x80000000, 0xec000000, 0x5f000000,
+		0x60000000, 0x51000000, 0x7f000000, 0xa9000000,
+		0x19000000, 0xb5000000, 0x4a000000, 0x0d000000,
+		0x2d000000, 0xe5000000, 0x7a000000, 0x9f000000,
+		0x93000000, 0xc9000000, 0x9c000000, 0xef000000,
+		0xa0000000, 0xe0000000, 0x3b000000, 0x4d000000,
+		0xae000000, 0x2a000000, 0xf5000000, 0xb0000000,
+		0xc8000000, 0xeb000000, 0xbb000000, 0x3c000000,
+		0x83000000, 0x53000000, 0x99000000, 0x61000000,
+		0x17000000, 0x2b000000, 0x04000000, 0x7e000000,
+		0xba000000, 0x77000000, 0xd6000000, 0x26000000,
+		0xe1000000, 0x69000000, 0x14000000, 0x63000000,
+		0x55000000, 0x21000000, 0x0c000000, 0x7d000000,
 	}
-}
+};
+
+EXPORT_SYMBOL_GPL(crypto_ft_tab);
+EXPORT_SYMBOL_GPL(crypto_fl_tab);
+EXPORT_SYMBOL_GPL(crypto_it_tab);
+EXPORT_SYMBOL_GPL(crypto_il_tab);
 
 /* initialise the key schedule from the user supplied key */
 
@@ -491,7 +1457,6 @@ static struct crypto_alg aes_alg = {
 
 static int __init aes_init(void)
 {
-	gen_tabs();
 	return crypto_register_alg(&aes_alg);
 }
 
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 40008d67ee3d..656a4c66a568 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -23,10 +23,10 @@ struct crypto_aes_ctx {
 	u32 key_dec[AES_MAX_KEYLENGTH_U32];
 };
 
-extern u32 crypto_ft_tab[4][256];
-extern u32 crypto_fl_tab[4][256];
-extern u32 crypto_it_tab[4][256];
-extern u32 crypto_il_tab[4][256];
+extern const u32 crypto_ft_tab[4][256];
+extern const u32 crypto_fl_tab[4][256];
+extern const u32 crypto_it_tab[4][256];
+extern const u32 crypto_il_tab[4][256];
 
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);
-- 
cgit v1.2.3


From 468a15bb4cc61694495cc5ed7ffca29e87c79b69 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 16 Dec 2008 08:07:03 +0100
Subject: sched, trace: update trace_sched_wakeup()

Impact: extend the wakeup tracepoint with the info whether the wakeup was real

Add the information needed to distinguish 'real' wakeups from 'false'
wakeups.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/sched.h             | 4 ++--
 kernel/sched.c                    | 2 +-
 kernel/trace/trace_sched_switch.c | 2 +-
 kernel/trace/trace_sched_wakeup.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/sched.h b/include/trace/sched.h
index bc4c9eadc6ba..0d81098ee9fc 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -17,8 +17,8 @@ DECLARE_TRACE(sched_wait_task,
 		TPARGS(rq, p));
 
 DECLARE_TRACE(sched_wakeup,
-	TPPROTO(struct rq *rq, struct task_struct *p),
-		TPARGS(rq, p));
+	TPPROTO(struct rq *rq, struct task_struct *p, int success),
+		TPARGS(rq, p, success));
 
 DECLARE_TRACE(sched_wakeup_new,
 	TPPROTO(struct rq *rq, struct task_struct *p, int success),
diff --git a/kernel/sched.c b/kernel/sched.c
index ceda5799466e..dcb39bc88f6c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2324,7 +2324,7 @@ out_activate:
 	success = 1;
 
 out_running:
-	trace_sched_wakeup(rq, p);
+	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index add2c1fdae92..df175cb4564f 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -49,7 +49,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
 }
 
 static void
-probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
 {
 	struct trace_array_cpu *data;
 	unsigned long flags;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0067b49746c1..43586b689e31 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -211,7 +211,7 @@ static void wakeup_reset(struct trace_array *tr)
 }
 
 static void
-probe_wakeup(struct rq *rq, struct task_struct *p)
+probe_wakeup(struct rq *rq, struct task_struct *p, int success)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
-- 
cgit v1.2.3


From 91d5d45ee0a8978870fd12e5c3fe394a530ec2ed Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 25 Dec 2008 13:38:58 +0100
Subject: [S390] iucv: Locking free version of iucv_message_(receive|send)

Provide a locking free version of iucv_message_receive and iucv_message_send
that do not call local_bh_enable in a spin_lock_(bh|irqsave)() context.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
---
 include/net/iucv/iucv.h |  45 ++++++++++++++
 net/iucv/iucv.c         | 152 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 161 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index fd70adbb3566..5e310c8d8e2f 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -337,11 +337,34 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
  * established paths. This function will deal with RMDATA messages
  * embedded in struct iucv_message as well.
  *
+ * Locking:	local_bh_enable/local_bh_disable
+ *
  * Returns the result from the CP IUCV call.
  */
 int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
 			 u8 flags, void *buffer, size_t size, size_t *residual);
 
+/**
+ * __iucv_message_receive
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: flags that affect how the message is received (IUCV_IPBUFLST)
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of data buffer
+ * @residual:
+ *
+ * This function receives messages that are being sent to you over
+ * established paths. This function will deal with RMDATA messages
+ * embedded in struct iucv_message as well.
+ *
+ * Locking:	no locking.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
+			   u8 flags, void *buffer, size_t size,
+			   size_t *residual);
+
 /**
  * iucv_message_reject
  * @path: address of iucv path structure
@@ -386,11 +409,33 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
  * transmitted is in a buffer and this is a one-way message and the
  * receiver will not reply to the message.
  *
+ * Locking:	local_bh_enable/local_bh_disable
+ *
  * Returns the result from the CP IUCV call.
  */
 int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
 		      u8 flags, u32 srccls, void *buffer, size_t size);
 
+/**
+ * __iucv_message_send
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
+ * @srccls: source class of message
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of send buffer
+ *
+ * This function transmits data to another application. Data to be
+ * transmitted is in a buffer and this is a one-way message and the
+ * receiver will not reply to the message.
+ *
+ * Locking:	no locking.
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
+			u8 flags, u32 srccls, void *buffer, size_t size);
+
 /**
  * iucv_message_send2way
  * @path: address of iucv path structure
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index d7b54b5bfa69..6bf51f7e597f 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -957,7 +957,52 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg,
 EXPORT_SYMBOL(iucv_message_purge);
 
 /**
- * iucv_message_receive
+ * iucv_message_receive_iprmdata
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is received (IUCV_IPBUFLST)
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of data buffer
+ * @residual:
+ *
+ * Internal function used by iucv_message_receive and __iucv_message_receive
+ * to receive RMDATA data stored in struct iucv_message.
+ */
+static int iucv_message_receive_iprmdata(struct iucv_path *path,
+					 struct iucv_message *msg,
+					 u8 flags, void *buffer,
+					 size_t size, size_t *residual)
+{
+	struct iucv_array *array;
+	u8 *rmmsg;
+	size_t copy;
+
+	/*
+	 * Message is 8 bytes long and has been stored to the
+	 * message descriptor itself.
+	 */
+	if (residual)
+		*residual = abs(size - 8);
+	rmmsg = msg->rmmsg;
+	if (flags & IUCV_IPBUFLST) {
+		/* Copy to struct iucv_array. */
+		size = (size < 8) ? size : 8;
+		for (array = buffer; size > 0; array++) {
+			copy = min_t(size_t, size, array->length);
+			memcpy((u8 *)(addr_t) array->address,
+				rmmsg, copy);
+			rmmsg += copy;
+			size -= copy;
+		}
+	} else {
+		/* Copy to direct buffer. */
+		memcpy(buffer, rmmsg, min_t(size_t, size, 8));
+	}
+	return 0;
+}
+
+/**
+ * __iucv_message_receive
  * @path: address of iucv path structure
  * @msg: address of iucv msg structure
  * @flags: how the message is received (IUCV_IPBUFLST)
@@ -969,44 +1014,19 @@ EXPORT_SYMBOL(iucv_message_purge);
  * established paths. This function will deal with RMDATA messages
  * embedded in struct iucv_message as well.
  *
+ * Locking:	no locking
+ *
  * Returns the result from the CP IUCV call.
  */
-int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
-			 u8 flags, void *buffer, size_t size, size_t *residual)
+int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
+			   u8 flags, void *buffer, size_t size, size_t *residual)
 {
 	union iucv_param *parm;
-	struct iucv_array *array;
-	u8 *rmmsg;
-	size_t copy;
 	int rc;
 
-	if (msg->flags & IUCV_IPRMDATA) {
-		/*
-		 * Message is 8 bytes long and has been stored to the
-		 * message descriptor itself.
-		 */
-		rc = (size < 8) ? 5 : 0;
-		if (residual)
-			*residual = abs(size - 8);
-		rmmsg = msg->rmmsg;
-		if (flags & IUCV_IPBUFLST) {
-			/* Copy to struct iucv_array. */
-			size = (size < 8) ? size : 8;
-			for (array = buffer; size > 0; array++) {
-				copy = min_t(size_t, size, array->length);
-				memcpy((u8 *)(addr_t) array->address,
-				       rmmsg, copy);
-				rmmsg += copy;
-				size -= copy;
-			}
-		} else {
-			/* Copy to direct buffer. */
-			memcpy(buffer, rmmsg, min_t(size_t, size, 8));
-		}
-		return 0;
-	}
-
-	local_bh_disable();
+	if (msg->flags & IUCV_IPRMDATA)
+		return iucv_message_receive_iprmdata(path, msg, flags,
+						     buffer, size, residual);
 	parm = iucv_param[smp_processor_id()];
 	memset(parm, 0, sizeof(union iucv_param));
 	parm->db.ipbfadr1 = (u32)(addr_t) buffer;
@@ -1022,6 +1042,37 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
 		if (residual)
 			*residual = parm->db.ipbfln1f;
 	}
+	return rc;
+}
+EXPORT_SYMBOL(__iucv_message_receive);
+
+/**
+ * iucv_message_receive
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is received (IUCV_IPBUFLST)
+ * @buffer: address of data buffer or address of struct iucv_array
+ * @size: length of data buffer
+ * @residual:
+ *
+ * This function receives messages that are being sent to you over
+ * established paths. This function will deal with RMDATA messages
+ * embedded in struct iucv_message as well.
+ *
+ * Locking:	local_bh_enable/local_bh_disable
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg,
+			 u8 flags, void *buffer, size_t size, size_t *residual)
+{
+	int rc;
+
+	if (msg->flags & IUCV_IPRMDATA)
+		return iucv_message_receive_iprmdata(path, msg, flags,
+						     buffer, size, residual);
+	local_bh_disable();
+	rc = __iucv_message_receive(path, msg, flags, buffer, size, residual);
 	local_bh_enable();
 	return rc;
 }
@@ -1101,7 +1152,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg,
 EXPORT_SYMBOL(iucv_message_reply);
 
 /**
- * iucv_message_send
+ * __iucv_message_send
  * @path: address of iucv path structure
  * @msg: address of iucv msg structure
  * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
@@ -1113,15 +1164,16 @@ EXPORT_SYMBOL(iucv_message_reply);
  * transmitted is in a buffer and this is a one-way message and the
  * receiver will not reply to the message.
  *
+ * Locking:	no locking
+ *
  * Returns the result from the CP IUCV call.
  */
-int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
+int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
 		      u8 flags, u32 srccls, void *buffer, size_t size)
 {
 	union iucv_param *parm;
 	int rc;
 
-	local_bh_disable();
 	parm = iucv_param[smp_processor_id()];
 	memset(parm, 0, sizeof(union iucv_param));
 	if (flags & IUCV_IPRMDATA) {
@@ -1144,6 +1196,34 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
 	rc = iucv_call_b2f0(IUCV_SEND, parm);
 	if (!rc)
 		msg->id = parm->db.ipmsgid;
+	return rc;
+}
+EXPORT_SYMBOL(__iucv_message_send);
+
+/**
+ * iucv_message_send
+ * @path: address of iucv path structure
+ * @msg: address of iucv msg structure
+ * @flags: how the message is sent (IUCV_IPRMDATA, IUCV_IPPRTY, IUCV_IPBUFLST)
+ * @srccls: source class of message
+ * @buffer: address of send buffer or address of struct iucv_array
+ * @size: length of send buffer
+ *
+ * This function transmits data to another application. Data to be
+ * transmitted is in a buffer and this is a one-way message and the
+ * receiver will not reply to the message.
+ *
+ * Locking:	local_bh_enable/local_bh_disable
+ *
+ * Returns the result from the CP IUCV call.
+ */
+int iucv_message_send(struct iucv_path *path, struct iucv_message *msg,
+		      u8 flags, u32 srccls, void *buffer, size_t size)
+{
+	int rc;
+
+	local_bh_disable();
+	rc = __iucv_message_send(path, msg, flags, srccls, buffer, size);
 	local_bh_enable();
 	return rc;
 }
-- 
cgit v1.2.3


From 32272a26974d2027384fd4010cd1780fca425d94 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 25 Dec 2008 13:38:59 +0100
Subject: [S390] __page_to_pfn warnings

For CONFIG_SPARSEMEM_VMEMMAP=y on s390 I get warnings like

init/main.c: In function 'start_kernel':
init/main.c:641: warning: format '%08lx' expects type 'long unsigned int', but argument 2 has type 'int'

The warning can be suppressed with a cast to unsigned long in the
CONFIG_SPARSEMEM_VMEMMAP=y version of __page_to_pfn.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/asm-generic/memory_model.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 18546d8eb78e..36fa286adad5 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -49,7 +49,7 @@
 
 /* memmap is virtually contigious.  */
 #define __pfn_to_page(pfn)	(vmemmap + (pfn))
-#define __page_to_pfn(page)	((page) - vmemmap)
+#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 
 #elif defined(CONFIG_SPARSEMEM)
 /*
-- 
cgit v1.2.3


From aea3c5c05d2c409e93bfa80dcedc06af7da6c13b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 25 Dec 2008 16:57:24 -0800
Subject: sctp: Implement socket option SCTP_GET_ASSOC_NUMBER

Implement socket option SCTP_GET_ASSOC_NUMBER of the latest ietf socket
extensions API draft.

  8.2.5.  Get the Current Number of Associations (SCTP_GET_ASSOC_NUMBER)

   This option gets the current number of associations that are attached
   to a one-to-many style socket.  The option value is an uint32_t.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/user.h |  2 ++
 net/sctp/socket.c       | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index f205b10f0ab9..b259fc5798fb 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -118,6 +118,8 @@ enum sctp_optname {
 #define SCTP_PEER_AUTH_CHUNKS SCTP_PEER_AUTH_CHUNKS
 	SCTP_LOCAL_AUTH_CHUNKS,		/* Read only */
 #define SCTP_LOCAL_AUTH_CHUNKS SCTP_LOCAL_AUTH_CHUNKS
+	SCTP_GET_ASSOC_NUMBER,		/* Read only */
+#define SCTP_GET_ASSOC_NUMBER SCTP_GET_ASSOC_NUMBER
 
 
 	/* Internal Socket Options. Some of the sctp library functions are 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e432927310c9..9f5fe23773a9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5460,6 +5460,38 @@ num:
 	return 0;
 }
 
+/*
+ * 8.2.5.  Get the Current Number of Associations (SCTP_GET_ASSOC_NUMBER)
+ * This option gets the current number of associations that are attached
+ * to a one-to-many style socket.  The option value is an uint32_t.
+ */
+static int sctp_getsockopt_assoc_number(struct sock *sk, int len,
+				    char __user *optval, int __user *optlen)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	u32 val = 0;
+
+	if (sctp_style(sk, TCP))
+		return -EOPNOTSUPP;
+
+	if (len < sizeof(u32))
+		return -EINVAL;
+
+	len = sizeof(u32);
+
+	list_for_each_entry(asoc, &(sp->ep->asocs), asocs) {
+		val++;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -5602,6 +5634,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_local_auth_chunks(sk, len, optval,
 							optlen);
 		break;
+	case SCTP_GET_ASSOC_NUMBER:
+		retval = sctp_getsockopt_assoc_number(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3


From 619e803d3c1b7bcc17c45e81f309d0b9b3df2d5d Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Thu, 25 Dec 2008 17:21:17 -0800
Subject: netlink: fix (theoretical) overrun in message iteration

See commit 1045b03e07d85f3545118510a587035536030c1c ("netlink: fix
overrun in attribute iteration") for a detailed explanation of why
this patch is necessary.

In short, nlmsg_next() can make "remaining" go negative, and the
remaining >= sizeof(...) comparison will promote "remaining" to an
unsigned type, which means that the expression will evaluate to
true for negative numbers, even though it was not intended.

I put "theoretical" in the title because I have no evidence that
this can actually happen, but I suspect that a crafted netlink
packet can trigger some badness.

Note that the last test, which seemingly has the exact same
problem (also true for nla_ok()), is perfectly OK, since we
already know that remaining is positive.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 46b7764f1774..8a6150a3f4c7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -332,7 +332,7 @@ static inline int nlmsg_attrlen(const struct nlmsghdr *nlh, int hdrlen)
  */
 static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining)
 {
-	return (remaining >= sizeof(struct nlmsghdr) &&
+	return (remaining >= (int) sizeof(struct nlmsghdr) &&
 		nlh->nlmsg_len >= sizeof(struct nlmsghdr) &&
 		nlh->nlmsg_len <= remaining);
 }
-- 
cgit v1.2.3


From f9af0e70911e9d6cc9a68f784dca86415486084d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:24:24 +0900
Subject: irq: for_each_irq_desc() move to irqnr.h

Impact: cleanup

before CONFIG_SPARSE_IRQ age, for_each_irq_desc() sat in irqnr.h and
could be called from generic code.

CONFIG_SPARSE_IRQ breaks this assumption, but SPARSE_IRQ version
for_each_irq_desc() also can move into irqnr.h easily.

Also, this patch unifies CONFIG_SPARSE_IRQ and !CONFIG_SPARSE_IRQ
for_each_irq_desc().

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h   | 24 ++++--------------------
 include/linux/irqnr.h | 19 +++++++++----------
 kernel/irq/handle.c   | 13 +++++++++++--
 3 files changed, 24 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 98564dc64476..69da275c0ebd 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -202,33 +202,17 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-
-static inline struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
-}
-static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
-{
-	return irq_to_desc(irq);
-}
-
-#else
-
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+#else /* CONFIG_SPARSE_IRQ */
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
-
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
 #define kstat_incr_irqs_this_cpu(irqno, DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()]++)
 
-#endif
+#endif /* CONFIG_SPARSE_IRQ */
+
+extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
 static inline struct irq_desc *
 irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 95d2b74641f5..c4a59c7a478b 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -15,20 +15,19 @@
 
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
-#else
+#else /* CONFIG_GENERIC_HARDIRQS */
 
 extern int nr_irqs;
+extern struct irq_desc *irq_to_desc(unsigned int irq);
 
-#ifndef CONFIG_SPARSE_IRQ
+# define for_each_irq_desc(irq, desc)					\
+	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
+	     irq++, desc = irq_to_desc(irq))
+# define for_each_irq_desc_reverse(irq, desc)				\
+	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
+	     irq--, desc = irq_to_desc(irq))
 
-struct irq_desc;
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
-	    irq >= 0; irq--, desc--)
-#endif
-#endif
+#endif /* CONFIG_GENERIC_HARDIRQS */
 
 #define for_each_irq_nr(irq)                   \
        for (irq = 0; irq < nr_irqs; irq++)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6492400cb50d..4db7d2df86b6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -203,7 +203,7 @@ out_unlock:
 	return desc;
 }
 
-#else
+#else /* !CONFIG_SPARSE_IRQ */
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
@@ -218,7 +218,16 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
-#endif
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	return irq_to_desc(irq);
+}
+#endif /* !CONFIG_SPARSE_IRQ */
 
 /*
  * What should we do if we get a hw irq event on an illegal vector?
-- 
cgit v1.2.3


From 18eefedfe8ad33e8fc7614c13359e29a9fab4644 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:29:48 +0900
Subject: irq: simplify for_each_irq_desc() usage

Impact: cleanup

all for_each_irq_desc() usage point have !desc check.
then its check can move into for_each_irq_desc() macro.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 10 ----------
 drivers/xen/events.c      |  3 ---
 include/linux/irqnr.h     |  8 ++++++--
 kernel/irq/autoprobe.c    | 15 ---------------
 kernel/irq/handle.c       |  3 ---
 kernel/irq/spurious.c     |  5 -----
 6 files changed, 6 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a74887b416cc..2fe543f58ac8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1345,8 +1345,6 @@ void __setup_vector_irq(int cpu)
 
 	/* Mark the inuse vectors */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
@@ -1730,8 +1728,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	for_each_irq_desc(irq, desc) {
 		struct irq_pin_list *entry;
 
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		entry = cfg->irq_2_pin;
 		if (!entry)
@@ -2378,9 +2374,6 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2671,9 +2664,6 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		cfg = desc->chip_data;
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 46625cd38743..e26733a9df21 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -142,9 +142,6 @@ static void init_evtchn_cpu_bindings(void)
 
 	/* By default all event channels notify CPU#0. */
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		desc->affinity = cpumask_of_cpu(0);
 	}
 #endif
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index c4a59c7a478b..5504a5c97836 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -22,10 +22,14 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
-	     irq++, desc = irq_to_desc(irq))
+	     irq++, desc = irq_to_desc(irq))				\
+		if (desc)
+
+
 # define for_each_irq_desc_reverse(irq, desc)				\
 	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
-	     irq--, desc = irq_to_desc(irq))
+	     irq--, desc = irq_to_desc(irq))				\
+		if (desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 650ce4102a63..cc0f7321b8ce 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,9 +40,6 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -71,9 +68,6 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,9 +86,6 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 03479dfdebb8..7dbdfe524693 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -437,9 +437,6 @@ void early_init_irq_lock_class(void)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	}
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3738107531fd..dd364c11e56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,9 +91,6 @@ static int misrouted_irq(int irq)
 	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		if (!i)
 			 continue;
 
@@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy)
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
-		if (!desc)
-			continue;
 		if (!i)
 			 continue;
 
-- 
cgit v1.2.3


From 13a0c3c269b223f60abfac8a9811d77111a8b4ba Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 26 Dec 2008 02:05:47 -0800
Subject: sparseirq: work around compiler optimizing away __weak functions

Impact: fix panic on null pointer with sparseirq

Some GCC versions seem to inline the weak global function,
when that function is empty.

Work it around, by making the functions return a (dummy) integer.

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 8 ++++++--
 include/linux/irq.h       | 6 +++---
 init/main.c               | 7 ++++---
 kernel/irq/handle.c       | 7 ++++---
 4 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 2fe543f58ac8..976039377846 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -170,7 +170,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-void __init arch_early_irq_init(void)
+int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -184,6 +184,8 @@ void __init arch_early_irq_init(void)
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_SPARSE_IRQ
@@ -212,7 +214,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	return cfg;
 }
 
-void arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
 	struct irq_cfg *cfg;
 
@@ -224,6 +226,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 			BUG_ON(1);
 		}
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 69da275c0ebd..0e40af4bac40 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -193,9 +193,9 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern void early_irq_init(void);
-extern void arch_early_irq_init(void);
-extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int early_irq_init(void);
+extern int arch_early_irq_init(void);
+extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 					struct irq_desc *desc, int cpu);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
diff --git a/init/main.c b/init/main.c
index c1f999a3cf31..c314aa15370e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,13 +539,14 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-void __init __weak arch_early_irq_init(void)
+int __init __weak arch_early_irq_init(void)
 {
+	return 0;
 }
 
-void __init __weak early_irq_init(void)
+int __init __weak early_irq_init(void)
 {
-	arch_early_irq_init();
+	return arch_early_irq_init();
 }
 
 asmlinkage void __init start_kernel(void)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 893da67b7781..0bef3ecb7a0e 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -86,8 +86,9 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 		desc->kstat_irqs = (unsigned int *)ptr;
 }
 
-void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
+	return 0;
 }
 
 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
@@ -132,7 +133,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
 /* FIXME: use bootmem alloc ...*/
 static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
 
-void __init early_irq_init(void)
+int __init early_irq_init(void)
 {
 	struct irq_desc *desc;
 	int legacy_count;
@@ -151,7 +152,7 @@ void __init early_irq_init(void)
 	for (i = legacy_count; i < NR_IRQS; i++)
 		irq_desc_ptrs[i] = NULL;
 
-	arch_early_irq_init();
+	return arch_early_irq_init();
 }
 
 struct irq_desc *irq_to_desc(unsigned int irq)
-- 
cgit v1.2.3


From 429dbf53bca49b110f1058f0d9417a59115c41b8 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 18 Nov 2008 20:45:20 +0100
Subject: m68k: machw.h cleanup

Remove some more cruft from machw.h and drop the #include where it isn't
needed.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mac/debug.c           |  1 -
 arch/m68k/mac/macints.c         |  1 -
 arch/m68k/mac/oss.c             |  1 -
 arch/m68k/mac/via.c             |  1 -
 drivers/ide/macide.c            |  1 -
 drivers/macintosh/via-cuda.c    |  1 -
 drivers/macintosh/via-macii.c   |  1 -
 drivers/macintosh/via-maciisi.c |  1 -
 drivers/macintosh/via-pmu68k.c  |  1 -
 drivers/scsi/mac_scsi.c         |  1 -
 drivers/video/macfb.c           |  1 -
 include/asm-m68k/machw.h        | 22 ----------------------
 12 files changed, 33 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/mac/debug.c b/arch/m68k/mac/debug.c
index 2165740786a5..65dd77a742a3 100644
--- a/arch/m68k/mac/debug.c
+++ b/arch/m68k/mac/debug.c
@@ -24,7 +24,6 @@
 #define BOOTINFO_COMPAT_1_0
 #include <asm/setup.h>
 #include <asm/bootinfo.h>
-#include <asm/machw.h>
 #include <asm/macints.h>
 
 extern unsigned long mac_videobase;
diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c
index ecddac4a02b9..e17b8d56ce04 100644
--- a/arch/m68k/mac/macints.c
+++ b/arch/m68k/mac/macints.c
@@ -127,7 +127,6 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 #include <asm/bootinfo.h>
-#include <asm/machw.h>
 #include <asm/macintosh.h>
 #include <asm/mac_via.h>
 #include <asm/mac_psc.h>
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 43d83e054b8e..8426501119ca 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -21,7 +21,6 @@
 #include <linux/init.h>
 
 #include <asm/bootinfo.h>
-#include <asm/machw.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_via.h>
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 125404788e8d..f01d418e64fe 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -32,7 +32,6 @@
 #include <asm/bootinfo.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
-#include <asm/machw.h>
 #include <asm/mac_via.h>
 #include <asm/mac_psc.h>
 
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 43f97cc1d30e..3c60064f1d4f 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -18,7 +18,6 @@
 #include <linux/delay.h>
 #include <linux/ide.h>
 
-#include <asm/machw.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_baboon.h>
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 741a93a3eb61..62dd1fdafecf 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -23,7 +23,6 @@
 #else
 #include <asm/macintosh.h>
 #include <asm/macints.h>
-#include <asm/machw.h>
 #include <asm/mac_via.h>
 #endif
 #include <asm/io.h>
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 6e6dd17ab572..817f37a875c9 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -33,7 +33,6 @@
 #include <linux/init.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
-#include <asm/machw.h>
 #include <asm/mac_via.h>
 #include <asm/system.h>
 
diff --git a/drivers/macintosh/via-maciisi.c b/drivers/macintosh/via-maciisi.c
index 2dc788042707..4d686c0bdea0 100644
--- a/drivers/macintosh/via-maciisi.c
+++ b/drivers/macintosh/via-maciisi.c
@@ -24,7 +24,6 @@
 #include <linux/interrupt.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
-#include <asm/machw.h>
 #include <asm/mac_via.h>
 
 static volatile unsigned char *via;
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index b64741c95ac4..fb9fa614a0e8 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -35,7 +35,6 @@
 
 #include <asm/macintosh.h>
 #include <asm/macints.h>
-#include <asm/machw.h>
 #include <asm/mac_via.h>
 
 #include <asm/pgtable.h>
diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index 0248919bc2df..bf2a1c516293 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c
@@ -47,7 +47,6 @@
 
 #include <asm/macintosh.h>
 #include <asm/macints.h>
-#include <asm/machw.h>
 #include <asm/mac_via.h>
 
 #include "scsi.h"
diff --git a/drivers/video/macfb.c b/drivers/video/macfb.c
index ee380d5f3410..3a437f937779 100644
--- a/drivers/video/macfb.c
+++ b/drivers/video/macfb.c
@@ -36,7 +36,6 @@
 #include <asm/irq.h>
 #include <asm/macintosh.h>
 #include <asm/io.h>
-#include <asm/machw.h>
 
 /* Common DAC base address for the LC, RBV, Valkyrie, and IIvx */
 #define DAC_BASE 0x50f24000
diff --git a/include/asm-m68k/machw.h b/include/asm-m68k/machw.h
index 35624998291c..2b4de0c2ce4a 100644
--- a/include/asm-m68k/machw.h
+++ b/include/asm-m68k/machw.h
@@ -26,28 +26,6 @@
 #include <linux/types.h>
 
 #if 0
-/* Mac SCSI Controller 5380 */
-
-#define	MAC_5380_BAS	(0x50F10000) /* This is definitely wrong!! */
-struct MAC_5380 {
-	u_char	scsi_data;
-	u_char	char_dummy1;
-	u_char	scsi_icr;
-	u_char	char_dummy2;
-	u_char	scsi_mode;
-	u_char	char_dummy3;
-	u_char	scsi_tcr;
-	u_char	char_dummy4;
-	u_char	scsi_idstat;
-	u_char	char_dummy5;
-	u_char	scsi_dmastat;
-	u_char	char_dummy6;
-	u_char	scsi_targrcv;
-	u_char	char_dummy7;
-	u_char	scsi_inircv;
-};
-#define	mac_scsi       ((*(volatile struct MAC_5380 *)MAC_5380_BAS))
-
 /*
 ** SCC Z8530
 */
-- 
cgit v1.2.3


From aa6eeeef78263e9891185c6cfaaf64808460a54a Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 18 Nov 2008 20:45:23 +0100
Subject: m68k: use the new byteorder headers

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 include/asm-m68k/byteorder.h | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/asm-m68k/byteorder.h b/include/asm-m68k/byteorder.h
index 81d420b35c80..b354acdafec8 100644
--- a/include/asm-m68k/byteorder.h
+++ b/include/asm-m68k/byteorder.h
@@ -4,22 +4,16 @@
 #include <asm/types.h>
 #include <linux/compiler.h>
 
-#ifdef __GNUC__
+#define __BIG_ENDIAN
+#define __SWAB_64_THRU_32__
 
-static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 val)
+static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
 {
 	__asm__("rolw #8,%0; swap %0; rolw #8,%0" : "=d" (val) : "0" (val));
 	return val;
 }
-#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch_swab32 __arch_swab32
 
-#endif
-
-#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-#  define __BYTEORDER_HAS_U64__
-#  define __SWAB_64_THRU_32__
-#endif
-
-#include <linux/byteorder/big_endian.h>
+#include <linux/byteorder.h>
 
 #endif /* _M68K_BYTEORDER_H */
-- 
cgit v1.2.3


From 1eca4365be25c540650693e941bc06a66cf38f94 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 3 Nov 2008 20:03:17 +0900
Subject: libata: beef up iterators

There currently are the following looping constructs.

* __ata_port_for_each_link() for all available links
* ata_port_for_each_link() for edge links
* ata_link_for_each_dev() for all devices
* ata_link_for_each_dev_reverse() for all devices in reverse order

Now there's a need for looping construct which is similar to
__ata_port_for_each_link() but iterates over PMP links before the host
link.  Instead of adding another one with long name, do the following
cleanup.

* Implement and export ata_link_next() and ata_dev_next() which take
  @mode parameter and can be used to build custom loop.
* Implement ata_for_each_link() and ata_for_each_dev() which take
  looping mode explicitly.

The following iteration modes are implemented.

* ATA_LITER_EDGE		: loop over edge links
* ATA_LITER_HOST_FIRST		: loop over all links, host link first
* ATA_LITER_PMP_FIRST		: loop over all links, PMP links first

* ATA_DITER_ENABLED		: loop over enabled devices
* ATA_DITER_ENABLED_REVERSE	: loop over enabled devices in reverse order
* ATA_DITER_ALL			: loop over all devices
* ATA_DITER_ALL_REVERSE		: loop over all devices in reverse order

This change removes exlicit device enabledness checks from many loops
and makes it clear which ones are iterated over in which direction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c           |   8 +-
 drivers/ata/ata_generic.c    |   5 +-
 drivers/ata/libata-acpi.c    |  19 ++---
 drivers/ata/libata-core.c    | 183 ++++++++++++++++++++++++++++---------------
 drivers/ata/libata-eh.c      |  84 +++++++++-----------
 drivers/ata/libata-pmp.c     |  22 +++---
 drivers/ata/libata-scsi.c    |  22 +++---
 drivers/ata/pata_it821x.c    |  34 ++++----
 drivers/ata/pata_ixp4xx_cf.c |  14 ++--
 drivers/ata/pata_legacy.c    |  17 ++--
 drivers/ata/pata_pdc2027x.c  |  31 ++++----
 drivers/ata/pata_platform.c  |  14 ++--
 drivers/ata/pata_rz1000.c    |  16 ++--
 drivers/ata/sata_sil.c       |   2 +-
 include/linux/libata.h       |  76 +++++++++++++-----
 15 files changed, 307 insertions(+), 240 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index a67b8e7c712d..656448c7fef9 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1119,14 +1119,14 @@ static void ahci_start_port(struct ata_port *ap)
 
 	/* turn on LEDs */
 	if (ap->flags & ATA_FLAG_EM) {
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			emp = &pp->em_priv[link->pmp];
 			ahci_transmit_led_message(ap, emp->led_state, 4);
 		}
 	}
 
 	if (ap->flags & ATA_FLAG_SW_ACTIVITY)
-		ata_port_for_each_link(link, ap)
+		ata_for_each_link(link, ap, EDGE)
 			ahci_init_sw_activity(link);
 
 }
@@ -1361,7 +1361,7 @@ static ssize_t ahci_led_show(struct ata_port *ap, char *buf)
 	struct ahci_em_priv *emp;
 	int rc = 0;
 
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		emp = &pp->em_priv[link->pmp];
 		rc += sprintf(buf, "%lx\n", emp->led_state);
 	}
@@ -1941,7 +1941,7 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat)
 	u32 serror;
 
 	/* determine active link */
-	ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, EDGE)
 		if (ata_link_active(link))
 			break;
 	if (!link)
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 5c33767e66de..dc48a6398abe 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -57,10 +57,7 @@ static int generic_set_mode(struct ata_link *link, struct ata_device **unused)
 	if (pdev->vendor == PCI_VENDOR_ID_CENATEK)
 		dma_enabled = 0xFF;
 
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		/* We don't really care */
 		dev->pio_mode = XFER_PIO_0;
 		dev->dma_mode = XFER_MW_DMA_0;
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index c012307d0ba6..ef02e488d468 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -89,7 +89,7 @@ void ata_acpi_associate_sata_port(struct ata_port *ap)
 
 		ap->link.device->acpi_handle = NULL;
 
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			acpi_integer adr = SATA_ADR(ap->port_no, link->pmp);
 
 			link->device->acpi_handle =
@@ -129,8 +129,8 @@ static void ata_acpi_detach_device(struct ata_port *ap, struct ata_device *dev)
 		struct ata_link *tlink;
 		struct ata_device *tdev;
 
-		ata_port_for_each_link(tlink, ap)
-			ata_link_for_each_dev(tdev, tlink)
+		ata_for_each_link(tlink, ap, EDGE)
+			ata_for_each_dev(tdev, tlink, ALL)
 				tdev->flags |= ATA_DFLAG_DETACH;
 	}
 
@@ -588,12 +588,9 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
 		unsigned long xfer_mask, udma_mask;
 
-		if (!ata_dev_enabled(dev))
-			continue;
-
 		xfer_mask = ata_acpi_gtm_xfermask(dev, gtm);
 		ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask);
 
@@ -893,7 +890,7 @@ void ata_acpi_on_resume(struct ata_port *ap)
 		 * use values set by _STM.  Cache _GTF result and
 		 * schedule _GTF.
 		 */
-		ata_link_for_each_dev(dev, &ap->link) {
+		ata_for_each_dev(dev, &ap->link, ALL) {
 			ata_acpi_clear_gtf(dev);
 			if (ata_dev_enabled(dev) &&
 			    ata_dev_get_GTF(dev, NULL) >= 0)
@@ -904,7 +901,7 @@ void ata_acpi_on_resume(struct ata_port *ap)
 		 * there's no reason to evaluate IDE _GTF early
 		 * without _STM.  Clear cache and schedule _GTF.
 		 */
-		ata_link_for_each_dev(dev, &ap->link) {
+		ata_for_each_dev(dev, &ap->link, ALL) {
 			ata_acpi_clear_gtf(dev);
 			if (ata_dev_enabled(dev))
 				dev->flags |= ATA_DFLAG_ACPI_PENDING;
@@ -932,8 +929,8 @@ void ata_acpi_set_state(struct ata_port *ap, pm_message_t state)
 	if (state.event == PM_EVENT_ON)
 		acpi_bus_set_power(ap->acpi_handle, ACPI_STATE_D0);
 
-	ata_link_for_each_dev(dev, &ap->link) {
-		if (dev->acpi_handle && ata_dev_enabled(dev))
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
+		if (dev->acpi_handle)
 			acpi_bus_set_power(dev->acpi_handle,
 				state.event == PM_EVENT_ON ?
 					ACPI_STATE_D0 : ACPI_STATE_D3);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index bc6695e3c848..ffd98e4e65b4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -163,42 +163,118 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
 
-/*
- * Iterator helpers.  Don't use directly.
+/**
+ *	ata_link_next - link iteration helper
+ *	@link: the previous link, NULL to start
+ *	@ap: ATA port containing links to iterate
+ *	@mode: iteration mode, one of ATA_LITER_*
+ *
+ *	LOCKING:
+ *	Host lock or EH context.
  *
- * LOCKING:
- * Host lock or EH context.
+ *	RETURNS:
+ *	Pointer to the next link.
  */
-struct ata_link *__ata_port_next_link(struct ata_port *ap,
-				      struct ata_link *link, bool dev_only)
+struct ata_link *ata_link_next(struct ata_link *link, struct ata_port *ap,
+			       enum ata_link_iter_mode mode)
 {
+	BUG_ON(mode != ATA_LITER_EDGE &&
+	       mode != ATA_LITER_PMP_FIRST && mode != ATA_LITER_HOST_FIRST);
+
 	/* NULL link indicates start of iteration */
-	if (!link) {
-		if (dev_only && sata_pmp_attached(ap))
-			return ap->pmp_link;
-		return &ap->link;
-	}
+	if (!link)
+		switch (mode) {
+		case ATA_LITER_EDGE:
+		case ATA_LITER_PMP_FIRST:
+			if (sata_pmp_attached(ap))
+				return ap->pmp_link;
+			/* fall through */
+		case ATA_LITER_HOST_FIRST:
+			return &ap->link;
+		}
 
-	/* we just iterated over the host master link, what's next? */
-	if (link == &ap->link) {
-		if (!sata_pmp_attached(ap)) {
-			if (unlikely(ap->slave_link) && !dev_only)
+	/* we just iterated over the host link, what's next? */
+	if (link == &ap->link)
+		switch (mode) {
+		case ATA_LITER_HOST_FIRST:
+			if (sata_pmp_attached(ap))
+				return ap->pmp_link;
+			/* fall through */
+		case ATA_LITER_PMP_FIRST:
+			if (unlikely(ap->slave_link))
 				return ap->slave_link;
+			/* fall through */
+		case ATA_LITER_EDGE:
 			return NULL;
 		}
-		return ap->pmp_link;
-	}
 
 	/* slave_link excludes PMP */
 	if (unlikely(link == ap->slave_link))
 		return NULL;
 
-	/* iterate to the next PMP link */
+	/* we were over a PMP link */
 	if (++link < ap->pmp_link + ap->nr_pmp_links)
 		return link;
+
+	if (mode == ATA_LITER_PMP_FIRST)
+		return &ap->link;
+
 	return NULL;
 }
 
+/**
+ *	ata_dev_next - device iteration helper
+ *	@dev: the previous device, NULL to start
+ *	@link: ATA link containing devices to iterate
+ *	@mode: iteration mode, one of ATA_DITER_*
+ *
+ *	LOCKING:
+ *	Host lock or EH context.
+ *
+ *	RETURNS:
+ *	Pointer to the next device.
+ */
+struct ata_device *ata_dev_next(struct ata_device *dev, struct ata_link *link,
+				enum ata_dev_iter_mode mode)
+{
+	BUG_ON(mode != ATA_DITER_ENABLED && mode != ATA_DITER_ENABLED_REVERSE &&
+	       mode != ATA_DITER_ALL && mode != ATA_DITER_ALL_REVERSE);
+
+	/* NULL dev indicates start of iteration */
+	if (!dev)
+		switch (mode) {
+		case ATA_DITER_ENABLED:
+		case ATA_DITER_ALL:
+			dev = link->device;
+			goto check;
+		case ATA_DITER_ENABLED_REVERSE:
+		case ATA_DITER_ALL_REVERSE:
+			dev = link->device + ata_link_max_devices(link) - 1;
+			goto check;
+		}
+
+ next:
+	/* move to the next one */
+	switch (mode) {
+	case ATA_DITER_ENABLED:
+	case ATA_DITER_ALL:
+		if (++dev < link->device + ata_link_max_devices(link))
+			goto check;
+		return NULL;
+	case ATA_DITER_ENABLED_REVERSE:
+	case ATA_DITER_ALL_REVERSE:
+		if (--dev >= link->device)
+			goto check;
+		return NULL;
+	}
+
+ check:
+	if ((mode == ATA_DITER_ENABLED || mode == ATA_DITER_ENABLED_REVERSE) &&
+	    !ata_dev_enabled(dev))
+		goto next;
+	return dev;
+}
+
 /**
  *	ata_dev_phys_link - find physical link for a device
  *	@dev: ATA device to look up physical link for
@@ -1107,8 +1183,8 @@ static void ata_lpm_enable(struct ata_host *host)
 
 	for (i = 0; i < host->n_ports; i++) {
 		ap = host->ports[i];
-		ata_port_for_each_link(link, ap) {
-			ata_link_for_each_dev(dev, link)
+		ata_for_each_link(link, ap, EDGE) {
+			ata_for_each_dev(dev, link, ALL)
 				ata_dev_disable_pm(dev);
 		}
 	}
@@ -2594,11 +2670,11 @@ int ata_bus_probe(struct ata_port *ap)
 
 	ata_port_probe(ap);
 
-	ata_link_for_each_dev(dev, &ap->link)
+	ata_for_each_dev(dev, &ap->link, ALL)
 		tries[dev->devno] = ATA_PROBE_MAX_TRIES;
 
  retry:
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL) {
 		/* If we issue an SRST then an ATA drive (not ATAPI)
 		 * may change configuration and be in PIO0 timing. If
 		 * we do a hard reset (or are coming from power on)
@@ -2620,7 +2696,7 @@ int ata_bus_probe(struct ata_port *ap)
 	/* reset and determine device classes */
 	ap->ops->phy_reset(ap);
 
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL) {
 		if (!(ap->flags & ATA_FLAG_DISABLED) &&
 		    dev->class != ATA_DEV_UNKNOWN)
 			classes[dev->devno] = dev->class;
@@ -2636,7 +2712,7 @@ int ata_bus_probe(struct ata_port *ap)
 	   specific sequence bass-ackwards so that PDIAG- is released by
 	   the slave device */
 
-	ata_link_for_each_dev_reverse(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL_REVERSE) {
 		if (tries[dev->devno])
 			dev->class = classes[dev->devno];
 
@@ -2653,24 +2729,19 @@ int ata_bus_probe(struct ata_port *ap)
 	if (ap->ops->cable_detect)
 		ap->cbl = ap->ops->cable_detect(ap);
 
-	/* We may have SATA bridge glue hiding here irrespective of the
-	   reported cable types and sensed types */
-	ata_link_for_each_dev(dev, &ap->link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-		/* SATA drives indicate we have a bridge. We don't know which
-		   end of the link the bridge is which is a problem */
+	/* We may have SATA bridge glue hiding here irrespective of
+	 * the reported cable types and sensed types.  When SATA
+	 * drives indicate we have a bridge, we don't know which end
+	 * of the link the bridge is which is a problem.
+	 */
+	ata_for_each_dev(dev, &ap->link, ENABLED)
 		if (ata_id_is_sata(dev->id))
 			ap->cbl = ATA_CBL_SATA;
-	}
 
 	/* After the identify sequence we can now set up the devices. We do
 	   this in the normal order so that the user doesn't get confused */
 
-	ata_link_for_each_dev(dev, &ap->link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
 		ap->link.eh_context.i.flags |= ATA_EHI_PRINTINFO;
 		rc = ata_dev_configure(dev);
 		ap->link.eh_context.i.flags &= ~ATA_EHI_PRINTINFO;
@@ -2683,9 +2754,8 @@ int ata_bus_probe(struct ata_port *ap)
 	if (rc)
 		goto fail;
 
-	ata_link_for_each_dev(dev, &ap->link)
-		if (ata_dev_enabled(dev))
-			return 0;
+	ata_for_each_dev(dev, &ap->link, ENABLED)
+		return 0;
 
 	/* no device present, disable port */
 	ata_port_disable(ap);
@@ -3331,13 +3401,10 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	int rc = 0, used_dma = 0, found = 0;
 
 	/* step 1: calculate xfer_mask */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ENABLED) {
 		unsigned long pio_mask, dma_mask;
 		unsigned int mode_mask;
 
-		if (!ata_dev_enabled(dev))
-			continue;
-
 		mode_mask = ATA_DMA_MASK_ATA;
 		if (dev->class == ATA_DEV_ATAPI)
 			mode_mask = ATA_DMA_MASK_ATAPI;
@@ -3366,10 +3433,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 		goto out;
 
 	/* step 2: always set host PIO timings */
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		if (dev->pio_mode == 0xff) {
 			ata_dev_printk(dev, KERN_WARNING, "no PIO support\n");
 			rc = -EINVAL;
@@ -3383,8 +3447,8 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	}
 
 	/* step 3: set host DMA timings */
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev) || !ata_dma_enabled(dev))
+	ata_for_each_dev(dev, link, ENABLED) {
+		if (!ata_dma_enabled(dev))
 			continue;
 
 		dev->xfer_mode = dev->dma_mode;
@@ -3394,11 +3458,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	}
 
 	/* step 4: update devices' xfer mode */
-	ata_link_for_each_dev(dev, link) {
-		/* don't update suspended devices' xfer mode */
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		rc = ata_dev_set_mode(dev);
 		if (rc)
 			goto out;
@@ -4263,9 +4323,9 @@ static int cable_is_40wire(struct ata_port *ap)
 	 * - if you have a non detect capable drive you don't want it
 	 *   to colour the choice
 	 */
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
-			if (ata_dev_enabled(dev) && !ata_is_40wire(dev))
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
+			if (!ata_is_40wire(dev))
 				return 0;
 		}
 	}
@@ -5218,7 +5278,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
 		}
 
 		ap->pflags |= ATA_PFLAG_PM_PENDING;
-		__ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, HOST_FIRST) {
 			link->eh_info.action |= action;
 			link->eh_info.flags |= ehi_flags;
 		}
@@ -6063,9 +6123,9 @@ static void ata_port_detach(struct ata_port *ap)
 	/* EH is now guaranteed to see UNLOADING - EH context belongs
 	 * to us.  Restore SControl and disable all existing devices.
 	 */
-	__ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, HOST_FIRST) {
 		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
-		ata_link_for_each_dev(dev, link)
+		ata_for_each_dev(dev, link, ALL)
 			ata_dev_disable(dev);
 	}
 
@@ -6528,7 +6588,8 @@ EXPORT_SYMBOL_GPL(ata_base_port_ops);
 EXPORT_SYMBOL_GPL(sata_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_info);
-EXPORT_SYMBOL_GPL(__ata_port_next_link);
+EXPORT_SYMBOL_GPL(ata_link_next);
+EXPORT_SYMBOL_GPL(ata_dev_next);
 EXPORT_SYMBOL_GPL(ata_std_bios_param);
 EXPORT_SYMBOL_GPL(ata_host_init);
 EXPORT_SYMBOL_GPL(ata_host_alloc);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 32da9a93ce44..d673f3712bdc 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -422,7 +422,7 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
 
 	if (!dev) {
 		ehi->action &= ~action;
-		ata_link_for_each_dev(tdev, link)
+		ata_for_each_dev(tdev, link, ALL)
 			ehi->dev_action[tdev->devno] &= ~action;
 	} else {
 		/* doesn't make sense for port-wide EH actions */
@@ -430,7 +430,7 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
 
 		/* break ehi->action into ehi->dev_action */
 		if (ehi->action & action) {
-			ata_link_for_each_dev(tdev, link)
+			ata_for_each_dev(tdev, link, ALL)
 				ehi->dev_action[tdev->devno] |=
 					ehi->action & action;
 			ehi->action &= ~action;
@@ -592,7 +592,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 		/* fetch & clear EH info */
 		spin_lock_irqsave(ap->lock, flags);
 
-		__ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, HOST_FIRST) {
 			struct ata_eh_context *ehc = &link->eh_context;
 			struct ata_device *dev;
 
@@ -600,12 +600,9 @@ void ata_scsi_error(struct Scsi_Host *host)
 			link->eh_context.i = link->eh_info;
 			memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-			ata_link_for_each_dev(dev, link) {
+			ata_for_each_dev(dev, link, ENABLED) {
 				int devno = dev->devno;
 
-				if (!ata_dev_enabled(dev))
-					continue;
-
 				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
 				if (ata_ncq_enabled(dev))
 					ehc->saved_ncq_enabled |= 1 << devno;
@@ -644,7 +641,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 		}
 
 		/* this run is complete, make sure EH info is clear */
-		__ata_port_for_each_link(link, ap)
+		ata_for_each_link(link, ap, HOST_FIRST)
 			memset(&link->eh_info, 0, sizeof(link->eh_info));
 
 		/* Clear host_eh_scheduled while holding ap->lock such
@@ -1025,7 +1022,7 @@ int sata_async_notification(struct ata_port *ap)
 		struct ata_link *link;
 
 		/* check and notify ATAPI AN */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			if (!(sntf & (1 << link->pmp)))
 				continue;
 
@@ -2005,7 +2002,7 @@ void ata_eh_autopsy(struct ata_port *ap)
 {
 	struct ata_link *link;
 
-	ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, EDGE)
 		ata_eh_link_autopsy(link);
 
 	/* Handle the frigging slave link.  Autopsy is done similarly
@@ -2219,7 +2216,7 @@ void ata_eh_report(struct ata_port *ap)
 {
 	struct ata_link *link;
 
-	__ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, HOST_FIRST)
 		ata_eh_link_report(link);
 }
 
@@ -2230,7 +2227,7 @@ static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
 	struct ata_device *dev;
 
 	if (clear_classes)
-		ata_link_for_each_dev(dev, link)
+		ata_for_each_dev(dev, link, ALL)
 			classes[dev->devno] = ATA_DEV_UNKNOWN;
 
 	return reset(link, classes, deadline);
@@ -2294,7 +2291,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 
 	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
 
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		/* If we issue an SRST then an ATA drive (not ATAPI)
 		 * may change configuration and be in PIO0 timing. If
 		 * we do a hard reset (or are coming from power on)
@@ -2355,7 +2352,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 						"port disabled. ignoring.\n");
 				ehc->i.action &= ~ATA_EH_RESET;
 
-				ata_link_for_each_dev(dev, link)
+				ata_for_each_dev(dev, link, ALL)
 					classes[dev->devno] = ATA_DEV_NONE;
 
 				rc = 0;
@@ -2369,7 +2366,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 		 * bang classes and return.
 		 */
 		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
-			ata_link_for_each_dev(dev, link)
+			ata_for_each_dev(dev, link, ALL)
 				classes[dev->devno] = ATA_DEV_NONE;
 			rc = 0;
 			goto out;
@@ -2454,7 +2451,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 	/*
 	 * Post-reset processing
 	 */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		/* After the reset, the device state is PIO 0 and the
 		 * controller state is undefined.  Reset also wakes up
 		 * drives from sleeping mode.
@@ -2510,7 +2507,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 	 * can be reliably detected and retried.
 	 */
 	nr_unknown = 0;
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		/* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
 		if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
 			classes[dev->devno] = ATA_DEV_NONE;
@@ -2619,8 +2616,8 @@ static inline void ata_eh_pull_park_action(struct ata_port *ap)
 
 	spin_lock_irqsave(ap->lock, flags);
 	INIT_COMPLETION(ap->park_req_pending);
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ALL) {
 			struct ata_eh_info *ehi = &link->eh_info;
 
 			link->eh_context.i.dev_action[dev->devno] |=
@@ -2675,7 +2672,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 	 * be done backwards such that PDIAG- is released by the slave
 	 * device before the master device is identified.
 	 */
-	ata_link_for_each_dev_reverse(dev, link) {
+	ata_for_each_dev(dev, link, ALL_REVERSE) {
 		unsigned int action = ata_eh_dev_action(dev);
 		unsigned int readid_flags = 0;
 
@@ -2744,7 +2741,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 	/* Configure new devices forward such that user doesn't see
 	 * device detection messages backwards.
 	 */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		if (!(new_mask & (1 << dev->devno)) ||
 		    dev->class == ATA_DEV_PMP)
 			continue;
@@ -2793,10 +2790,7 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	int rc;
 
 	/* if data transfer is verified, clear DUBIOUS_XFER on ering top */
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
 			struct ata_ering_entry *ent;
 
@@ -2813,14 +2807,11 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 		rc = ata_do_set_mode(link, r_failed_dev);
 
 	/* if transfer mode has changed, set DUBIOUS_XFER on device */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ENABLED) {
 		struct ata_eh_context *ehc = &link->eh_context;
 		u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
 		u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
 
-		if (!ata_dev_enabled(dev))
-			continue;
-
 		if (dev->xfer_mode != saved_xfer_mode ||
 		    ata_ncq_enabled(dev) != saved_ncq)
 			dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
@@ -2881,9 +2872,8 @@ static int ata_link_nr_enabled(struct ata_link *link)
 	struct ata_device *dev;
 	int cnt = 0;
 
-	ata_link_for_each_dev(dev, link)
-		if (ata_dev_enabled(dev))
-			cnt++;
+	ata_for_each_dev(dev, link, ENABLED)
+		cnt++;
 	return cnt;
 }
 
@@ -2892,7 +2882,7 @@ static int ata_link_nr_vacant(struct ata_link *link)
 	struct ata_device *dev;
 	int cnt = 0;
 
-	ata_link_for_each_dev(dev, link)
+	ata_for_each_dev(dev, link, ALL)
 		if (dev->class == ATA_DEV_UNKNOWN)
 			cnt++;
 	return cnt;
@@ -2918,7 +2908,7 @@ static int ata_eh_skip_recovery(struct ata_link *link)
 		return 0;
 
 	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		if (dev->class == ATA_DEV_UNKNOWN &&
 		    ehc->classes[dev->devno] != ATA_DEV_NONE)
 			return 0;
@@ -3026,7 +3016,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	DPRINTK("ENTER\n");
 
 	/* prep for recovery */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		/* re-enable link? */
@@ -3038,7 +3028,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
 		}
 
-		ata_link_for_each_dev(dev, link) {
+		ata_for_each_dev(dev, link, ALL) {
 			if (link->flags & ATA_LFLAG_NO_RETRY)
 				ehc->tries[dev->devno] = 1;
 			else
@@ -3068,19 +3058,19 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		goto out;
 
 	/* prep for EH */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		/* skip EH if possible. */
 		if (ata_eh_skip_recovery(link))
 			ehc->i.action = 0;
 
-		ata_link_for_each_dev(dev, link)
+		ata_for_each_dev(dev, link, ALL)
 			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
 	}
 
 	/* reset */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		if (!(ehc->i.action & ATA_EH_RESET))
@@ -3105,8 +3095,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		ata_eh_pull_park_action(ap);
 
 		deadline = jiffies;
-		ata_port_for_each_link(link, ap) {
-			ata_link_for_each_dev(dev, link) {
+		ata_for_each_link(link, ap, EDGE) {
+			ata_for_each_dev(dev, link, ALL) {
 				struct ata_eh_context *ehc = &link->eh_context;
 				unsigned long tmp;
 
@@ -3134,8 +3124,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		deadline = wait_for_completion_timeout(&ap->park_req_pending,
 						       deadline - now);
 	} while (deadline);
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ALL) {
 			if (!(link->eh_context.unloaded_mask &
 			      (1 << dev->devno)))
 				continue;
@@ -3146,7 +3136,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	}
 
 	/* the rest */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		/* revalidate existing devices and attach new ones */
@@ -3172,7 +3162,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		 * disrupting the current users of the device.
 		 */
 		if (ehc->i.flags & ATA_EHI_DID_RESET) {
-			ata_link_for_each_dev(dev, link) {
+			ata_for_each_dev(dev, link, ALL) {
 				if (dev->class != ATA_DEV_ATAPI)
 					continue;
 				rc = atapi_eh_clear_ua(dev);
@@ -3183,7 +3173,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 
 		/* configure link power saving */
 		if (ehc->i.action & ATA_EH_LPM)
-			ata_link_for_each_dev(dev, link)
+			ata_for_each_dev(dev, link, ALL)
 				ata_dev_enable_pm(dev, ap->pm_policy);
 
 		/* this link is okay now */
@@ -3288,7 +3278,7 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
 			    NULL);
 	if (rc) {
-		ata_link_for_each_dev(dev, &ap->link)
+		ata_for_each_dev(dev, &ap->link, ALL)
 			ata_dev_disable(dev);
 	}
 
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index b65db309c181..98ca07a2db87 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -321,7 +321,7 @@ static void sata_pmp_quirks(struct ata_port *ap)
 
 	if (vendor == 0x1095 && devid == 0x3726) {
 		/* sil3726 quirks */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			/* Class code report is unreliable and SRST
 			 * times out under certain configurations.
 			 */
@@ -336,7 +336,7 @@ static void sata_pmp_quirks(struct ata_port *ap)
 		}
 	} else if (vendor == 0x1095 && devid == 0x4723) {
 		/* sil4723 quirks */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			/* class code report is unreliable */
 			if (link->pmp < 2)
 				link->flags |= ATA_LFLAG_ASSUME_ATA;
@@ -348,7 +348,7 @@ static void sata_pmp_quirks(struct ata_port *ap)
 		}
 	} else if (vendor == 0x1095 && devid == 0x4726) {
 		/* sil4726 quirks */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			/* Class code report is unreliable and SRST
 			 * times out under certain configurations.
 			 * Config device can be at port 0 or 5 and
@@ -450,7 +450,7 @@ int sata_pmp_attach(struct ata_device *dev)
 	if (ap->ops->pmp_attach)
 		ap->ops->pmp_attach(ap);
 
-	ata_port_for_each_link(tlink, ap)
+	ata_for_each_link(tlink, ap, EDGE)
 		sata_link_init_spd(tlink);
 
 	ata_acpi_associate_sata_port(ap);
@@ -487,7 +487,7 @@ static void sata_pmp_detach(struct ata_device *dev)
 	if (ap->ops->pmp_detach)
 		ap->ops->pmp_detach(ap);
 
-	ata_port_for_each_link(tlink, ap)
+	ata_for_each_link(tlink, ap, EDGE)
 		ata_eh_detach_dev(tlink->device);
 
 	spin_lock_irqsave(ap->lock, flags);
@@ -700,7 +700,7 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
 		}
 
 		/* PMP is reset, SErrors cannot be trusted, scan all */
-		ata_port_for_each_link(tlink, ap) {
+		ata_for_each_link(tlink, ap, EDGE) {
 			struct ata_eh_context *ehc = &tlink->eh_context;
 
 			ehc->i.probe_mask |= ATA_ALL_DEVICES;
@@ -768,7 +768,7 @@ static int sata_pmp_eh_handle_disabled_links(struct ata_port *ap)
 
 	spin_lock_irqsave(ap->lock, flags);
 
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		if (!(link->flags & ATA_LFLAG_DISABLED))
 			continue;
 
@@ -852,7 +852,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 	int cnt, rc;
 
 	pmp_tries = ATA_EH_PMP_TRIES;
-	ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, EDGE)
 		link_tries[link->pmp] = ATA_EH_PMP_LINK_TRIES;
 
  retry:
@@ -861,7 +861,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 		rc = ata_eh_recover(ap, ops->prereset, ops->softreset,
 				    ops->hardreset, ops->postreset, NULL);
 		if (rc) {
-			ata_link_for_each_dev(dev, &ap->link)
+			ata_for_each_dev(dev, &ap->link, ALL)
 				ata_dev_disable(dev);
 			return rc;
 		}
@@ -870,7 +870,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 			return 0;
 
 		/* new PMP online */
-		ata_port_for_each_link(link, ap)
+		ata_for_each_link(link, ap, EDGE)
 			link_tries[link->pmp] = ATA_EH_PMP_LINK_TRIES;
 
 		/* fall through */
@@ -942,7 +942,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 	}
 
 	cnt = 0;
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		if (!(gscr_error & (1 << link->pmp)))
 			continue;
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 47c7afcb36f2..0b2e14f67655 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3229,12 +3229,12 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 		return;
 
  repeat:
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
 			struct scsi_device *sdev;
 			int channel = 0, id = 0;
 
-			if (!ata_dev_enabled(dev) || dev->sdev)
+			if (dev->sdev)
 				continue;
 
 			if (ata_is_host_link(link))
@@ -3255,9 +3255,9 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 	 * failure occurred, scan would have failed silently.  Check
 	 * whether all devices are attached.
 	 */
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
-			if (ata_dev_enabled(dev) && !dev->sdev)
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
+			if (!dev->sdev)
 				goto exit_loop;
 		}
 	}
@@ -3381,7 +3381,7 @@ static void ata_scsi_handle_link_detach(struct ata_link *link)
 	struct ata_port *ap = link->ap;
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		unsigned long flags;
 
 		if (!(dev->flags & ATA_DFLAG_DETACHED))
@@ -3496,7 +3496,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 	if (devno == SCAN_WILD_CARD) {
 		struct ata_link *link;
 
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			struct ata_eh_info *ehi = &link->eh_info;
 			ehi->probe_mask |= ATA_ALL_DEVICES;
 			ehi->action |= ATA_EH_RESET;
@@ -3544,11 +3544,11 @@ void ata_scsi_dev_rescan(struct work_struct *work)
 
 	spin_lock_irqsave(ap->lock, flags);
 
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
 			struct scsi_device *sdev = dev->sdev;
 
-			if (!ata_dev_enabled(dev) || !sdev)
+			if (!sdev)
 				continue;
 			if (scsi_device_get(sdev))
 				continue;
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index 860ede526282..f828a29d7756 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -465,24 +465,22 @@ static int it821x_smart_set_mode(struct ata_link *link, struct ata_device **unus
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			/* We don't really care */
-			dev->pio_mode = XFER_PIO_0;
-			dev->dma_mode = XFER_MW_DMA_0;
-			/* We do need the right mode information for DMA or PIO
-			   and this comes from the current configuration flags */
-			if (ata_id_has_dma(dev->id)) {
-				ata_dev_printk(dev, KERN_INFO, "configured for DMA\n");
-				dev->xfer_mode = XFER_MW_DMA_0;
-				dev->xfer_shift = ATA_SHIFT_MWDMA;
-				dev->flags &= ~ATA_DFLAG_PIO;
-			} else {
-				ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
-				dev->xfer_mode = XFER_PIO_0;
-				dev->xfer_shift = ATA_SHIFT_PIO;
-				dev->flags |= ATA_DFLAG_PIO;
-			}
+	ata_for_each_dev(dev, link, ENABLED) {
+		/* We don't really care */
+		dev->pio_mode = XFER_PIO_0;
+		dev->dma_mode = XFER_MW_DMA_0;
+		/* We do need the right mode information for DMA or PIO
+		   and this comes from the current configuration flags */
+		if (ata_id_has_dma(dev->id)) {
+			ata_dev_printk(dev, KERN_INFO, "configured for DMA\n");
+			dev->xfer_mode = XFER_MW_DMA_0;
+			dev->xfer_shift = ATA_SHIFT_MWDMA;
+			dev->flags &= ~ATA_DFLAG_PIO;
+		} else {
+			ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
+			dev->xfer_mode = XFER_PIO_0;
+			dev->xfer_shift = ATA_SHIFT_PIO;
+			dev->flags |= ATA_DFLAG_PIO;
 		}
 	}
 	return 0;
diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c
index 2014253f6c88..b173c157ab00 100644
--- a/drivers/ata/pata_ixp4xx_cf.c
+++ b/drivers/ata/pata_ixp4xx_cf.c
@@ -30,14 +30,12 @@ static int ixp4xx_set_mode(struct ata_link *link, struct ata_device **error)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			ata_dev_printk(dev, KERN_INFO, "configured for PIO0\n");
-			dev->pio_mode = XFER_PIO_0;
-			dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO0\n");
+		dev->pio_mode = XFER_PIO_0;
+		dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
 	}
 	return 0;
 }
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 930c2208640b..cbd98c16eea4 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -194,15 +194,12 @@ static int legacy_set_mode(struct ata_link *link, struct ata_device **unused)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			ata_dev_printk(dev, KERN_INFO,
-						"configured for PIO\n");
-			dev->pio_mode = XFER_PIO_0;
-			dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
+		dev->pio_mode = XFER_PIO_0;
+		dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
 	}
 	return 0;
 }
@@ -1028,7 +1025,7 @@ static __init int legacy_init_one(struct legacy_probe *probe)
 	/* Nothing found means we drop the port as its probably not there */
 
 	ret = -ENODEV;
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL) {
 		if (!ata_dev_absent(dev)) {
 			legacy_host[probe->slot] = host;
 			ld->platform_dev = pdev;
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index 0e1c2c1134d3..d43e8be96b88 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -406,23 +406,20 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed
 	if (rc < 0)
 		return rc;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-
-			pdc2027x_set_piomode(ap, dev);
-
-			/*
-			 * Enable prefetch if the device support PIO only.
-			 */
-			if (dev->xfer_shift == ATA_SHIFT_PIO) {
-				u32 ctcr1 = ioread32(dev_mmio(ap, dev, PDC_CTCR1));
-				ctcr1 |= (1 << 25);
-				iowrite32(ctcr1, dev_mmio(ap, dev, PDC_CTCR1));
-
-				PDPRINTK("Turn on prefetch\n");
-			} else {
-				pdc2027x_set_dmamode(ap, dev);
-			}
+	ata_for_each_dev(dev, link, ENABLED) {
+		pdc2027x_set_piomode(ap, dev);
+
+		/*
+		 * Enable prefetch if the device support PIO only.
+		 */
+		if (dev->xfer_shift == ATA_SHIFT_PIO) {
+			u32 ctcr1 = ioread32(dev_mmio(ap, dev, PDC_CTCR1));
+			ctcr1 |= (1 << 25);
+			iowrite32(ctcr1, dev_mmio(ap, dev, PDC_CTCR1));
+
+			PDPRINTK("Turn on prefetch\n");
+		} else {
+			pdc2027x_set_dmamode(ap, dev);
 		}
 	}
 	return 0;
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 77e4e3b17f54..6afa07a37648 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -34,14 +34,12 @@ static int pata_platform_set_mode(struct ata_link *link, struct ata_device **unu
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			/* We don't really care */
-			dev->pio_mode = dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-			ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		/* We don't really care */
+		dev->pio_mode = dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
 	}
 	return 0;
 }
diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c
index 7dfd1f3f6f3a..46d6bc1bf1e9 100644
--- a/drivers/ata/pata_rz1000.c
+++ b/drivers/ata/pata_rz1000.c
@@ -38,15 +38,13 @@ static int rz1000_set_mode(struct ata_link *link, struct ata_device **unused)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			/* We don't really care */
-			dev->pio_mode = XFER_PIO_0;
-			dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-			ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		/* We don't really care */
+		dev->pio_mode = XFER_PIO_0;
+		dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
 	}
 	return 0;
 }
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 031d7b7dee34..177370921774 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -278,7 +278,7 @@ static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed)
 	if (rc)
 		return rc;
 
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		if (!ata_dev_enabled(dev))
 			dev_mode[dev->devno] = 0;	/* PIO0/1/2 */
 		else if (dev->flags & ATA_DFLAG_PIO)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index ed3f26eb5df1..3b2a0c6444ee 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1285,26 +1285,62 @@ static inline int ata_link_active(struct ata_link *link)
 	return ata_tag_valid(link->active_tag) || link->sactive;
 }
 
-extern struct ata_link *__ata_port_next_link(struct ata_port *ap,
-					     struct ata_link *link,
-					     bool dev_only);
-
-#define __ata_port_for_each_link(link, ap) \
-	for ((link) = __ata_port_next_link((ap), NULL, false); (link); \
-	     (link) = __ata_port_next_link((ap), (link), false))
-
-#define ata_port_for_each_link(link, ap) \
-	for ((link) = __ata_port_next_link((ap), NULL, true); (link); \
-	     (link) = __ata_port_next_link((ap), (link), true))
-
-#define ata_link_for_each_dev(dev, link) \
-	for ((dev) = (link)->device; \
-	     (dev) < (link)->device + ata_link_max_devices(link) || ((dev) = NULL); \
-	     (dev)++)
-
-#define ata_link_for_each_dev_reverse(dev, link) \
-	for ((dev) = (link)->device + ata_link_max_devices(link) - 1; \
-	     (dev) >= (link)->device || ((dev) = NULL); (dev)--)
+/*
+ * Iterators
+ *
+ * ATA_LITER_* constants are used to select link iteration mode and
+ * ATA_DITER_* device iteration mode.
+ *
+ * For a custom iteration directly using ata_{link|dev}_next(), if
+ * @link or @dev, respectively, is NULL, the first element is
+ * returned.  @dev and @link can be any valid device or link and the
+ * next element according to the iteration mode will be returned.
+ * After the last element, NULL is returned.
+ */
+enum ata_link_iter_mode {
+	ATA_LITER_EDGE,		/* if present, PMP links only; otherwise,
+				 * host link.  no slave link */
+	ATA_LITER_HOST_FIRST,	/* host link followed by PMP or slave links */
+	ATA_LITER_PMP_FIRST,	/* PMP links followed by host link,
+				 * slave link still comes after host link */
+};
+
+enum ata_dev_iter_mode {
+	ATA_DITER_ENABLED,
+	ATA_DITER_ENABLED_REVERSE,
+	ATA_DITER_ALL,
+	ATA_DITER_ALL_REVERSE,
+};
+
+extern struct ata_link *ata_link_next(struct ata_link *link,
+				      struct ata_port *ap,
+				      enum ata_link_iter_mode mode);
+
+extern struct ata_device *ata_dev_next(struct ata_device *dev,
+				       struct ata_link *link,
+				       enum ata_dev_iter_mode mode);
+
+/*
+ * Shortcut notation for iterations
+ *
+ * ata_for_each_link() iterates over each link of @ap according to
+ * @mode.  @link points to the current link in the loop.  @link is
+ * NULL after loop termination.  ata_for_each_dev() works the same way
+ * except that it iterates over each device of @link.
+ *
+ * Note that the mode prefixes ATA_{L|D}ITER_ shouldn't need to be
+ * specified when using the following shorthand notations.  Only the
+ * mode itself (EDGE, HOST_FIRST, ENABLED, etc...) should be
+ * specified.  This not only increases brevity but also makes it
+ * impossible to use ATA_LITER_* for device iteration or vice-versa.
+ */
+#define ata_for_each_link(link, ap, mode) \
+	for ((link) = ata_link_next(NULL, (ap), ATA_LITER_##mode); (link); \
+	     (link) = ata_link_next((link), (ap), ATA_LITER_##mode))
+
+#define ata_for_each_dev(dev, link, mode) \
+	for ((dev) = ata_dev_next(NULL, (link), ATA_DITER_##mode); (dev); \
+	     (dev) = ata_dev_next((dev), (link), ATA_DITER_##mode))
 
 /**
  *	ata_ncq_enabled - Test whether NCQ is enabled
-- 
cgit v1.2.3


From ece180d1cfe5fa751eaa85bf796cf28b2150af15 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 3 Nov 2008 20:04:37 +0900
Subject: libata: perform port detach in EH

ata_port_detach() first made sure EH saw ATA_PFLAG_UNLOADING and then
assumed EH context belongs to it and performed detach operation
itself.  However, UNLOADING doesn't disable all of EH and this could
lead to problems including triggering WARN_ON()'s in EH path.

This patch makes port detach behave more like other EH actions such
that ata_port_detach() requests EH to detach and waits for completion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 23 ++++-------------------
 drivers/ata/libata-eh.c   | 32 +++++++++++++++++++++++++++++++-
 include/linux/libata.h    |  3 ++-
 3 files changed, 37 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 1ecc3cb0b722..837fb60a6dcc 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6107,8 +6107,6 @@ int ata_host_activate(struct ata_host *host, int irq,
 static void ata_port_detach(struct ata_port *ap)
 {
 	unsigned long flags;
-	struct ata_link *link;
-	struct ata_device *dev;
 
 	if (!ap->ops->error_handler)
 		goto skip_eh;
@@ -6116,28 +6114,15 @@ static void ata_port_detach(struct ata_port *ap)
 	/* tell EH we're leaving & flush EH */
 	spin_lock_irqsave(ap->lock, flags);
 	ap->pflags |= ATA_PFLAG_UNLOADING;
+	ata_port_schedule_eh(ap);
 	spin_unlock_irqrestore(ap->lock, flags);
 
+	/* wait till EH commits suicide */
 	ata_port_wait_eh(ap);
 
-	/* EH is now guaranteed to see UNLOADING - EH context belongs
-	 * to us.  Restore SControl and disable all existing devices.
-	 */
-	ata_for_each_link(link, ap, PMP_FIRST) {
-		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
-		ata_for_each_dev(dev, link, ALL)
-			ata_dev_disable(dev);
-	}
-
-	/* Final freeze & EH.  All in-flight commands are aborted.  EH
-	 * will be skipped and retrials will be terminated with bad
-	 * target.
-	 */
-	spin_lock_irqsave(ap->lock, flags);
-	ata_port_freeze(ap);	/* won't be thawed */
-	spin_unlock_irqrestore(ap->lock, flags);
+	/* it better be dead now */
+	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
 
-	ata_port_wait_eh(ap);
 	cancel_rearming_delayed_work(&ap->hotplug_task);
 
  skip_eh:
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index d673f3712bdc..8147a8386370 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -491,6 +491,31 @@ enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
 	return ret;
 }
 
+static void ata_eh_unload(struct ata_port *ap)
+{
+	struct ata_link *link;
+	struct ata_device *dev;
+	unsigned long flags;
+
+	/* Restore SControl IPM and SPD for the next driver and
+	 * disable attached devices.
+	 */
+	ata_for_each_link(link, ap, PMP_FIRST) {
+		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
+		ata_for_each_dev(dev, link, ALL)
+			ata_dev_disable(dev);
+	}
+
+	/* freeze and set UNLOADED */
+	spin_lock_irqsave(ap->lock, flags);
+
+	ata_port_freeze(ap);			/* won't be thawed */
+	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
+	ap->pflags |= ATA_PFLAG_UNLOADED;
+
+	spin_unlock_irqrestore(ap->lock, flags);
+}
+
 /**
  *	ata_scsi_error - SCSI layer error handler callback
  *	@host: SCSI host on which error occurred
@@ -618,8 +643,13 @@ void ata_scsi_error(struct Scsi_Host *host)
 		/* invoke EH, skip if unloading or suspended */
 		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
 			ap->ops->error_handler(ap);
-		else
+		else {
+			/* if unloading, commence suicide */
+			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
+			    !(ap->pflags & ATA_PFLAG_UNLOADED))
+				ata_eh_unload(ap);
 			ata_eh_finish(ap);
+		}
 
 		/* process port suspend request */
 		ata_eh_handle_port_suspend(ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3b2a0c6444ee..3449de597eff 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -213,10 +213,11 @@ enum {
 	ATA_PFLAG_FROZEN	= (1 << 2), /* port is frozen */
 	ATA_PFLAG_RECOVERED	= (1 << 3), /* recovery action performed */
 	ATA_PFLAG_LOADING	= (1 << 4), /* boot/loading probe */
-	ATA_PFLAG_UNLOADING	= (1 << 5), /* module is unloading */
 	ATA_PFLAG_SCSI_HOTPLUG	= (1 << 6), /* SCSI hotplug scheduled */
 	ATA_PFLAG_INITIALIZING	= (1 << 7), /* being initialized, don't touch */
 	ATA_PFLAG_RESETTING	= (1 << 8), /* reset in progress */
+	ATA_PFLAG_UNLOADING	= (1 << 9), /* driver is being unloaded */
+	ATA_PFLAG_UNLOADED	= (1 << 10), /* driver is unloaded */
 
 	ATA_PFLAG_SUSPENDED	= (1 << 17), /* port is suspended (power) */
 	ATA_PFLAG_PM_PENDING	= (1 << 18), /* PM operation pending */
-- 
cgit v1.2.3


From 88e740f1654bf28565edd528a060695c1f2b5ad7 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vázquez Cao <fernando@oss.ntt.co.jp>
Date: Mon, 27 Oct 2008 18:44:46 +0900
Subject: block: add queue flag for paravirt frontend drivers

As is the case with SSD devices, we do not want to idle in AS/CFQ when
the block device is a paravirt front-end driver. This patch adds a flag
(QUEUE_FLAG_VIRT) which should be used by front-end drivers such as
virtio_blk and xen-blkfront to indicate a paravirtualized device.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 031a315c0509..482e9600f7a2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -449,6 +449,7 @@ struct request_queue
 #define QUEUE_FLAG_FAIL_IO     12	/* fake timeout */
 #define QUEUE_FLAG_STACKABLE   13	/* supports request stacking */
 #define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
+#define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 
 static inline int queue_is_locked(struct request_queue *q)
 {
-- 
cgit v1.2.3


From 08bafc0341f2f7920e9045bc32c40299cac8c21b Mon Sep 17 00:00:00 2001
From: Keith Mannthey <kmannth@us.ibm.com>
Date: Tue, 25 Nov 2008 10:24:35 +0100
Subject: block: Supress Buffer I/O errors when SCSI REQ_QUIET flag set

Allow the scsi request REQ_QUIET flag to be propagated to the buffer
file system layer. The basic ideas is to pass the flag from the scsi
request to the bio (block IO) and then to the buffer layer.  The buffer
layer can then suppress needless printks.

This patch declutters the kernel log by removed the 40-50 (per lun)
buffer io error messages seen during a boot in my multipath setup . It
is a good chance any real errors will be missed in the "noise" it the
logs without this patch.

During boot I see blocks of messages like
"
__ratelimit: 211 callbacks suppressed
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242847
Buffer I/O error on device sdm, logical block 1
Buffer I/O error on device sdm, logical block 5242878
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242872
"
in my logs.

My disk environment is multipath fiber channel using the SCSI_DH_RDAC
code and multipathd.  This topology includes an "active" and "ghost"
path for each lun. IO's to the "ghost" path will never complete and the
SCSI layer, via the scsi device handler rdac code, quick returns the IOs
to theses paths and sets the REQ_QUIET scsi flag to suppress the scsi
layer messages.

 I am wanting to extend the QUIET behavior to include the buffer file
system layer to deal with these errors as well. I have been running this
patch for a while now on several boxes without issue.  A few runs of
bonnie++ show no noticeable difference in performance in my setup.

Thanks for John Stultz for the quiet_error finalization.

Submitted-by:  Keith Mannthey <kmannth@us.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c            |  3 +++
 fs/buffer.c                 | 19 +++++++++++++++----
 include/linux/bio.h         |  1 +
 include/linux/buffer_head.h |  1 +
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 243d18b4ceb0..20e1724ccb4c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -153,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 			nbytes = bio->bi_size;
 		}
 
+		if (unlikely(rq->cmd_flags & REQ_QUIET))
+			set_bit(BIO_QUIET, &bio->bi_flags);
+
 		bio->bi_size -= nbytes;
 		bio->bi_sector += (nbytes >> 9);
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 10179cfa1152..776ae091d3b0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
 	page_cache_release(page);
 }
 
+
+static int quiet_error(struct buffer_head *bh)
+{
+	if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
+		return 0;
+	return 1;
+}
+
+
 static void buffer_io_error(struct buffer_head *bh)
 {
 	char b[BDEVNAME_SIZE];
-
 	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
 			bdevname(bh->b_bdev, b),
 			(unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		set_buffer_uptodate(bh);
 	} else {
 		clear_buffer_uptodate(bh);
-		if (printk_ratelimit())
+		if (!quiet_error(bh))
 			buffer_io_error(bh);
 		SetPageError(page);
 	}
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (printk_ratelimit()) {
+		if (!quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 		set_bit(BH_Eopnotsupp, &bh->b_state);
 	}
 
+	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
+		set_bit(BH_Quiet, &bh->b_state);
+
 	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
 	bio_put(bio);
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6a642098e5c3..cf132bfbbacf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -117,6 +117,7 @@ struct bio {
 #define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
 #define BIO_NULL_MAPPED 9	/* contains invalid user pages */
 #define BIO_FS_INTEGRITY 10	/* fs owns integrity data, not block layer */
+#define BIO_QUIET	11	/* Make BIO Quiet */
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 3ce64b90118c..8605f8a74df9 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -35,6 +35,7 @@ enum bh_state_bits {
 	BH_Ordered,	/* ordered write */
 	BH_Eopnotsupp,	/* operation not supported (barrier) */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
+	BH_Quiet,	/* Buffer Error Prinks to be quiet */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities
-- 
cgit v1.2.3


From 64d01dc9e1927e6535627d73f2336c75d1dd3fe2 Mon Sep 17 00:00:00 2001
From: Cheng Renquan <crquan@gmail.com>
Date: Wed, 3 Dec 2008 12:41:39 +0100
Subject: block: use cancel_work_sync() instead of kblockd_flush_work()

After many improvements on kblockd_flush_work, it is now identical to
cancel_work_sync, so a direct call to cancel_work_sync is suggested.

The only difference is that cancel_work_sync is a GPL symbol,
so no non-GPL modules anymore.

Signed-off-by: Cheng Renquan <crquan@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/as-iosched.c     | 2 +-
 block/blk-core.c       | 8 +-------
 block/cfq-iosched.c    | 2 +-
 include/linux/blkdev.h | 1 -
 4 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 71f0abb219ee..802b5d0d8536 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1344,7 +1344,7 @@ static void as_exit_queue(elevator_t *e)
 	struct as_data *ad = e->elevator_data;
 
 	del_timer_sync(&ad->antic_timer);
-	kblockd_flush_work(&ad->antic_work);
+	cancel_work_sync(&ad->antic_work);
 
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
diff --git a/block/blk-core.c b/block/blk-core.c
index 20e1724ccb4c..2fdcd0cff57f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -408,7 +408,7 @@ void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->unplug_timer);
 	del_timer_sync(&q->timeout);
-	kblockd_flush_work(&q->unplug_work);
+	cancel_work_sync(&q->unplug_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -2147,12 +2147,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-void kblockd_flush_work(struct work_struct *work)
-{
-	cancel_work_sync(work);
-}
-EXPORT_SYMBOL(kblockd_flush_work);
-
 int __init blk_dev_init(void)
 {
 	kblockd_workqueue = create_workqueue("kblockd");
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a062eebbd15..a2bfec7d6b36 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2160,7 +2160,7 @@ out_cont:
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
 	del_timer_sync(&cfqd->idle_slice_timer);
-	kblockd_flush_work(&cfqd->unplug_work);
+	cancel_work_sync(&cfqd->unplug_work);
 }
 
 static void cfq_put_async_queues(struct cfq_data *cfqd)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 482e9600f7a2..e9bb73ff1d64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -978,7 +978,6 @@ static inline void put_dev_sector(Sector p)
 
 struct work_struct;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-void kblockd_flush_work(struct work_struct *work);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
-- 
cgit v1.2.3


From ba744d5e290055d171c68067259fcc1e2721f542 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Wed, 3 Dec 2008 12:41:40 +0100
Subject: block: reorder struct bio to remove padding on 64bit

Remove 8 bytes of padding from struct bio which also removes 16 bytes from
struct bio_pair to make it 248 bytes.  bio_pair then fits into one fewer
cache lines & into a smaller slab.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index cf132bfbbacf..3ed714eb54d9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -90,10 +90,11 @@ struct bio {
 
 	unsigned int		bi_comp_cpu;	/* completion CPU */
 
+	atomic_t		bi_cnt;		/* pin count */
+
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
 
 	bio_end_io_t		*bi_end_io;
-	atomic_t		bi_cnt;		/* pin count */
 
 	void			*bi_private;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-- 
cgit v1.2.3


From 313e42999dbc0f234ca5909a236f78f082cb43b1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:02 +0900
Subject: block: reorganize QUEUE_ORDERED_* constants

Separate out ordering type (drain,) and action masks (preflush,
postflush, fua) from visible ordering mode selectors
(QUEUE_ORDERED_*).  Ordering types are now named QUEUE_ORDERED_BY_*
while action masks are named QUEUE_ORDERED_DO_*.

This change is necessary to add QUEUE_ORDERED_DO_BAR and make it
optional to improve empty barrier implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 20 ++++++++++----------
 include/linux/blkdev.h | 39 +++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6e72d661ae42..1d7adc72c95d 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,8 +24,8 @@
 int blk_queue_ordered(struct request_queue *q, unsigned ordered,
 		      prepare_flush_fn *prepare_flush_fn)
 {
-	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
-	    prepare_flush_fn == NULL) {
+	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+					     QUEUE_ORDERED_DO_POSTFLUSH))) {
 		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
 		return -EINVAL;
 	}
@@ -134,7 +134,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	struct request *rq;
 	rq_end_io_fn *end_io;
 
-	if (which == QUEUE_ORDERED_PREFLUSH) {
+	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
 		rq = &q->pre_flush_rq;
 		end_io = pre_flush_end_io;
 	} else {
@@ -167,7 +167,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 	blk_rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
-	if (q->ordered & QUEUE_ORDERED_FUA)
+	if (q->ordered & QUEUE_ORDERED_DO_FUA)
 		rq->cmd_flags |= REQ_FUA;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
@@ -181,20 +181,20 @@ static inline struct request *start_ordered(struct request_queue *q,
 	 * there will be no data written between the pre and post flush.
 	 * Hence a single flush will suffice.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
-		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
+	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) && !blk_empty_barrier(rq))
+		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
 	else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 
-	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
 		rq = &q->pre_flush_rq;
 	} else
 		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
+	if ((q->ordered & QUEUE_ORDERED_BY_TAG) || q->in_flight == 0)
 		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
 	else
 		rq = NULL;
@@ -237,7 +237,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
 		return 1;
 
-	if (q->ordered & QUEUE_ORDERED_TAG) {
+	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
 		/* Ordered by tag.  Blocking the next barrier is enough. */
 		if (is_barrier && rq != &q->bar_rq)
 			*rqp = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e9bb73ff1d64..5c92b4432399 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -523,22 +523,29 @@ enum {
 	 * TAG_FLUSH	: ordering by tag w/ pre and post flushes
 	 * TAG_FUA	: ordering by tag w/ pre flush and FUA write
 	 */
-	QUEUE_ORDERED_NONE	= 0x00,
-	QUEUE_ORDERED_DRAIN	= 0x01,
-	QUEUE_ORDERED_TAG	= 0x02,
-
-	QUEUE_ORDERED_PREFLUSH	= 0x10,
-	QUEUE_ORDERED_POSTFLUSH	= 0x20,
-	QUEUE_ORDERED_FUA	= 0x40,
-
-	QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
-	QUEUE_ORDERED_DRAIN_FUA	= QUEUE_ORDERED_DRAIN |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
-	QUEUE_ORDERED_TAG_FLUSH	= QUEUE_ORDERED_TAG |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
-	QUEUE_ORDERED_TAG_FUA	= QUEUE_ORDERED_TAG |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+	QUEUE_ORDERED_BY_DRAIN		= 0x01,
+	QUEUE_ORDERED_BY_TAG		= 0x02,
+	QUEUE_ORDERED_DO_PREFLUSH	= 0x10,
+	QUEUE_ORDERED_DO_POSTFLUSH	= 0x40,
+	QUEUE_ORDERED_DO_FUA		= 0x80,
+
+	QUEUE_ORDERED_NONE		= 0x00,
+
+	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_BY_DRAIN,
+	QUEUE_ORDERED_DRAIN_FLUSH	= QUEUE_ORDERED_DRAIN |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_POSTFLUSH,
+	QUEUE_ORDERED_DRAIN_FUA		= QUEUE_ORDERED_DRAIN |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_FUA,
+
+	QUEUE_ORDERED_TAG		= QUEUE_ORDERED_BY_TAG,
+	QUEUE_ORDERED_TAG_FLUSH		= QUEUE_ORDERED_TAG |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_POSTFLUSH,
+	QUEUE_ORDERED_TAG_FUA		= QUEUE_ORDERED_TAG |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_FUA,
 
 	/*
 	 * Ordered operation sequence
-- 
cgit v1.2.3


From f671620e7d895af221bdfeda751d54fa55ed9546 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:04 +0900
Subject: block: make every barrier action optional

In all barrier sequences, the barrier write itself was always assumed
to be issued and thus didn't have corresponding control flag.  This
patch adds QUEUE_ORDERED_DO_BAR and unify action mask handling in
start_ordered() such that any barrier action can be skipped.

This patch doesn't introduce any visible behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 41 ++++++++++++++++++++++++-----------------
 include/linux/blkdev.h |  7 +++++--
 2 files changed, 29 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 43d479a1e664..1efabf829c53 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -158,19 +158,10 @@ static inline struct request *start_ordered(struct request_queue *q,
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
-	/*
-	 * Prep proxy barrier request.
-	 */
+	/* stash away the original request */
 	elv_dequeue_request(q, rq);
 	q->orig_bar_rq = rq;
-	rq = &q->bar_rq;
-	blk_rq_init(q, rq);
-	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-		rq->cmd_flags |= REQ_RW;
-	if (q->ordered & QUEUE_ORDERED_DO_FUA)
-		rq->cmd_flags |= REQ_FUA;
-	init_request_from_bio(rq, q->orig_bar_rq->bio);
-	rq->end_io = bar_end_io;
+	rq = NULL;
 
 	/*
 	 * Queue ordered sequence.  As we stack them at the head, we
@@ -181,12 +172,28 @@ static inline struct request *start_ordered(struct request_queue *q,
 	 * there will be no data written between the pre and post flush.
 	 * Hence a single flush will suffice.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) && !blk_empty_barrier(rq))
+	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) &&
+	    !blk_empty_barrier(q->orig_bar_rq)) {
 		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
-	else
+		rq = &q->post_flush_rq;
+	} else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
 
-	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+		rq = &q->bar_rq;
+
+		/* initialize proxy request and queue it */
+		blk_rq_init(q, rq);
+		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+			rq->cmd_flags |= REQ_RW;
+		if (q->ordered & QUEUE_ORDERED_DO_FUA)
+			rq->cmd_flags |= REQ_FUA;
+		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->end_io = bar_end_io;
+
+		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	} else
+		q->ordseq |= QUEUE_ORDSEQ_BAR;
 
 	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
@@ -194,10 +201,10 @@ static inline struct request *start_ordered(struct request_queue *q,
 	} else
 		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_BY_TAG) || q->in_flight == 0)
-		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
-	else
+	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
 		rq = NULL;
+	else
+		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
 
 	return rq;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c92b4432399..b044267009ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -526,12 +526,14 @@ enum {
 	QUEUE_ORDERED_BY_DRAIN		= 0x01,
 	QUEUE_ORDERED_BY_TAG		= 0x02,
 	QUEUE_ORDERED_DO_PREFLUSH	= 0x10,
+	QUEUE_ORDERED_DO_BAR		= 0x20,
 	QUEUE_ORDERED_DO_POSTFLUSH	= 0x40,
 	QUEUE_ORDERED_DO_FUA		= 0x80,
 
 	QUEUE_ORDERED_NONE		= 0x00,
 
-	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_BY_DRAIN,
+	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_BY_DRAIN |
+					  QUEUE_ORDERED_DO_BAR,
 	QUEUE_ORDERED_DRAIN_FLUSH	= QUEUE_ORDERED_DRAIN |
 					  QUEUE_ORDERED_DO_PREFLUSH |
 					  QUEUE_ORDERED_DO_POSTFLUSH,
@@ -539,7 +541,8 @@ enum {
 					  QUEUE_ORDERED_DO_PREFLUSH |
 					  QUEUE_ORDERED_DO_FUA,
 
-	QUEUE_ORDERED_TAG		= QUEUE_ORDERED_BY_TAG,
+	QUEUE_ORDERED_TAG		= QUEUE_ORDERED_BY_TAG |
+					  QUEUE_ORDERED_DO_BAR,
 	QUEUE_ORDERED_TAG_FLUSH		= QUEUE_ORDERED_TAG |
 					  QUEUE_ORDERED_DO_PREFLUSH |
 					  QUEUE_ORDERED_DO_POSTFLUSH,
-- 
cgit v1.2.3


From 8f11b3e99a1136fcbb67316c3260f085299c0bff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:05 +0900
Subject: block: make barrier completion more robust

Barrier completion had the following assumptions.

* start_ordered() couldn't finish the whole sequence properly.  If all
  actions are to be skipped, q->ordseq is set correctly but the actual
  completion was never triggered thus hanging the barrier request.

* Drain completion in elv_complete_request() assumed that there's
  always at least one request in the queue when drain completes.

Both assumptions are true but these assumptions need to be removed to
improve empty barrier implementation.  This patch makes the following
changes.

* Make start_ordered() use blk_ordered_complete_seq() to mark skipped
  steps complete and notify __elv_next_request() that it should fetch
  the next request if the whole barrier has completed inside
  start_ordered().

* Make drain completion path in elv_complete_request() check whether
  the queue is empty.  Empty queue also indicates drain completion.

* While at it, convert 0/1 return from blk_do_ordered() to false/true.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 45 +++++++++++++++++++++++++++------------------
 block/elevator.c       | 10 +++++++---
 include/linux/blkdev.h |  4 ++--
 3 files changed, 36 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 1efabf829c53..b03d88013e1e 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
 		return QUEUE_ORDSEQ_DONE;
 }
 
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 {
 	struct request *rq;
 
@@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	q->ordseq |= seq;
 
 	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-		return;
+		return false;
 
 	/*
 	 * Okay, sequence complete.
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 
 	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
 		BUG();
+
+	return true;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -151,9 +153,11 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-					    struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 {
+	struct request *rq = *rqp;
+	unsigned skip = 0;
+
 	q->orderr = 0;
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
@@ -177,7 +181,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
 		rq = &q->post_flush_rq;
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
 
 	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
 		rq = &q->bar_rq;
@@ -193,35 +197,40 @@ static inline struct request *start_ordered(struct request_queue *q,
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_BAR;
+		skip |= QUEUE_ORDSEQ_BAR;
 
 	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
 		rq = &q->pre_flush_rq;
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
 	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
 		rq = NULL;
 	else
-		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
+		skip |= QUEUE_ORDSEQ_DRAIN;
+
+	*rqp = rq;
 
-	return rq;
+	/*
+	 * Complete skipped sequences.  If whole sequence is complete,
+	 * return false to tell elevator that this request is gone.
+	 */
+	return !blk_ordered_complete_seq(q, skip, 0);
 }
 
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
 	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
-			return 1;
+			return true;
 
-		if (q->next_ordered != QUEUE_ORDERED_NONE) {
-			*rqp = start_ordered(q, rq);
-			return 1;
-		} else {
+		if (q->next_ordered != QUEUE_ORDERED_NONE)
+			return start_ordered(q, rqp);
+		else {
 			/*
 			 * Queue ordering not supported.  Terminate
 			 * with prejudice.
@@ -231,7 +240,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 					      blk_rq_bytes(rq)))
 				BUG();
 			*rqp = NULL;
-			return 0;
+			return false;
 		}
 	}
 
@@ -242,7 +251,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 	/* Special requests are not subject to ordering rules. */
 	if (!blk_fs_request(rq) &&
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return 1;
+		return true;
 
 	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
 		/* Ordered by tag.  Blocking the next barrier is enough. */
@@ -255,7 +264,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 			*rqp = NULL;
 	}
 
-	return 1;
+	return true;
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/elevator.c b/block/elevator.c
index 86836dd179c0..261ffaaf47bd 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -944,10 +944,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	 * drained for flush sequence.
 	 */
 	if (unlikely(q->ordseq)) {
-		struct request *first_rq = list_entry_rq(q->queue_head.next);
-		if (q->in_flight == 0 &&
+		struct request *next = NULL;
+
+		if (!list_empty(&q->queue_head))
+			next = list_entry_rq(q->queue_head.next);
+
+		if (!q->in_flight &&
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
-		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
 			blk_start_queueing(q);
 		}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b044267009ed..3c7078e0129d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -866,10 +866,10 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
-extern int blk_do_ordered(struct request_queue *, struct request **);
+extern bool blk_do_ordered(struct request_queue *, struct request **);
 extern unsigned blk_ordered_cur_seq(struct request_queue *);
 extern unsigned blk_ordered_req_seq(struct request *);
-extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
+extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
-- 
cgit v1.2.3


From 58eea927d2de43dc6f03d1ba2c46e55854b31540 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:06 +0900
Subject: block: simplify empty barrier implementation

Empty barrier required special handling in __elv_next_request() to
complete it without letting the low level driver see it.

With previous changes, barrier code is now flexible enough to skip the
BAR step using the same barrier sequence selection mechanism.  Drop
the special handling and mask off q->ordered from start_ordered().

Remove blk_empty_barrier() test which now has no user.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 16 ++++++++++------
 block/elevator.c       |  8 --------
 include/linux/blkdev.h |  1 -
 3 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index b03d88013e1e..c63044e9c4c0 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -162,6 +162,14 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
+	/*
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
+	 */
+	if (!rq->hard_nr_sectors)
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
+
 	/* stash away the original request */
 	elv_dequeue_request(q, rq);
 	q->orig_bar_rq = rq;
@@ -171,13 +179,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	 * Queue ordered sequence.  As we stack them at the head, we
 	 * need to queue in reverse order.  Note that we rely on that
 	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence. If this request is
-	 * an empty barrier, we don't need to do a postflush ever since
-	 * there will be no data written between the pre and post flush.
-	 * Hence a single flush will suffice.
+	 * request gets inbetween ordered sequence.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) &&
-	    !blk_empty_barrier(q->orig_bar_rq)) {
+	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
 		rq = &q->post_flush_rq;
 	} else
diff --git a/block/elevator.c b/block/elevator.c
index 261ffaaf47bd..ff60177a3bab 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -755,14 +755,6 @@ struct request *elv_next_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * Kill the empty barrier place holder, the driver must
-		 * not ever see it.
-		 */
-		if (blk_empty_barrier(rq)) {
-			__blk_end_request(rq, 0, blk_rq_bytes(rq));
-			continue;
-		}
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3c7078e0129d..41bbadfd17f6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -596,7 +596,6 @@ enum {
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
 #define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
-#define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
 /* rq->queuelist of dequeued request must be list_empty() */
 #define blk_queued_rq(rq)	(!list_empty(&(rq)->queuelist))
 
-- 
cgit v1.2.3


From 7ff9345ffac56743b5001561bc2dc1e041b79149 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 11 Dec 2008 11:53:43 +0100
Subject: bio: only mempool back the largest bio_vec slab cache

We only very rarely need the mempool backing, so it makes sense to
get rid of all but one of the mempool in a bio_set. So keep the
largest bio_vec count mempool so we can always honor the largest
allocation, and "upgrade" callers that fail.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c            | 125 ++++++++++++++++++++++++++++++----------------------
 include/linux/bio.h |   3 +-
 2 files changed, 74 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/fs/bio.c b/fs/bio.c
index df99c882b807..eb6b4683a265 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -58,7 +58,8 @@ unsigned int bvec_nr_vecs(unsigned short idx)
 	return bvec_slabs[idx].nr_vecs;
 }
 
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
+			      struct bio_set *bs)
 {
 	struct bio_vec *bvl;
 
@@ -67,60 +68,90 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
 	 * If not, this is a bio_kmalloc() allocation and just do a
 	 * kzalloc() for the exact number of vecs right away.
 	 */
-	if (bs) {
+	if (!bs)
+		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+
+	/*
+	 * see comment near bvec_array define!
+	 */
+	switch (nr) {
+	case 1:
+		*idx = 0;
+		break;
+	case 2 ... 4:
+		*idx = 1;
+		break;
+	case 5 ... 16:
+		*idx = 2;
+		break;
+	case 17 ... 64:
+		*idx = 3;
+		break;
+	case 65 ... 128:
+		*idx = 4;
+		break;
+	case 129 ... BIO_MAX_PAGES:
+		*idx = 5;
+		break;
+	default:
+		return NULL;
+	}
+
+	/*
+	 * idx now points to the pool we want to allocate from. only the
+	 * 1-vec entry pool is mempool backed.
+	 */
+	if (*idx == BIOVEC_MAX_IDX) {
+fallback:
+		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+	} else {
+		struct biovec_slab *bvs = bvec_slabs + *idx;
+		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
 		/*
-		 * see comment near bvec_array define!
+		 * Make this allocation restricted and don't dump info on
+		 * allocation failures, since we'll fallback to the mempool
+		 * in case of failure.
 		 */
-		switch (nr) {
-		case 1:
-			*idx = 0;
-			break;
-		case 2 ... 4:
-			*idx = 1;
-			break;
-		case 5 ... 16:
-			*idx = 2;
-			break;
-		case 17 ... 64:
-			*idx = 3;
-			break;
-		case 65 ... 128:
-			*idx = 4;
-			break;
-		case 129 ... BIO_MAX_PAGES:
-			*idx = 5;
-			break;
-		default:
-			return NULL;
-		}
+		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 		/*
-		 * idx now points to the pool we want to allocate from
+		 * Try a slab allocation. If this fails and __GFP_WAIT
+		 * is set, retry with the 1-entry mempool
 		 */
-		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-		if (bvl)
-			memset(bvl, 0,
-				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
-	} else
-		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
+		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+			*idx = BIOVEC_MAX_IDX;
+			goto fallback;
+		}
+	}
+
+	if (bvl)
+		memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
 
 	return bvl;
 }
 
-void bio_free(struct bio *bio, struct bio_set *bio_set)
+void bio_free(struct bio *bio, struct bio_set *bs)
 {
 	if (bio->bi_io_vec) {
 		const int pool_idx = BIO_POOL_IDX(bio);
 
 		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 
-		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+		if (pool_idx == BIOVEC_MAX_IDX)
+			mempool_free(bio->bi_io_vec, bs->bvec_pool);
+		else {
+			struct biovec_slab *bvs = bvec_slabs + pool_idx;
+
+			kmem_cache_free(bvs->slab, bio->bi_io_vec);
+		}
 	}
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio, bio_set);
+		bio_integrity_free(bio, bs);
 
-	mempool_free(bio, bio_set->bio_pool);
+	mempool_free(bio, bs->bio_pool);
 }
 
 /*
@@ -1346,30 +1377,18 @@ EXPORT_SYMBOL(bio_sector_offset);
  */
 static int biovec_create_pools(struct bio_set *bs, int pool_entries)
 {
-	int i;
+	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		struct biovec_slab *bp = bvec_slabs + i;
-		mempool_t **bvp = bs->bvec_pools + i;
+	bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
+	if (!bs->bvec_pool)
+		return -ENOMEM;
 
-		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
-		if (!*bvp)
-			return -ENOMEM;
-	}
 	return 0;
 }
 
 static void biovec_free_pools(struct bio_set *bs)
 {
-	int i;
-
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		mempool_t *bvp = bs->bvec_pools[i];
-
-		if (bvp)
-			mempool_destroy(bvp);
-	}
-
+	mempool_destroy(bs->bvec_pool);
 }
 
 void bioset_free(struct bio_set *bs)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3ed714eb54d9..d76e4bf22f29 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -397,13 +397,14 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
  */
 #define BIO_POOL_SIZE 2
 #define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX	(BIOVEC_NR_POOLS - 1)
 
 struct bio_set {
 	mempool_t *bio_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	mempool_t *bio_integrity_pool;
 #endif
-	mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+	mempool_t *bvec_pool;
 };
 
 struct biovec_slab {
-- 
cgit v1.2.3


From 1b4344986926da324b5cd10b683e5a1a5e1b7db3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 22 Oct 2008 20:32:58 +0200
Subject: bio: move the slab pointer inside the bio_set

In preparation for adding differently sized bios.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c            | 7 +++++--
 include/linux/bio.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/bio.c b/fs/bio.c
index eb6b4683a265..1ab8986b0411 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1404,12 +1404,15 @@ void bioset_free(struct bio_set *bs)
 
 struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
 {
-	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
+	struct bio_set *bs;
 
+	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 	if (!bs)
 		return NULL;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
+	bs->bio_slab = bio_slab;
+
+	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bs->bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d76e4bf22f29..9340098d75dc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -400,6 +400,7 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
 #define BIOVEC_MAX_IDX	(BIOVEC_NR_POOLS - 1)
 
 struct bio_set {
+	struct kmem_cache *bio_slab;
 	mempool_t *bio_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	mempool_t *bio_integrity_pool;
-- 
cgit v1.2.3


From bb799ca0202a360fa74d5f17039b9100caebdde7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 10 Dec 2008 15:35:05 +0100
Subject: bio: allow individual slabs in the bio_set

Instead of having a global bio slab cache, add a reference to one
in each bio_set that is created. This allows for personalized slabs
in each bio_set, so that they can have bios of different sizes.

This means we can personalize the bios we return. File systems may
want to embed the bio inside another structure, to avoid allocation
more items (and stuffing them in ->bi_private) after the get a bio.
Or we may want to embed a number of bio_vecs directly at the end
of a bio, to avoid doing two allocations to return a bio. This is now
possible.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/md/dm-crypt.c |   2 +-
 drivers/md/dm-io.c    |   2 +-
 drivers/md/dm.c       |   2 +-
 fs/bio-integrity.c    |   2 +-
 fs/bio.c              | 191 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/bio.h   |   6 +-
 6 files changed, 170 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ce26c84af064..3326750ec02c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1060,7 +1060,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_page_pool;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, MIN_IOS);
+	cc->bs = bioset_create(MIN_IOS, 0);
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2fd6d4450637..a34338567a2a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -56,7 +56,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(16, 16);
+	client->bios = bioset_create(16, 0);
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 343094c3feeb..421c9f02d8ca 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1093,7 +1093,7 @@ static struct mapped_device *alloc_dev(int minor)
 	if (!md->tio_pool)
 		goto bad_tio_pool;
 
-	md->bs = bioset_create(16, 16);
+	md->bs = bioset_create(16, 0);
 	if (!md->bs)
 		goto bad_no_bioset;
 
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 19caf7c962ac..77ebc3c263d6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
 	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
-	mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
+	bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
 	mempool_free(bip, bs->bio_integrity_pool);
 
 	bio->bi_integrity = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 1ab8986b0411..0146f80789e9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,8 +31,6 @@
 
 DEFINE_TRACE(block_split);
 
-static struct kmem_cache *bio_slab __read_mostly;
-
 static mempool_t *bio_split_pool __read_mostly;
 
 /*
@@ -40,9 +38,8 @@ static mempool_t *bio_split_pool __read_mostly;
  * break badly! cannot be bigger than what you can fit into an
  * unsigned short
  */
-
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -53,11 +50,119 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
  */
 struct bio_set *fs_bio_set;
 
+/*
+ * Our slab pool management
+ */
+struct bio_slab {
+	struct kmem_cache *slab;
+	unsigned int slab_ref;
+	unsigned int slab_size;
+	char name[8];
+};
+static DEFINE_MUTEX(bio_slab_lock);
+static struct bio_slab *bio_slabs;
+static unsigned int bio_slab_nr, bio_slab_max;
+
+static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
+{
+	unsigned int sz = sizeof(struct bio) + extra_size;
+	struct kmem_cache *slab = NULL;
+	struct bio_slab *bslab;
+	unsigned int i, entry = -1;
+
+	mutex_lock(&bio_slab_lock);
+
+	i = 0;
+	while (i < bio_slab_nr) {
+		struct bio_slab *bslab = &bio_slabs[i];
+
+		if (!bslab->slab && entry == -1)
+			entry = i;
+		else if (bslab->slab_size == sz) {
+			slab = bslab->slab;
+			bslab->slab_ref++;
+			break;
+		}
+		i++;
+	}
+
+	if (slab)
+		goto out_unlock;
+
+	if (bio_slab_nr == bio_slab_max && entry == -1) {
+		bio_slab_max <<= 1;
+		bio_slabs = krealloc(bio_slabs,
+				     bio_slab_max * sizeof(struct bio_slab),
+				     GFP_KERNEL);
+		if (!bio_slabs)
+			goto out_unlock;
+	}
+	if (entry == -1)
+		entry = bio_slab_nr++;
+
+	bslab = &bio_slabs[entry];
+
+	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
+	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!slab)
+		goto out_unlock;
+
+	printk("bio: create slab <%s> at %d\n", bslab->name, entry);
+	bslab->slab = slab;
+	bslab->slab_ref = 1;
+	bslab->slab_size = sz;
+out_unlock:
+	mutex_unlock(&bio_slab_lock);
+	return slab;
+}
+
+static void bio_put_slab(struct bio_set *bs)
+{
+	struct bio_slab *bslab = NULL;
+	unsigned int i;
+
+	mutex_lock(&bio_slab_lock);
+
+	for (i = 0; i < bio_slab_nr; i++) {
+		if (bs->bio_slab == bio_slabs[i].slab) {
+			bslab = &bio_slabs[i];
+			break;
+		}
+	}
+
+	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
+		goto out;
+
+	WARN_ON(!bslab->slab_ref);
+
+	if (--bslab->slab_ref)
+		goto out;
+
+	kmem_cache_destroy(bslab->slab);
+	bslab->slab = NULL;
+
+out:
+	mutex_unlock(&bio_slab_lock);
+}
+
 unsigned int bvec_nr_vecs(unsigned short idx)
 {
 	return bvec_slabs[idx].nr_vecs;
 }
 
+void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+{
+	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
+
+	if (idx == BIOVEC_MAX_IDX)
+		mempool_free(bv, bs->bvec_pool);
+	else {
+		struct biovec_slab *bvs = bvec_slabs + idx;
+
+		kmem_cache_free(bvs->slab, bv);
+	}
+}
+
 struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
 			      struct bio_set *bs)
 {
@@ -134,24 +239,22 @@ fallback:
 
 void bio_free(struct bio *bio, struct bio_set *bs)
 {
-	if (bio->bi_io_vec) {
-		const int pool_idx = BIO_POOL_IDX(bio);
+	void *p;
 
-		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-		if (pool_idx == BIOVEC_MAX_IDX)
-			mempool_free(bio->bi_io_vec, bs->bvec_pool);
-		else {
-			struct biovec_slab *bvs = bvec_slabs + pool_idx;
-
-			kmem_cache_free(bvs->slab, bio->bi_io_vec);
-		}
-	}
+	if (bio->bi_io_vec)
+		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio, bs);
 
-	mempool_free(bio, bs->bio_pool);
+	/*
+	 * If we have front padding, adjust the bio pointer before freeing
+	 */
+	p = bio;
+	if (bs->front_pad)
+		p -= bs->front_pad;
+
+	mempool_free(p, bs->bio_pool);
 }
 
 /*
@@ -188,16 +291,20 @@ void bio_init(struct bio *bio)
  *   for a &struct bio to become free. If a %NULL @bs is passed in, we will
  *   fall back to just using @kmalloc to allocate the required memory.
  *
- *   allocate bio and iovecs from the memory pools specified by the
- *   bio_set structure, or @kmalloc if none given.
+ *   Note that the caller must set ->bi_destructor on succesful return
+ *   of a bio, to do the appropriate freeing of the bio once the reference
+ *   count drops to zero.
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio;
+	struct bio *bio = NULL;
+
+	if (bs) {
+		void *p = mempool_alloc(bs->bio_pool, gfp_mask);
 
-	if (bs)
-		bio = mempool_alloc(bs->bio_pool, gfp_mask);
-	else
+		if (p)
+			bio = p + bs->front_pad;
+	} else
 		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
@@ -1398,11 +1505,25 @@ void bioset_free(struct bio_set *bs)
 
 	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
+	bio_put_slab(bs);
 
 	kfree(bs);
 }
 
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ */
+struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
 	struct bio_set *bs;
 
@@ -1410,16 +1531,22 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
 	if (!bs)
 		return NULL;
 
-	bs->bio_slab = bio_slab;
+	bs->front_pad = front_pad;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bs->bio_slab);
+	bs->bio_slab = bio_find_or_create_slab(front_pad);
+	if (!bs->bio_slab) {
+		kfree(bs);
+		return NULL;
+	}
+
+	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, bio_pool_size))
+	if (bioset_integrity_create(bs, pool_size))
 		goto bad;
 
-	if (!biovec_create_pools(bs, bvec_pool_size))
+	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
 bad:
@@ -1443,12 +1570,16 @@ static void __init biovec_init_slabs(void)
 
 static int __init init_bio(void)
 {
-	bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	bio_slab_max = 2;
+	bio_slab_nr = 0;
+	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
+	if (!bio_slabs)
+		panic("bio: can't allocate bios\n");
 
 	bio_integrity_init_slab();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9340098d75dc..4b80d3537f97 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -334,7 +334,7 @@ struct bio_pair {
 extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
 
-extern struct bio_set *bioset_create(int, int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
 
 extern struct bio *bio_alloc(gfp_t, int);
@@ -379,6 +379,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
 extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
+extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 /*
@@ -401,6 +402,8 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
 
 struct bio_set {
 	struct kmem_cache *bio_slab;
+	unsigned int front_pad;
+
 	mempool_t *bio_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	mempool_t *bio_integrity_pool;
@@ -415,6 +418,7 @@ struct biovec_slab {
 };
 
 extern struct bio_set *fs_bio_set;
+extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
 
 /*
  * a small number of entries is fine, not going to be performance critical.
-- 
cgit v1.2.3


From 392ddc32982a5c661dd90dd49a3cb37f1c68b782 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 23 Dec 2008 12:42:54 +0100
Subject: bio: add support for inlining a number of bio_vecs inside the bio

When we go and allocate a bio for IO, we actually do two allocations.
One for the bio itself, and one for the bi_io_vec that holds the
actual pages we are interested in.

This feature inlines a definable amount of io vecs inside the bio
itself, so we eliminate the bio_vec array allocation for IO's up
to a certain size. It defaults to 4 vecs, which is typically 16k
of IO.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c            | 27 ++++++++++++++++++++++-----
 include/linux/bio.h | 12 ++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/bio.c b/fs/bio.c
index 0146f80789e9..75e6be18ecd3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,6 +31,12 @@
 
 DEFINE_TRACE(block_split);
 
+/*
+ * Test patch to inline a certain number of bi_io_vec's inside the bio
+ * itself, to shrink a bio data allocation from two mempool calls to one
+ */
+#define BIO_INLINE_VECS		4
+
 static mempool_t *bio_split_pool __read_mostly;
 
 /*
@@ -241,7 +247,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
 {
 	void *p;
 
-	if (bio->bi_io_vec)
+	if (bio_has_allocated_vec(bio))
 		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
@@ -267,7 +273,8 @@ static void bio_fs_destructor(struct bio *bio)
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
-	kfree(bio->bi_io_vec);
+	if (bio_has_allocated_vec(bio))
+		kfree(bio->bi_io_vec);
 	kfree(bio);
 }
 
@@ -314,7 +321,16 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		if (likely(nr_iovecs)) {
 			unsigned long uninitialized_var(idx);
 
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+			if (nr_iovecs <= BIO_INLINE_VECS) {
+				idx = 0;
+				bvl = bio->bi_inline_vecs;
+				nr_iovecs = BIO_INLINE_VECS;
+				memset(bvl, 0, BIO_INLINE_VECS * sizeof(*bvl));
+			} else {
+				bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
+							bs);
+				nr_iovecs = bvec_nr_vecs(idx);
+			}
 			if (unlikely(!bvl)) {
 				if (bs)
 					mempool_free(bio, bs->bio_pool);
@@ -324,7 +340,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_nr_vecs(idx);
+			bio->bi_max_vecs = nr_iovecs;
 		}
 		bio->bi_io_vec = bvl;
 	}
@@ -1525,6 +1541,7 @@ void bioset_free(struct bio_set *bs)
  */
 struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
+	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
 
 	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
@@ -1533,7 +1550,7 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 
 	bs->front_pad = front_pad;
 
-	bs->bio_slab = bio_find_or_create_slab(front_pad);
+	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
 	if (!bs->bio_slab) {
 		kfree(bs);
 		return NULL;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4b80d3537f97..18462c5b8fff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -102,6 +102,13 @@ struct bio {
 #endif
 
 	bio_destructor_t	*bi_destructor;	/* destructor */
+
+	/*
+	 * We can inline a number of vecs at the end of the bio, to avoid
+	 * double allocations for a small number of bio_vecs. This member
+	 * MUST obviously be kept at the very end of the bio.
+	 */
+	struct bio_vec		bi_inline_vecs[0];
 };
 
 /*
@@ -213,6 +220,11 @@ static inline void *bio_data(struct bio *bio)
 	return NULL;
 }
 
+static inline int bio_has_allocated_vec(struct bio *bio)
+{
+	return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
+}
+
 /*
  * will die
  */
-- 
cgit v1.2.3


From abf137dd7712132ee56d5b3143c2ff61a72a5faa Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 9 Dec 2008 08:11:22 +0100
Subject: aio: make the lookup_ioctx() lockless

The mm->ioctx_list is currently protected by a reader-writer lock,
so we always grab that lock on the read side for doing ioctx
lookups. As the workload is extremely reader biased, turn this into
an rcu hlist so we can make lookup_ioctx() lockless. Get rid of
the rwlock and use a spinlock for providing update side exclusion.

There's usually only 1 entry on this list, so it doesn't make sense
to look into fancier data structures.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/s390/mm/pgtable.c   |   4 +-
 fs/aio.c                 | 100 ++++++++++++++++++++++++++---------------------
 include/linux/aio.h      |   5 ++-
 include/linux/mm_types.h |   5 ++-
 kernel/fork.c            |   4 +-
 5 files changed, 67 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ef3635b52fc0..0767827540b1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -263,7 +263,7 @@ int s390_enable_sie(void)
 	/* lets check if we are allowed to replace the mm */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
 		task_unlock(tsk);
 		return -EINVAL;
 	}
@@ -279,7 +279,7 @@ int s390_enable_sie(void)
 	/* Now lets check again if something happened */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
 		mmput(mm);
 		task_unlock(tsk);
 		return -EINVAL;
diff --git a/fs/aio.c b/fs/aio.c
index f658441d5666..d6f89d3c15e8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx)
 	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
 } while(0)
 
+static void ctx_rcu_free(struct rcu_head *head)
+{
+	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+	unsigned nr_events = ctx->max_reqs;
+
+	kmem_cache_free(kioctx_cachep, ctx);
+
+	if (nr_events) {
+		spin_lock(&aio_nr_lock);
+		BUG_ON(aio_nr - nr_events > aio_nr);
+		aio_nr -= nr_events;
+		spin_unlock(&aio_nr_lock);
+	}
+}
 
 /* __put_ioctx
  *	Called when the last user of an aio context has gone away,
@@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx)
  */
 static void __put_ioctx(struct kioctx *ctx)
 {
-	unsigned nr_events = ctx->max_reqs;
-
 	BUG_ON(ctx->reqs_active);
 
 	cancel_delayed_work(&ctx->wq);
@@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx)
 	mmdrop(ctx->mm);
 	ctx->mm = NULL;
 	pr_debug("__put_ioctx: freeing %p\n", ctx);
-	kmem_cache_free(kioctx_cachep, ctx);
-
-	if (nr_events) {
-		spin_lock(&aio_nr_lock);
-		BUG_ON(aio_nr - nr_events > aio_nr);
-		aio_nr -= nr_events;
-		spin_unlock(&aio_nr_lock);
-	}
+	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
 
 #define get_ioctx(kioctx) do {						\
@@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm;
 	struct kioctx *ctx;
+	int did_sync = 0;
 
 	/* Prevent overflows */
 	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		goto out_freectx;
 
 	/* limit the number of system wide aios */
-	spin_lock(&aio_nr_lock);
-	if (aio_nr + ctx->max_reqs > aio_max_nr ||
-	    aio_nr + ctx->max_reqs < aio_nr)
-		ctx->max_reqs = 0;
-	else
-		aio_nr += ctx->max_reqs;
-	spin_unlock(&aio_nr_lock);
+	do {
+		spin_lock_bh(&aio_nr_lock);
+		if (aio_nr + nr_events > aio_max_nr ||
+		    aio_nr + nr_events < aio_nr)
+			ctx->max_reqs = 0;
+		else
+			aio_nr += ctx->max_reqs;
+		spin_unlock_bh(&aio_nr_lock);
+		if (ctx->max_reqs || did_sync)
+			break;
+
+		/* wait for rcu callbacks to have completed before giving up */
+		synchronize_rcu();
+		did_sync = 1;
+		ctx->max_reqs = nr_events;
+	} while (1);
+
 	if (ctx->max_reqs == 0)
 		goto out_cleanup;
 
 	/* now link into global list. */
-	write_lock(&mm->ioctx_list_lock);
-	ctx->next = mm->ioctx_list;
-	mm->ioctx_list = ctx;
-	write_unlock(&mm->ioctx_list_lock);
+	spin_lock(&mm->ioctx_lock);
+	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
+	spin_unlock(&mm->ioctx_lock);
 
 	dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
  */
 void exit_aio(struct mm_struct *mm)
 {
-	struct kioctx *ctx = mm->ioctx_list;
-	mm->ioctx_list = NULL;
-	while (ctx) {
-		struct kioctx *next = ctx->next;
-		ctx->next = NULL;
+	struct kioctx *ctx;
+
+	while (!hlist_empty(&mm->ioctx_list)) {
+		ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
+		hlist_del_rcu(&ctx->list);
+
 		aio_cancel_all(ctx);
 
 		wait_for_all_aios(ctx);
@@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm)
 				atomic_read(&ctx->users), ctx->dead,
 				ctx->reqs_active);
 		put_ioctx(ctx);
-		ctx = next;
 	}
 }
 
@@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req)
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
-	struct kioctx *ioctx;
-	struct mm_struct *mm;
+	struct mm_struct *mm = current->mm;
+	struct kioctx *ctx = NULL;
+	struct hlist_node *n;
 
-	mm = current->mm;
-	read_lock(&mm->ioctx_list_lock);
-	for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
-		if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) {
-			get_ioctx(ioctx);
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
+		if (ctx->user_id == ctx_id && !ctx->dead) {
+			get_ioctx(ctx);
 			break;
 		}
-	read_unlock(&mm->ioctx_list_lock);
+	}
 
-	return ioctx;
+	rcu_read_unlock();
+	return ctx;
 }
 
 /*
@@ -1215,19 +1232,14 @@ out:
 static void io_destroy(struct kioctx *ioctx)
 {
 	struct mm_struct *mm = current->mm;
-	struct kioctx **tmp;
 	int was_dead;
 
 	/* delete the entry from the list is someone else hasn't already */
-	write_lock(&mm->ioctx_list_lock);
+	spin_lock(&mm->ioctx_lock);
 	was_dead = ioctx->dead;
 	ioctx->dead = 1;
-	for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx;
-	     tmp = &(*tmp)->next)
-		;
-	if (*tmp)
-		*tmp = ioctx->next;
-	write_unlock(&mm->ioctx_list_lock);
+	hlist_del_rcu(&ioctx->list);
+	spin_unlock(&mm->ioctx_lock);
 
 	dprintk("aio_release(%p)\n", ioctx);
 	if (likely(!was_dead))
diff --git a/include/linux/aio.h b/include/linux/aio.h
index f6b8cf99b596..b16a957030f8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -5,6 +5,7 @@
 #include <linux/workqueue.h>
 #include <linux/aio_abi.h>
 #include <linux/uio.h>
+#include <linux/rcupdate.h>
 
 #include <asm/atomic.h>
 
@@ -183,7 +184,7 @@ struct kioctx {
 
 	/* This needs improving */
 	unsigned long		user_id;
-	struct kioctx		*next;
+	struct hlist_node	list;
 
 	wait_queue_head_t	wait;
 
@@ -199,6 +200,8 @@ struct kioctx {
 	struct aio_ring_info	ring_info;
 
 	struct delayed_work	wq;
+
+	struct rcu_head		rcu_head;
 };
 
 /* prototypes */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fe825471d5aa..9cfc9b627fdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -232,8 +232,9 @@ struct mm_struct {
 	struct core_state *core_state; /* coredumping support */
 
 	/* aio bits */
-	rwlock_t		ioctx_list_lock;	/* aio lock */
-	struct kioctx		*ioctx_list;
+	spinlock_t		ioctx_lock;
+	struct hlist_head	ioctx_list;
+
 #ifdef CONFIG_MM_OWNER
 	/*
 	 * "owner" points to a task that is regarded as the canonical
diff --git a/kernel/fork.c b/kernel/fork.c
index 6144b36cd897..43cbf30669e6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -415,8 +415,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
 	spin_lock_init(&mm->page_table_lock);
-	rwlock_init(&mm->ioctx_list_lock);
-	mm->ioctx_list = NULL;
+	spin_lock_init(&mm->ioctx_lock);
+	INIT_HLIST_HEAD(&mm->ioctx_list);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
 	mm_init_owner(mm, p);
-- 
cgit v1.2.3


From b374d18a4bfce705e4a99ae9f501b53e86ecb283 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 31 Oct 2008 10:05:07 +0100
Subject: block: get rid of elevator_t typedef

Just use struct elevator_queue everywhere instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/as-iosched.c       |  8 +++----
 block/cfq-iosched.c      |  6 +++---
 block/deadline-iosched.c |  6 +++---
 block/elevator.c         | 55 +++++++++++++++++++++++++-----------------------
 block/noop-iosched.c     |  2 +-
 drivers/block/nbd.c      |  2 +-
 include/linux/blkdev.h   |  3 +--
 include/linux/elevator.h |  8 +++----
 8 files changed, 46 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 802b5d0d8536..631f6f44460a 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,7 +1339,7 @@ static int as_may_queue(struct request_queue *q, int rw)
 	return ret;
 }
 
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
 {
 	struct as_data *ad = e->elevator_data;
 
@@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
 	return count;
 }
 
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
 {
 	struct as_data *ad = e->elevator_data;
 	int pos = 0;
@@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR)				\
-static ssize_t __FUNC(elevator_t *e, char *page)		\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)	\
 {								\
 	struct as_data *ad = e->elevator_data;			\
 	return as_var_show(jiffies_to_msecs((__VAR)), (page));	\
@@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)				\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct as_data *ad = e->elevator_data;				\
 	int ret = as_var_store(__PTR, (page), count);			\
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a2bfec7d6b36..adaf93a9d19d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2178,7 +2178,7 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_exit_queue(elevator_t *e)
+static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
@@ -2288,7 +2288,7 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
-static ssize_t __FUNC(elevator_t *e, char *page)			\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 {									\
 	struct cfq_data *cfqd = e->elevator_data;			\
 	unsigned int __data = __VAR;					\
@@ -2308,7 +2308,7 @@ SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct cfq_data *cfqd = e->elevator_data;			\
 	unsigned int __data;						\
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index fd311179f44c..c4d991d4adef 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -334,7 +334,7 @@ static int deadline_queue_empty(struct request_queue *q)
 		&& list_empty(&dd->fifo_list[READ]);
 }
 
-static void deadline_exit_queue(elevator_t *e)
+static void deadline_exit_queue(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
 
@@ -387,7 +387,7 @@ deadline_var_store(int *var, const char *page, size_t count)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
-static ssize_t __FUNC(elevator_t *e, char *page)			\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 {									\
 	struct deadline_data *dd = e->elevator_data;			\
 	int __data = __VAR;						\
@@ -403,7 +403,7 @@ SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct deadline_data *dd = e->elevator_data;			\
 	int __data;							\
diff --git a/block/elevator.c b/block/elevator.c
index ff60177a3bab..98259eda0ef6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -65,7 +65,7 @@ DEFINE_TRACE(block_rq_issue);
 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 {
 	struct request_queue *q = rq->q;
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_allow_merge_fn)
 		return e->ops->elevator_allow_merge_fn(q, rq, bio);
@@ -208,13 +208,13 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct request_queue *q,
+static struct elevator_queue *elevator_alloc(struct request_queue *q,
 				  struct elevator_type *e)
 {
-	elevator_t *eq;
+	struct elevator_queue *eq;
 	int i;
 
-	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
+	eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (unlikely(!eq))
 		goto err;
 
@@ -240,8 +240,9 @@ err:
 
 static void elevator_release(struct kobject *kobj)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
+	struct elevator_queue *e;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	elevator_put(e->elevator_type);
 	kfree(e->hash);
 	kfree(e);
@@ -297,7 +298,7 @@ int elevator_init(struct request_queue *q, char *name)
 }
 EXPORT_SYMBOL(elevator_init);
 
-void elevator_exit(elevator_t *e)
+void elevator_exit(struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
 	if (e->ops->elevator_exit_fn)
@@ -311,7 +312,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_activate_req_fn)
 		e->ops->elevator_activate_req_fn(q, rq);
@@ -319,7 +320,7 @@ static void elv_activate_rq(struct request_queue *q, struct request *rq)
 
 static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_deactivate_req_fn)
 		e->ops->elevator_deactivate_req_fn(q, rq);
@@ -338,7 +339,7 @@ static void elv_rqhash_del(struct request_queue *q, struct request *rq)
 
 static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	BUG_ON(ELV_ON_HASH(rq));
 	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
@@ -352,7 +353,7 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
 	struct hlist_node *entry, *next;
 	struct request *rq;
@@ -494,7 +495,7 @@ EXPORT_SYMBOL(elv_dispatch_add_tail);
 
 int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct request *__rq;
 	int ret;
 
@@ -529,7 +530,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq, type);
@@ -543,7 +544,7 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
@@ -846,7 +847,7 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 
 int elv_queue_empty(struct request_queue *q)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (!list_empty(&q->queue_head))
 		return 0;
@@ -860,7 +861,7 @@ EXPORT_SYMBOL(elv_queue_empty);
 
 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_latter_req_fn)
 		return e->ops->elevator_latter_req_fn(q, rq);
@@ -869,7 +870,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 
 struct request *elv_former_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_former_req_fn)
 		return e->ops->elevator_former_req_fn(q, rq);
@@ -878,7 +879,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
 
 int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
@@ -889,7 +890,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 
 void elv_put_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_put_req_fn)
 		e->ops->elevator_put_req_fn(rq);
@@ -897,7 +898,7 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 
 int elv_may_queue(struct request_queue *q, int rw)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
 		return e->ops->elevator_may_queue_fn(q, rw);
@@ -920,7 +921,7 @@ EXPORT_SYMBOL(elv_abort_queue);
 
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	/*
 	 * request is released from the driver, io must be done
@@ -955,13 +956,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 static ssize_t
 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
 	struct elv_fs_entry *entry = to_elv(attr);
+	struct elevator_queue *e;
 	ssize_t error;
 
 	if (!entry->show)
 		return -EIO;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
 	error = e->ops ? entry->show(e, page) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
@@ -972,13 +974,14 @@ static ssize_t
 elv_attr_store(struct kobject *kobj, struct attribute *attr,
 	       const char *page, size_t length)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
 	struct elv_fs_entry *entry = to_elv(attr);
+	struct elevator_queue *e;
 	ssize_t error;
 
 	if (!entry->store)
 		return -EIO;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
 	error = e->ops ? entry->store(e, page, length) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
@@ -997,7 +1000,7 @@ static struct kobj_type elv_ktype = {
 
 int elv_register_queue(struct request_queue *q)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	int error;
 
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
@@ -1015,7 +1018,7 @@ int elv_register_queue(struct request_queue *q)
 	return error;
 }
 
-static void __elv_unregister_queue(elevator_t *e)
+static void __elv_unregister_queue(struct elevator_queue *e)
 {
 	kobject_uevent(&e->kobj, KOBJ_REMOVE);
 	kobject_del(&e->kobj);
@@ -1078,7 +1081,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  */
 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
-	elevator_t *old_elevator, *e;
+	struct elevator_queue *old_elevator, *e;
 	void *data;
 
 	/*
@@ -1184,7 +1187,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct elevator_type *elv = e->elevator_type;
 	struct elevator_type *__e;
 	int len = 0;
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index c23e02969650..3a0d369d08c7 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -76,7 +76,7 @@ static void *noop_init_queue(struct request_queue *q)
 	return nd;
 }
 
-static void noop_exit_queue(elevator_t *e)
+static void noop_exit_queue(struct elevator_queue *e)
 {
 	struct noop_data *nd = e->elevator_data;
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d3a91cacee8c..0766ce6187a9 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -722,7 +722,7 @@ static int __init nbd_init(void)
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = alloc_disk(1 << part_shift);
-		elevator_t *old_e;
+		struct elevator_queue *old_e;
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 41bbadfd17f6..7035cec583b6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -26,7 +26,6 @@ struct scsi_ioctl_command;
 
 struct request_queue;
 struct elevator_queue;
-typedef struct elevator_queue elevator_t;
 struct request_pm_state;
 struct blk_trace;
 struct request;
@@ -313,7 +312,7 @@ struct request_queue
 	 */
 	struct list_head	queue_head;
 	struct request		*last_merge;
-	elevator_t		*elevator;
+	struct elevator_queue	*elevator;
 
 	/*
 	 * the queue request freelist, one for reads and one for writes
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 92f6f634e3e6..7a204256b155 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -28,7 +28,7 @@ typedef void (elevator_activate_req_fn) (struct request_queue *, struct request
 typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
 
 typedef void *(elevator_init_fn) (struct request_queue *);
-typedef void (elevator_exit_fn) (elevator_t *);
+typedef void (elevator_exit_fn) (struct elevator_queue *);
 
 struct elevator_ops
 {
@@ -62,8 +62,8 @@ struct elevator_ops
 
 struct elv_fs_entry {
 	struct attribute attr;
-	ssize_t (*show)(elevator_t *, char *);
-	ssize_t (*store)(elevator_t *, const char *, size_t);
+	ssize_t (*show)(struct elevator_queue *, char *);
+	ssize_t (*store)(struct elevator_queue *, const char *, size_t);
 };
 
 /*
@@ -130,7 +130,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
 extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 
 extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(elevator_t *);
+extern void elevator_exit(struct elevator_queue *);
 extern int elv_rq_merge_ok(struct request *, struct bio *);
 
 /*
-- 
cgit v1.2.3


From a6f23657d3072bde6844055bbc2290e497f33fbc Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 24 Oct 2008 12:52:42 +0200
Subject: block: add one-hit cache for disk partition lookup

disk_map_sector_rcu() returns a partition from a sector offset,
which we use for IO statistics on a per-partition basis. The
lookup itself is an O(N) list lookup, where N is the number of
partitions. This actually hurts performance quite a bit, even
on the lower end partitions. On higher numbered partitions,
it can get pretty bad.

Solve this by adding a one-hit cache for partition lookup.
This makes the lookup O(1) for the case where we do most IO to
one partition. Even for mixed partition workloads, amortized cost
is pretty close to O(1) since the natural IO batching makes the
one-hit cache last for lots of IOs.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c         | 23 +++++++++++++++++++----
 include/linux/genhd.h |  1 +
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/block/genhd.c b/block/genhd.c
index 2f7feda61e35..d84a7df1e2a0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
 }
 EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 
+static inline int sector_in_part(struct hd_struct *part, sector_t sector)
+{
+	return part->start_sect <= sector &&
+		sector < part->start_sect + part->nr_sects;
+}
+
 /**
  * disk_map_sector_rcu - map sector to partition
  * @disk: gendisk of interest
@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 {
 	struct disk_part_tbl *ptbl;
+	struct hd_struct *part;
 	int i;
 
 	ptbl = rcu_dereference(disk->part_tbl);
 
+	part = rcu_dereference(ptbl->last_lookup);
+	if (part && sector_in_part(part, sector))
+		return part;
+
 	for (i = 1; i < ptbl->len; i++) {
-		struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+		part = rcu_dereference(ptbl->part[i]);
 
-		if (part && part->start_sect <= sector &&
-		    sector < part->start_sect + part->nr_sects)
+		if (part && sector_in_part(part, sector)) {
+			rcu_assign_pointer(ptbl->last_lookup, part);
 			return part;
+		}
 	}
 	return &disk->part0;
 }
@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk,
 	struct disk_part_tbl *old_ptbl = disk->part_tbl;
 
 	rcu_assign_pointer(disk->part_tbl, new_ptbl);
-	if (old_ptbl)
+
+	if (old_ptbl) {
+		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 		call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+	}
 }
 
 /**
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 3df7742ce246..16948eaecae3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter {
 struct disk_part_tbl {
 	struct rcu_head rcu_head;
 	int len;
+	struct hd_struct *last_lookup;
 	struct hd_struct *part[];
 };
 
-- 
cgit v1.2.3


From b3a6ffe16b5cc48abe7db8d04882dc45280eb693 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 12 Dec 2008 09:51:16 +0100
Subject: Get rid of CONFIG_LSF

We have two seperate config entries for large devices/files. One
is CONFIG_LBD that guards just the devices, the other is CONFIG_LSF
that handles large files. This doesn't make a lot of sense, you typically
want both or none. So get rid of CONFIG_LSF and change CONFIG_LBD wording
to indicate that it covers both.

Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/Kconfig         | 23 +++++------------------
 fs/ext4/super.c       |  8 ++++----
 include/linux/types.h | 11 +++--------
 3 files changed, 12 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/block/Kconfig b/block/Kconfig
index 290b219fad9c..ac0956f77785 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -24,21 +24,17 @@ menuconfig BLOCK
 if BLOCK
 
 config LBD
-	bool "Support for Large Block Devices"
+	bool "Support for large block devices and files"
 	depends on !64BIT
 	help
-	  Enable block devices of size 2TB and larger.
+	  Enable block devices or files of size 2TB and larger.
 
 	  This option is required to support the full capacity of large
 	  (2TB+) block devices, including RAID, disk, Network Block Device,
 	  Logical Volume Manager (LVM) and loopback.
-
-	  For example, RAID devices are frequently bigger than the capacity
-	  of the largest individual hard drive.
-
-	  This option is not required if you have individual disk drives
-	  which total 2TB+ and you are not aggregating the capacity into
-	  a large block device (e.g. using RAID or LVM).
+	
+	  This option also enables support for single files larger than
+	  2TB.
 
 	  If unsure, say N.
 
@@ -58,15 +54,6 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
-config LSF
-	bool "Support for Large Single Files"
-	depends on !64BIT
-	help
-	  Say Y here if you want to be able to handle very large files (2TB
-	  and larger), otherwise say N.
-
-	  If unsure, say Y.
-
 config BLK_DEV_BSG
 	bool "Block layer SG support v4 (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65dbe..04158ad74dbb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
 	/* small i_blocks in vfs inode? */
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * CONFIG_LSF is not enabled implies the inode
+		 * CONFIG_LBD is not enabled implies the inode
 		 * i_block represent total blocks in 512 bytes
 		 * 32 == size of vfs inode i_blocks * 8
 		 */
@@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LSF is not enabled
+		 * !has_huge_files or CONFIG_LBD is not enabled
 		 * implies the inode i_block represent total blocks in
 		 * 512 bytes 32 == size of vfs inode i_blocks * 8
 		 */
@@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (has_huge_files) {
 		/*
 		 * Large file size enabled file system can only be
-		 * mount if kernel is build with CONFIG_LSF
+		 * mount if kernel is build with CONFIG_LBD
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
 			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LSF.\n", sb->s_id);
+					"without CONFIG_LBD.\n", sb->s_id);
 			goto failed_mount;
 		}
 	}
diff --git a/include/linux/types.h b/include/linux/types.h
index 1d98330b1f2c..121f349cb7ec 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -135,19 +135,14 @@ typedef		__s64		int64_t;
  *
  * Linux always considers sectors to be 512 bytes long independently
  * of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
  */
 #ifdef CONFIG_LBD
 typedef u64 sector_t;
-#else
-typedef unsigned long sector_t;
-#endif
-
-/*
- * The type of the inode's block count.
- */
-#ifdef CONFIG_LSF
 typedef u64 blkcnt_t;
 #else
+typedef unsigned long sector_t;
 typedef unsigned long blkcnt_t;
 #endif
 
-- 
cgit v1.2.3


From e7f7ab45ebcb54fd5f814ea15ea079e079662f67 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 28 Nov 2008 13:43:47 +1000
Subject: drm: cleanup exit path for module unload

The current sub-module unload exit path is a mess, it tries
to abuse the idr. Just keep a list of devices per driver struct
and free them in-order on rmmod.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_drv.c  | 25 +++++--------------------
 drivers/gpu/drm/drm_stub.c |  2 ++
 include/drm/drmP.h         |  3 +++
 3 files changed, 10 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 996097acb5e7..3cb87a932b33 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -263,6 +263,8 @@ int drm_init(struct drm_driver *driver)
 
 	DRM_DEBUG("\n");
 
+	INIT_LIST_HEAD(&driver->device_list);
+
 	for (i = 0; driver->pci_driver.id_table[i].vendor != 0; i++) {
 		pid = (struct pci_device_id *)&driver->pci_driver.id_table[i];
 
@@ -334,30 +336,13 @@ static void drm_cleanup(struct drm_device * dev)
 		DRM_ERROR("Cannot unload module\n");
 }
 
-static int drm_minors_cleanup(int id, void *ptr, void *data)
-{
-	struct drm_minor *minor = ptr;
-	struct drm_device *dev;
-	struct drm_driver *driver = data;
-
-	dev = minor->dev;
-	if (minor->dev->driver != driver)
-		return 0;
-
-	if (minor->type != DRM_MINOR_LEGACY)
-		return 0;
-
-	if (dev)
-		pci_dev_put(dev->pdev);
-	drm_cleanup(dev);
-	return 1;
-}
-
 void drm_exit(struct drm_driver *driver)
 {
+	struct drm_device *dev, *tmp;
 	DRM_DEBUG("\n");
 
-	idr_for_each(&drm_minors_idr, &drm_minors_cleanup, driver);
+	list_for_each_entry_safe(dev, tmp, &driver->device_list, driver_item)
+		drm_cleanup(dev);
 
 	DRM_INFO("Module unloaded\n");
 }
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 66c96ec66672..849c0a9fe7fd 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -270,6 +270,8 @@ int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
 	if ((ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY)))
 		goto err_g2;
 
+	list_add_tail(&dev->driver_item, &driver->device_list);
+
 	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n",
 		 driver->name, driver->major, driver->minor, driver->patchlevel,
 		 driver->date, dev->primary->index);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index d5e8e5c89548..08b8539e7b3c 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -737,6 +737,8 @@ struct drm_driver {
 	int num_ioctls;
 	struct file_operations fops;
 	struct pci_driver pci_driver;
+	/* List of devices hanging off this driver */
+	struct list_head device_list;
 };
 
 #define DRM_MINOR_UNASSIGNED 0
@@ -759,6 +761,7 @@ struct drm_minor {
  * may contain multiple heads.
  */
 struct drm_device {
+	struct list_head driver_item;	/**< list of devices per driver */
 	char *unique;			/**< Unique identifier: e.g., busid */
 	int unique_len;			/**< Length of unique field */
 	char *devname;			/**< For /proc/interrupts */
-- 
cgit v1.2.3


From 7c1c2871a6a3a114853ec6836e9035ac1c0c7f7a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 28 Nov 2008 14:22:24 +1000
Subject: drm: move to kref per-master structures.

This is step one towards having multiple masters sharing a drm
device in order to get fast-user-switching to work.

It splits out the information associated with the drm master
into a separate kref counted structure, and allocates this when
a master opens the device node. It also allows the current master
to abdicate (say while VT switched), and a new master to take over
the hardware.

It moves the Intel and radeon drivers to using the sarea from
within the new master structures.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_auth.c            |  29 ++---
 drivers/gpu/drm/drm_bufs.c            |  20 +++-
 drivers/gpu/drm/drm_context.c         |  10 +-
 drivers/gpu/drm/drm_drv.c             |  33 +-----
 drivers/gpu/drm/drm_fops.c            | 201 +++++++++++++++++++++-------------
 drivers/gpu/drm/drm_ioctl.c           |  57 +++++-----
 drivers/gpu/drm/drm_lock.c            |  42 +++----
 drivers/gpu/drm/drm_proc.c            |   8 +-
 drivers/gpu/drm/drm_stub.c            | 105 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_dma.c       |  81 ++++++++------
 drivers/gpu/drm/i915/i915_drv.c       |   2 +
 drivers/gpu/drm/i915/i915_drv.h       |  10 +-
 drivers/gpu/drm/i915/i915_irq.c       |  30 ++---
 drivers/gpu/drm/i915/i915_mem.c       |   3 +-
 drivers/gpu/drm/radeon/r300_cmdbuf.c  |  11 +-
 drivers/gpu/drm/radeon/radeon_cp.c    |  73 +++++++++---
 drivers/gpu/drm/radeon/radeon_drv.h   |  14 ++-
 drivers/gpu/drm/radeon/radeon_state.c | 166 +++++++++++++++-------------
 include/drm/drm.h                     |   3 +
 include/drm/drmP.h                    |  64 ++++++++---
 include/drm/drm_sarea.h               |   6 +-
 21 files changed, 617 insertions(+), 351 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index a73462723d2d..ca7a9ef5007b 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -45,14 +45,15 @@
  * the one with matching magic number, while holding the drm_device::struct_mutex
  * lock.
  */
-static struct drm_file *drm_find_file(struct drm_device * dev, drm_magic_t magic)
+static struct drm_file *drm_find_file(struct drm_master *master, drm_magic_t magic)
 {
 	struct drm_file *retval = NULL;
 	struct drm_magic_entry *pt;
 	struct drm_hash_item *hash;
+	struct drm_device *dev = master->minor->dev;
 
 	mutex_lock(&dev->struct_mutex);
-	if (!drm_ht_find_item(&dev->magiclist, (unsigned long)magic, &hash)) {
+	if (!drm_ht_find_item(&master->magiclist, (unsigned long)magic, &hash)) {
 		pt = drm_hash_entry(hash, struct drm_magic_entry, hash_item);
 		retval = pt->priv;
 	}
@@ -71,11 +72,11 @@ static struct drm_file *drm_find_file(struct drm_device * dev, drm_magic_t magic
  * associated the magic number hash key in drm_device::magiclist, while holding
  * the drm_device::struct_mutex lock.
  */
-static int drm_add_magic(struct drm_device * dev, struct drm_file * priv,
+static int drm_add_magic(struct drm_master *master, struct drm_file *priv,
 			 drm_magic_t magic)
 {
 	struct drm_magic_entry *entry;
-
+	struct drm_device *dev = master->minor->dev;
 	DRM_DEBUG("%d\n", magic);
 
 	entry = drm_alloc(sizeof(*entry), DRM_MEM_MAGIC);
@@ -83,11 +84,10 @@ static int drm_add_magic(struct drm_device * dev, struct drm_file * priv,
 		return -ENOMEM;
 	memset(entry, 0, sizeof(*entry));
 	entry->priv = priv;
-
 	entry->hash_item.key = (unsigned long)magic;
 	mutex_lock(&dev->struct_mutex);
-	drm_ht_insert_item(&dev->magiclist, &entry->hash_item);
-	list_add_tail(&entry->head, &dev->magicfree);
+	drm_ht_insert_item(&master->magiclist, &entry->hash_item);
+	list_add_tail(&entry->head, &master->magicfree);
 	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
@@ -102,20 +102,21 @@ static int drm_add_magic(struct drm_device * dev, struct drm_file * priv,
  * Searches and unlinks the entry in drm_device::magiclist with the magic
  * number hash key, while holding the drm_device::struct_mutex lock.
  */
-static int drm_remove_magic(struct drm_device * dev, drm_magic_t magic)
+static int drm_remove_magic(struct drm_master *master, drm_magic_t magic)
 {
 	struct drm_magic_entry *pt;
 	struct drm_hash_item *hash;
+	struct drm_device *dev = master->minor->dev;
 
 	DRM_DEBUG("%d\n", magic);
 
 	mutex_lock(&dev->struct_mutex);
-	if (drm_ht_find_item(&dev->magiclist, (unsigned long)magic, &hash)) {
+	if (drm_ht_find_item(&master->magiclist, (unsigned long)magic, &hash)) {
 		mutex_unlock(&dev->struct_mutex);
 		return -EINVAL;
 	}
 	pt = drm_hash_entry(hash, struct drm_magic_entry, hash_item);
-	drm_ht_remove_item(&dev->magiclist, hash);
+	drm_ht_remove_item(&master->magiclist, hash);
 	list_del(&pt->head);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -153,9 +154,9 @@ int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv)
 				++sequence;	/* reserve 0 */
 			auth->magic = sequence++;
 			spin_unlock(&lock);
-		} while (drm_find_file(dev, auth->magic));
+		} while (drm_find_file(file_priv->master, auth->magic));
 		file_priv->magic = auth->magic;
-		drm_add_magic(dev, file_priv, auth->magic);
+		drm_add_magic(file_priv->master, file_priv, auth->magic);
 	}
 
 	DRM_DEBUG("%u\n", auth->magic);
@@ -181,9 +182,9 @@ int drm_authmagic(struct drm_device *dev, void *data,
 	struct drm_file *file;
 
 	DRM_DEBUG("%u\n", auth->magic);
-	if ((file = drm_find_file(dev, auth->magic))) {
+	if ((file = drm_find_file(file_priv->master, auth->magic))) {
 		file->authenticated = 1;
-		drm_remove_magic(dev, auth->magic);
+		drm_remove_magic(file_priv->master, auth->magic);
 		return 0;
 	}
 	return -EINVAL;
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index bde64b84166e..dc3ce3e0a0a4 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -54,9 +54,9 @@ static struct drm_map_list *drm_find_matching_map(struct drm_device *dev,
 {
 	struct drm_map_list *entry;
 	list_for_each_entry(entry, &dev->maplist, head) {
-		if (entry->map && map->type == entry->map->type &&
+		if (entry->map && (entry->master == dev->primary->master) && (map->type == entry->map->type) &&
 		    ((entry->map->offset == map->offset) ||
-		     (map->type == _DRM_SHM && map->flags==_DRM_CONTAINS_LOCK))) {
+		     ((map->type == _DRM_SHM) && (map->flags&_DRM_CONTAINS_LOCK)))) {
 			return entry;
 		}
 	}
@@ -210,12 +210,12 @@ static int drm_addmap_core(struct drm_device * dev, unsigned int offset,
 		map->offset = (unsigned long)map->handle;
 		if (map->flags & _DRM_CONTAINS_LOCK) {
 			/* Prevent a 2nd X Server from creating a 2nd lock */
-			if (dev->lock.hw_lock != NULL) {
+			if (dev->primary->master->lock.hw_lock != NULL) {
 				vfree(map->handle);
 				drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 				return -EBUSY;
 			}
-			dev->sigdata.lock = dev->lock.hw_lock = map->handle;	/* Pointer to lock */
+			dev->sigdata.lock = dev->primary->master->lock.hw_lock = map->handle;	/* Pointer to lock */
 		}
 		break;
 	case _DRM_AGP: {
@@ -319,6 +319,7 @@ static int drm_addmap_core(struct drm_device * dev, unsigned int offset,
 	list->user_token = list->hash.key << PAGE_SHIFT;
 	mutex_unlock(&dev->struct_mutex);
 
+	list->master = dev->primary->master;
 	*maplist = list;
 	return 0;
 	}
@@ -345,7 +346,7 @@ int drm_addmap_ioctl(struct drm_device *dev, void *data,
 	struct drm_map_list *maplist;
 	int err;
 
-	if (!(capable(CAP_SYS_ADMIN) || map->type == _DRM_AGP))
+	if (!(capable(CAP_SYS_ADMIN) || map->type == _DRM_AGP || map->type == _DRM_SHM))
 		return -EPERM;
 
 	err = drm_addmap_core(dev, map->offset, map->size, map->type,
@@ -380,10 +381,12 @@ int drm_rmmap_locked(struct drm_device *dev, drm_local_map_t *map)
 	struct drm_map_list *r_list = NULL, *list_t;
 	drm_dma_handle_t dmah;
 	int found = 0;
+	struct drm_master *master;
 
 	/* Find the list entry for the map and remove it */
 	list_for_each_entry_safe(r_list, list_t, &dev->maplist, head) {
 		if (r_list->map == map) {
+			master = r_list->master;
 			list_del(&r_list->head);
 			drm_ht_remove_key(&dev->map_hash,
 					  r_list->user_token >> PAGE_SHIFT);
@@ -409,6 +412,13 @@ int drm_rmmap_locked(struct drm_device *dev, drm_local_map_t *map)
 		break;
 	case _DRM_SHM:
 		vfree(map->handle);
+		if (master) {
+			if (dev->sigdata.lock == master->lock.hw_lock)
+				dev->sigdata.lock = NULL;
+			master->lock.hw_lock = NULL;   /* SHM removed */
+			master->lock.file_priv = NULL;
+			wake_up_interruptible(&master->lock.lock_queue);
+		}
 		break;
 	case _DRM_AGP:
 	case _DRM_SCATTER_GATHER:
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
index d505f695421f..809ec0f03452 100644
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -256,12 +256,13 @@ static int drm_context_switch(struct drm_device * dev, int old, int new)
  * hardware lock is held, clears the drm_device::context_flag and wakes up
  * drm_device::context_wait.
  */
-static int drm_context_switch_complete(struct drm_device * dev, int new)
+static int drm_context_switch_complete(struct drm_device *dev,
+				       struct drm_file *file_priv, int new)
 {
 	dev->last_context = new;	/* PRE/POST: This is the _only_ writer. */
 	dev->last_switch = jiffies;
 
-	if (!_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock)) {
+	if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock)) {
 		DRM_ERROR("Lock isn't held after context switch\n");
 	}
 
@@ -420,7 +421,7 @@ int drm_newctx(struct drm_device *dev, void *data,
 	struct drm_ctx *ctx = data;
 
 	DRM_DEBUG("%d\n", ctx->handle);
-	drm_context_switch_complete(dev, ctx->handle);
+	drm_context_switch_complete(dev, file_priv, ctx->handle);
 
 	return 0;
 }
@@ -442,9 +443,6 @@ int drm_rmctx(struct drm_device *dev, void *data,
 	struct drm_ctx *ctx = data;
 
 	DRM_DEBUG("%d\n", ctx->handle);
-	if (ctx->handle == DRM_KERNEL_CONTEXT + 1) {
-		file_priv->remove_auth_on_close = 1;
-	}
 	if (ctx->handle != DRM_KERNEL_CONTEXT) {
 		if (dev->driver->context_dtor)
 			dev->driver->context_dtor(dev, ctx->handle);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3cb87a932b33..9f04ca37df6d 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -74,6 +74,9 @@ static struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_getsareactx, DRM_AUTH),
 
+	DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_ROOT_ONLY),
+
 	DRM_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_addctx, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_modctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -138,8 +141,6 @@ static struct drm_ioctl_desc drm_ioctls[] = {
  */
 int drm_lastclose(struct drm_device * dev)
 {
-	struct drm_magic_entry *pt, *next;
-	struct drm_map_list *r_list, *list_t;
 	struct drm_vma_entry *vma, *vma_temp;
 	int i;
 
@@ -149,12 +150,6 @@ int drm_lastclose(struct drm_device * dev)
 		dev->driver->lastclose(dev);
 	DRM_DEBUG("driver lastclose completed\n");
 
-	if (dev->unique) {
-		drm_free(dev->unique, strlen(dev->unique) + 1, DRM_MEM_DRIVER);
-		dev->unique = NULL;
-		dev->unique_len = 0;
-	}
-
 	if (dev->irq_enabled)
 		drm_irq_uninstall(dev);
 
@@ -164,16 +159,6 @@ int drm_lastclose(struct drm_device * dev)
 	drm_drawable_free_all(dev);
 	del_timer(&dev->timer);
 
-	/* Clear pid list */
-	if (dev->magicfree.next) {
-		list_for_each_entry_safe(pt, next, &dev->magicfree, head) {
-			list_del(&pt->head);
-			drm_ht_remove_item(&dev->magiclist, &pt->hash_item);
-			drm_free(pt, sizeof(*pt), DRM_MEM_MAGIC);
-		}
-		drm_ht_remove(&dev->magiclist);
-	}
-
 	/* Clear AGP information */
 	if (drm_core_has_AGP(dev) && dev->agp) {
 		struct drm_agp_mem *entry, *tempe;
@@ -205,13 +190,6 @@ int drm_lastclose(struct drm_device * dev)
 		drm_free(vma, sizeof(*vma), DRM_MEM_VMAS);
 	}
 
-	list_for_each_entry_safe(r_list, list_t, &dev->maplist, head) {
-		if (!(r_list->map->flags & _DRM_DRIVER)) {
-			drm_rmmap_locked(dev, r_list->map);
-			r_list = NULL;
-		}
-	}
-
 	if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE) && dev->queuelist) {
 		for (i = 0; i < dev->queue_count; i++) {
 			if (dev->queuelist[i]) {
@@ -231,11 +209,6 @@ int drm_lastclose(struct drm_device * dev)
 	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		drm_dma_takedown(dev);
 
-	if (dev->lock.hw_lock) {
-		dev->sigdata.lock = dev->lock.hw_lock = NULL;	/* SHM removed */
-		dev->lock.file_priv = NULL;
-		wake_up_interruptible(&dev->lock.lock_queue);
-	}
 	mutex_unlock(&dev->struct_mutex);
 
 	DRM_DEBUG("lastclose completed\n");
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 78eeed5caaff..f2285237df49 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -44,10 +44,8 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 
 static int drm_setup(struct drm_device * dev)
 {
-	drm_local_map_t *map;
 	int i;
 	int ret;
-	u32 sareapage;
 
 	if (dev->driver->firstopen) {
 		ret = dev->driver->firstopen(dev);
@@ -55,14 +53,6 @@ static int drm_setup(struct drm_device * dev)
 			return ret;
 	}
 
-	dev->magicfree.next = NULL;
-
-	/* prebuild the SAREA */
-	sareapage = max_t(unsigned, SAREA_MAX, PAGE_SIZE);
-	i = drm_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK, &map);
-	if (i != 0)
-		return i;
-
 	atomic_set(&dev->ioctl_count, 0);
 	atomic_set(&dev->vma_count, 0);
 	dev->buf_use = 0;
@@ -77,16 +67,12 @@ static int drm_setup(struct drm_device * dev)
 	for (i = 0; i < ARRAY_SIZE(dev->counts); i++)
 		atomic_set(&dev->counts[i], 0);
 
-	drm_ht_create(&dev->magiclist, DRM_MAGIC_HASH_ORDER);
-	INIT_LIST_HEAD(&dev->magicfree);
-
 	dev->sigdata.lock = NULL;
-	init_waitqueue_head(&dev->lock.lock_queue);
+
 	dev->queue_count = 0;
 	dev->queue_reserved = 0;
 	dev->queue_slots = 0;
 	dev->queuelist = NULL;
-	dev->irq_enabled = 0;
 	dev->context_flag = 0;
 	dev->interrupt_flag = 0;
 	dev->dma_flag = 0;
@@ -265,10 +251,42 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 			goto out_free;
 	}
 
+
+	/* if there is no current master make this fd it */
 	mutex_lock(&dev->struct_mutex);
-	if (list_empty(&dev->filelist))
-		priv->master = 1;
+	if (!priv->minor->master) {
+		/* create a new master */
+		priv->minor->master = drm_master_create(priv->minor);
+		if (!priv->minor->master) {
+			ret = -ENOMEM;
+			goto out_free;
+		}
+
+		priv->is_master = 1;
+		/* take another reference for the copy in the local file priv */
+		priv->master = drm_master_get(priv->minor->master);
+
+		priv->authenticated = 1;
+
+		mutex_unlock(&dev->struct_mutex);
+		if (dev->driver->master_create) {
+			ret = dev->driver->master_create(dev, priv->master);
+			if (ret) {
+				mutex_lock(&dev->struct_mutex);
+				/* drop both references if this fails */
+				drm_master_put(&priv->minor->master);
+				drm_master_put(&priv->master);
+				mutex_unlock(&dev->struct_mutex);
+				goto out_free;
+			}
+		}
+	} else {
+		/* get a reference to the master */
+		priv->master = drm_master_get(priv->minor->master);
+		mutex_unlock(&dev->struct_mutex);
+	}
 
+	mutex_lock(&dev->struct_mutex);
 	list_add(&priv->lhead, &dev->filelist);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -314,6 +332,74 @@ int drm_fasync(int fd, struct file *filp, int on)
 }
 EXPORT_SYMBOL(drm_fasync);
 
+/*
+ * Reclaim locked buffers; note that this may be a bad idea if the current
+ * context doesn't have the hw lock...
+ */
+static void drm_reclaim_locked_buffers(struct drm_device *dev, struct file *f)
+{
+	struct drm_file *file_priv = f->private_data;
+
+	if (drm_i_have_hw_lock(dev, file_priv)) {
+		dev->driver->reclaim_buffers_locked(dev, file_priv);
+	} else {
+		unsigned long _end = jiffies + 3 * DRM_HZ;
+		int locked = 0;
+
+		drm_idlelock_take(&file_priv->master->lock);
+
+		/*
+		 * Wait for a while.
+		 */
+		do {
+			spin_lock_bh(&file_priv->master->lock.spinlock);
+			locked = file_priv->master->lock.idle_has_lock;
+			spin_unlock_bh(&file_priv->master->lock.spinlock);
+			if (locked)
+				break;
+			schedule();
+		} while (!time_after_eq(jiffies, _end));
+
+		if (!locked) {
+			DRM_ERROR("reclaim_buffers_locked() deadlock. Please rework this\n"
+				  "\tdriver to use reclaim_buffers_idlelocked() instead.\n"
+				  "\tI will go on reclaiming the buffers anyway.\n");
+		}
+
+		dev->driver->reclaim_buffers_locked(dev, file_priv);
+		drm_idlelock_release(&file_priv->master->lock);
+	}
+}
+
+static void drm_master_release(struct drm_device *dev, struct file *filp)
+{
+	struct drm_file *file_priv = filp->private_data;
+
+	if (dev->driver->reclaim_buffers_locked &&
+	    file_priv->master->lock.hw_lock)
+		drm_reclaim_locked_buffers(dev, filp);
+
+	if (dev->driver->reclaim_buffers_idlelocked &&
+	    file_priv->master->lock.hw_lock) {
+		drm_idlelock_take(&file_priv->master->lock);
+		dev->driver->reclaim_buffers_idlelocked(dev, file_priv);
+		drm_idlelock_release(&file_priv->master->lock);
+	}
+
+
+	if (drm_i_have_hw_lock(dev, file_priv)) {
+		DRM_DEBUG("File %p released, freeing lock for context %d\n",
+			  filp, _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
+		drm_lock_free(&file_priv->master->lock,
+			      _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
+	}
+
+	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
+	    !dev->driver->reclaim_buffers_locked) {
+		dev->driver->reclaim_buffers(dev, file_priv);
+	}
+}
+
 /**
  * Release file.
  *
@@ -348,60 +434,9 @@ int drm_release(struct inode *inode, struct file *filp)
 		  (long)old_encode_dev(file_priv->minor->device),
 		  dev->open_count);
 
-	if (dev->driver->reclaim_buffers_locked && dev->lock.hw_lock) {
-		if (drm_i_have_hw_lock(dev, file_priv)) {
-			dev->driver->reclaim_buffers_locked(dev, file_priv);
-		} else {
-			unsigned long endtime = jiffies + 3 * DRM_HZ;
-			int locked = 0;
-
-			drm_idlelock_take(&dev->lock);
-
-			/*
-			 * Wait for a while.
-			 */
-
-			do{
-				spin_lock_bh(&dev->lock.spinlock);
-				locked = dev->lock.idle_has_lock;
-				spin_unlock_bh(&dev->lock.spinlock);
-				if (locked)
-					break;
-				schedule();
-			} while (!time_after_eq(jiffies, endtime));
-
-			if (!locked) {
-				DRM_ERROR("reclaim_buffers_locked() deadlock. Please rework this\n"
-					  "\tdriver to use reclaim_buffers_idlelocked() instead.\n"
-					  "\tI will go on reclaiming the buffers anyway.\n");
-			}
-
-			dev->driver->reclaim_buffers_locked(dev, file_priv);
-			drm_idlelock_release(&dev->lock);
-		}
-	}
-
-	if (dev->driver->reclaim_buffers_idlelocked && dev->lock.hw_lock) {
-
-		drm_idlelock_take(&dev->lock);
-		dev->driver->reclaim_buffers_idlelocked(dev, file_priv);
-		drm_idlelock_release(&dev->lock);
-
-	}
-
-	if (drm_i_have_hw_lock(dev, file_priv)) {
-		DRM_DEBUG("File %p released, freeing lock for context %d\n",
-			  filp, _DRM_LOCKING_CONTEXT(dev->lock.hw_lock->lock));
-
-		drm_lock_free(&dev->lock,
-			      _DRM_LOCKING_CONTEXT(dev->lock.hw_lock->lock));
-	}
-
-
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
-	    !dev->driver->reclaim_buffers_locked) {
-		dev->driver->reclaim_buffers(dev, file_priv);
-	}
+	/* if the master has gone away we can't do anything with the lock */
+	if (file_priv->minor->master)
+		drm_master_release(dev, filp);
 
 	if (dev->driver->driver_features & DRIVER_GEM)
 		drm_gem_release(dev, file_priv);
@@ -428,12 +463,24 @@ int drm_release(struct inode *inode, struct file *filp)
 	mutex_unlock(&dev->ctxlist_mutex);
 
 	mutex_lock(&dev->struct_mutex);
-	if (file_priv->remove_auth_on_close == 1) {
+
+	if (file_priv->is_master) {
 		struct drm_file *temp;
+		list_for_each_entry(temp, &dev->filelist, lhead) {
+			if ((temp->master == file_priv->master) &&
+			    (temp != file_priv))
+				temp->authenticated = 0;
+		}
 
-		list_for_each_entry(temp, &dev->filelist, lhead)
-			temp->authenticated = 0;
+		if (file_priv->minor->master == file_priv->master) {
+			/* drop the reference held my the minor */
+			drm_master_put(&file_priv->minor->master);
+		}
 	}
+
+	/* drop the reference held my the file priv */
+	drm_master_put(&file_priv->master);
+	file_priv->is_master = 0;
 	list_del(&file_priv->lhead);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -448,9 +495,9 @@ int drm_release(struct inode *inode, struct file *filp)
 	atomic_inc(&dev->counts[_DRM_STAT_CLOSES]);
 	spin_lock(&dev->count_lock);
 	if (!--dev->open_count) {
-		if (atomic_read(&dev->ioctl_count) || dev->blocked) {
-			DRM_ERROR("Device busy: %d %d\n",
-				  atomic_read(&dev->ioctl_count), dev->blocked);
+		if (atomic_read(&dev->ioctl_count)) {
+			DRM_ERROR("Device busy: %d\n",
+				  atomic_read(&dev->ioctl_count));
 			spin_unlock(&dev->count_lock);
 			unlock_kernel();
 			return -EBUSY;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 16829fb3089d..e35126a35093 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -53,12 +53,13 @@ int drm_getunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	struct drm_unique *u = data;
+	struct drm_master *master = file_priv->master;
 
-	if (u->unique_len >= dev->unique_len) {
-		if (copy_to_user(u->unique, dev->unique, dev->unique_len))
+	if (u->unique_len >= master->unique_len) {
+		if (copy_to_user(u->unique, master->unique, master->unique_len))
 			return -EFAULT;
 	}
-	u->unique_len = dev->unique_len;
+	u->unique_len = master->unique_len;
 
 	return 0;
 }
@@ -81,36 +82,37 @@ int drm_setunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	struct drm_unique *u = data;
+	struct drm_master *master = file_priv->master;
 	int domain, bus, slot, func, ret;
 
-	if (dev->unique_len || dev->unique)
+	if (master->unique_len || master->unique)
 		return -EBUSY;
 
 	if (!u->unique_len || u->unique_len > 1024)
 		return -EINVAL;
 
-	dev->unique_len = u->unique_len;
-	dev->unique = drm_alloc(u->unique_len + 1, DRM_MEM_DRIVER);
-	if (!dev->unique)
+	master->unique_len = u->unique_len;
+	master->unique = drm_alloc(u->unique_len + 1, DRM_MEM_DRIVER);
+	if (!master->unique)
 		return -ENOMEM;
-	if (copy_from_user(dev->unique, u->unique, dev->unique_len))
+	if (copy_from_user(master->unique, u->unique, master->unique_len))
 		return -EFAULT;
 
-	dev->unique[dev->unique_len] = '\0';
+	master->unique[master->unique_len] = '\0';
 
 	dev->devname =
 	    drm_alloc(strlen(dev->driver->pci_driver.name) +
-		      strlen(dev->unique) + 2, DRM_MEM_DRIVER);
+		      strlen(master->unique) + 2, DRM_MEM_DRIVER);
 	if (!dev->devname)
 		return -ENOMEM;
 
 	sprintf(dev->devname, "%s@%s", dev->driver->pci_driver.name,
-		dev->unique);
+		master->unique);
 
 	/* Return error if the busid submitted doesn't match the device's actual
 	 * busid.
 	 */
-	ret = sscanf(dev->unique, "PCI:%d:%d:%d", &bus, &slot, &func);
+	ret = sscanf(master->unique, "PCI:%d:%d:%d", &bus, &slot, &func);
 	if (ret != 3)
 		return -EINVAL;
 	domain = bus >> 8;
@@ -125,34 +127,35 @@ int drm_setunique(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int drm_set_busid(struct drm_device * dev)
+static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 {
+	struct drm_master *master = file_priv->master;
 	int len;
 
-	if (dev->unique != NULL)
-		return 0;
+	if (master->unique != NULL)
+		return -EBUSY;
 
-	dev->unique_len = 40;
-	dev->unique = drm_alloc(dev->unique_len + 1, DRM_MEM_DRIVER);
-	if (dev->unique == NULL)
+	master->unique_len = 40;
+	master->unique = drm_alloc(master->unique_len + 1, DRM_MEM_DRIVER);
+	if (master->unique == NULL)
 		return -ENOMEM;
 
-	len = snprintf(dev->unique, dev->unique_len, "pci:%04x:%02x:%02x.%d",
-		       drm_get_pci_domain(dev), dev->pdev->bus->number,
+	len = snprintf(master->unique, master->unique_len, "pci:%04x:%02x:%02x.%d",
+		       drm_get_pci_domain(dev),
+		       dev->pdev->bus->number,
 		       PCI_SLOT(dev->pdev->devfn),
 		       PCI_FUNC(dev->pdev->devfn));
-
-	if (len > dev->unique_len)
-		DRM_ERROR("Unique buffer overflowed\n");
+	if (len > master->unique_len)
+		DRM_ERROR("buffer overflow");
 
 	dev->devname =
-	    drm_alloc(strlen(dev->driver->pci_driver.name) + dev->unique_len +
+	    drm_alloc(strlen(dev->driver->pci_driver.name) + master->unique_len +
 		      2, DRM_MEM_DRIVER);
 	if (dev->devname == NULL)
 		return -ENOMEM;
 
 	sprintf(dev->devname, "%s@%s", dev->driver->pci_driver.name,
-		dev->unique);
+		master->unique);
 
 	return 0;
 }
@@ -276,7 +279,7 @@ int drm_getstats(struct drm_device *dev, void *data,
 	for (i = 0; i < dev->counters; i++) {
 		if (dev->types[i] == _DRM_STAT_LOCK)
 			stats->data[i].value =
-			    (dev->lock.hw_lock ? dev->lock.hw_lock->lock : 0);
+			    (file_priv->master->lock.hw_lock ? file_priv->master->lock.hw_lock->lock : 0);
 		else
 			stats->data[i].value = atomic_read(&dev->counts[i]);
 		stats->data[i].type = dev->types[i];
@@ -318,7 +321,7 @@ int drm_setversion(struct drm_device *dev, void *data, struct drm_file *file_pri
 			/*
 			 * Version 1.1 includes tying of DRM to specific device
 			 */
-			drm_set_busid(dev);
+			drm_set_busid(dev, file_priv);
 		}
 	}
 
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index 1cfa72031f8f..46e7b28f0707 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -52,6 +52,7 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	DECLARE_WAITQUEUE(entry, current);
 	struct drm_lock *lock = data;
+	struct drm_master *master = file_priv->master;
 	int ret = 0;
 
 	++file_priv->lock_count;
@@ -64,26 +65,27 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
 	DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n",
 		  lock->context, task_pid_nr(current),
-		  dev->lock.hw_lock->lock, lock->flags);
+		  master->lock.hw_lock->lock, lock->flags);
 
 	if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE))
 		if (lock->context < 0)
 			return -EINVAL;
 
-	add_wait_queue(&dev->lock.lock_queue, &entry);
-	spin_lock_bh(&dev->lock.spinlock);
-	dev->lock.user_waiters++;
-	spin_unlock_bh(&dev->lock.spinlock);
+	add_wait_queue(&master->lock.lock_queue, &entry);
+	spin_lock_bh(&master->lock.spinlock);
+	master->lock.user_waiters++;
+	spin_unlock_bh(&master->lock.spinlock);
+
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
-		if (!dev->lock.hw_lock) {
+		if (!master->lock.hw_lock) {
 			/* Device has been unregistered */
 			ret = -EINTR;
 			break;
 		}
-		if (drm_lock_take(&dev->lock, lock->context)) {
-			dev->lock.file_priv = file_priv;
-			dev->lock.lock_time = jiffies;
+		if (drm_lock_take(&master->lock, lock->context)) {
+			master->lock.file_priv = file_priv;
+			master->lock.lock_time = jiffies;
 			atomic_inc(&dev->counts[_DRM_STAT_LOCKS]);
 			break;	/* Got lock */
 		}
@@ -95,11 +97,11 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 			break;
 		}
 	}
-	spin_lock_bh(&dev->lock.spinlock);
-	dev->lock.user_waiters--;
-	spin_unlock_bh(&dev->lock.spinlock);
+	spin_lock_bh(&master->lock.spinlock);
+	master->lock.user_waiters--;
+	spin_unlock_bh(&master->lock.spinlock);
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&dev->lock.lock_queue, &entry);
+	remove_wait_queue(&master->lock.lock_queue, &entry);
 
 	DRM_DEBUG("%d %s\n", lock->context,
 		  ret ? "interrupted" : "has lock");
@@ -108,14 +110,14 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 	/* don't set the block all signals on the master process for now 
 	 * really probably not the correct answer but lets us debug xkb
  	 * xserver for now */
-	if (!file_priv->master) {
+	if (!file_priv->is_master) {
 		sigemptyset(&dev->sigmask);
 		sigaddset(&dev->sigmask, SIGSTOP);
 		sigaddset(&dev->sigmask, SIGTSTP);
 		sigaddset(&dev->sigmask, SIGTTIN);
 		sigaddset(&dev->sigmask, SIGTTOU);
 		dev->sigdata.context = lock->context;
-		dev->sigdata.lock = dev->lock.hw_lock;
+		dev->sigdata.lock = master->lock.hw_lock;
 		block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask);
 	}
 
@@ -154,6 +156,7 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_lock *lock = data;
+	struct drm_master *master = file_priv->master;
 
 	if (lock->context == DRM_KERNEL_CONTEXT) {
 		DRM_ERROR("Process %d using kernel context %d\n",
@@ -169,7 +172,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 	if (dev->driver->kernel_context_switch_unlock)
 		dev->driver->kernel_context_switch_unlock(dev);
 	else {
-		if (drm_lock_free(&dev->lock,lock->context)) {
+		if (drm_lock_free(&master->lock, lock->context)) {
 			/* FIXME: Should really bail out here. */
 		}
 	}
@@ -379,9 +382,10 @@ EXPORT_SYMBOL(drm_idlelock_release);
 
 int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv)
 {
-	return (file_priv->lock_count && dev->lock.hw_lock &&
-		_DRM_LOCK_IS_HELD(dev->lock.hw_lock->lock) &&
-		dev->lock.file_priv == file_priv);
+	struct drm_master *master = file_priv->master;
+	return (file_priv->lock_count && master->lock.hw_lock &&
+		_DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) &&
+		master->lock.file_priv == file_priv);
 }
 
 EXPORT_SYMBOL(drm_i_have_hw_lock);
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index ae73b7f7249a..7dbaa1a19ea4 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -195,6 +195,7 @@ static int drm_name_info(char *buf, char **start, off_t offset, int request,
 			 int *eof, void *data)
 {
 	struct drm_minor *minor = (struct drm_minor *) data;
+	struct drm_master *master = minor->master;
 	struct drm_device *dev = minor->dev;
 	int len = 0;
 
@@ -203,13 +204,16 @@ static int drm_name_info(char *buf, char **start, off_t offset, int request,
 		return 0;
 	}
 
+	if (!master)
+		return 0;
+
 	*start = &buf[offset];
 	*eof = 0;
 
-	if (dev->unique) {
+	if (master->unique) {
 		DRM_PROC_PRINT("%s %s %s\n",
 			       dev->driver->pci_driver.name,
-			       pci_name(dev->pdev), dev->unique);
+			       pci_name(dev->pdev), master->unique);
 	} else {
 		DRM_PROC_PRINT("%s %s\n", dev->driver->pci_driver.name,
 			       pci_name(dev->pdev));
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 849c0a9fe7fd..0f24c2dcd517 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -79,6 +79,104 @@ again:
 	return new_id;
 }
 
+struct drm_master *drm_master_create(struct drm_minor *minor)
+{
+	struct drm_master *master;
+
+	master = drm_calloc(1, sizeof(*master), DRM_MEM_DRIVER);
+	if (!master)
+		return NULL;
+
+	kref_init(&master->refcount);
+	spin_lock_init(&master->lock.spinlock);
+	init_waitqueue_head(&master->lock.lock_queue);
+	drm_ht_create(&master->magiclist, DRM_MAGIC_HASH_ORDER);
+	INIT_LIST_HEAD(&master->magicfree);
+	master->minor = minor;
+
+	list_add_tail(&master->head, &minor->master_list);
+
+	return master;
+}
+
+struct drm_master *drm_master_get(struct drm_master *master)
+{
+	kref_get(&master->refcount);
+	return master;
+}
+
+static void drm_master_destroy(struct kref *kref)
+{
+	struct drm_master *master = container_of(kref, struct drm_master, refcount);
+	struct drm_magic_entry *pt, *next;
+	struct drm_device *dev = master->minor->dev;
+
+	list_del(&master->head);
+
+	if (dev->driver->master_destroy)
+		dev->driver->master_destroy(dev, master);
+
+	if (master->unique) {
+		drm_free(master->unique, strlen(master->unique) + 1, DRM_MEM_DRIVER);
+		master->unique = NULL;
+		master->unique_len = 0;
+	}
+
+	list_for_each_entry_safe(pt, next, &master->magicfree, head) {
+		list_del(&pt->head);
+		drm_ht_remove_item(&master->magiclist, &pt->hash_item);
+		drm_free(pt, sizeof(*pt), DRM_MEM_MAGIC);
+	}
+
+	drm_ht_remove(&master->magiclist);
+
+	if (master->lock.hw_lock) {
+		if (dev->sigdata.lock == master->lock.hw_lock)
+			dev->sigdata.lock = NULL;
+		master->lock.hw_lock = NULL;
+		master->lock.file_priv = NULL;
+		wake_up_interruptible(&master->lock.lock_queue);
+	}
+
+	drm_free(master, sizeof(*master), DRM_MEM_DRIVER);
+}
+
+void drm_master_put(struct drm_master **master)
+{
+	kref_put(&(*master)->refcount, drm_master_destroy);
+	*master = NULL;
+}
+
+int drm_setmaster_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	if (file_priv->minor->master && file_priv->minor->master != file_priv->master)
+		return -EINVAL;
+
+	if (!file_priv->master)
+		return -EINVAL;
+
+	if (!file_priv->minor->master &&
+	    file_priv->minor->master != file_priv->master) {
+		mutex_lock(&dev->struct_mutex);
+		file_priv->minor->master = drm_master_get(file_priv->master);
+		mutex_lock(&dev->struct_mutex);
+	}
+
+	return 0;
+}
+
+int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv)
+{
+	if (!file_priv->master)
+		return -EINVAL;
+	mutex_lock(&dev->struct_mutex);
+	drm_master_put(&file_priv->minor->master);
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
 static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,
 			   const struct pci_device_id *ent,
 			   struct drm_driver *driver)
@@ -92,7 +190,6 @@ static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,
 
 	spin_lock_init(&dev->count_lock);
 	spin_lock_init(&dev->drw_lock);
-	spin_lock_init(&dev->lock.spinlock);
 	init_timer(&dev->timer);
 	mutex_init(&dev->struct_mutex);
 	mutex_init(&dev->ctxlist_mutex);
@@ -200,6 +297,7 @@ static int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int t
 	new_minor->device = MKDEV(DRM_MAJOR, minor_id);
 	new_minor->dev = dev;
 	new_minor->index = minor_id;
+	INIT_LIST_HEAD(&new_minor->master_list);
 
 	idr_replace(&drm_minors_idr, new_minor, minor_id);
 
@@ -299,11 +397,6 @@ int drm_put_dev(struct drm_device * dev)
 {
 	DRM_DEBUG("release primary %s\n", dev->driver->pci_driver.name);
 
-	if (dev->unique) {
-		drm_free(dev->unique, strlen(dev->unique) + 1, DRM_MEM_DRIVER);
-		dev->unique = NULL;
-		dev->unique_len = 0;
-	}
 	if (dev->devname) {
 		drm_free(dev->devname, strlen(dev->devname) + 1,
 			 DRM_MEM_DRIVER);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index afa8a12cd009..dacdf3c577cb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -39,6 +39,7 @@
 int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	drm_i915_ring_buffer_t *ring = &(dev_priv->ring);
 	u32 acthd_reg = IS_I965G(dev) ? ACTHD_I965 : ACTHD;
 	u32 last_acthd = I915_READ(acthd_reg);
@@ -55,8 +56,8 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 		if (ring->space >= n)
 			return 0;
 
-		if (dev_priv->sarea_priv)
-			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
+		if (master_priv->sarea_priv)
+			master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
 
 		if (ring->head != last_head)
 			i = 0;
@@ -121,6 +122,7 @@ static void i915_free_hws(struct drm_device *dev)
 void i915_kernel_lost_context(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_master_private *master_priv;
 	drm_i915_ring_buffer_t *ring = &(dev_priv->ring);
 
 	ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
@@ -129,8 +131,12 @@ void i915_kernel_lost_context(struct drm_device * dev)
 	if (ring->space < 0)
 		ring->space += ring->Size;
 
-	if (ring->head == ring->tail && dev_priv->sarea_priv)
-		dev_priv->sarea_priv->perf_boxes |= I915_BOX_RING_EMPTY;
+	if (!dev->primary->master)
+		return;
+
+	master_priv = dev->primary->master->driver_priv;
+	if (ring->head == ring->tail && master_priv->sarea_priv)
+		master_priv->sarea_priv->perf_boxes |= I915_BOX_RING_EMPTY;
 }
 
 static int i915_dma_cleanup(struct drm_device * dev)
@@ -154,25 +160,13 @@ static int i915_dma_cleanup(struct drm_device * dev)
 	if (I915_NEED_GFX_HWS(dev))
 		i915_free_hws(dev);
 
-	dev_priv->sarea = NULL;
-	dev_priv->sarea_priv = NULL;
-
 	return 0;
 }
 
 static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	dev_priv->sarea = drm_getsarea(dev);
-	if (!dev_priv->sarea) {
-		DRM_ERROR("can not find sarea!\n");
-		i915_dma_cleanup(dev);
-		return -EINVAL;
-	}
-
-	dev_priv->sarea_priv = (drm_i915_sarea_t *)
-	    ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset);
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 
 	if (init->ring_size != 0) {
 		if (dev_priv->ring.ring_obj != NULL) {
@@ -207,7 +201,8 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init)
 	dev_priv->back_offset = init->back_offset;
 	dev_priv->front_offset = init->front_offset;
 	dev_priv->current_page = 0;
-	dev_priv->sarea_priv->pf_current_page = dev_priv->current_page;
+	if (master_priv->sarea_priv)
+		master_priv->sarea_priv->pf_current_page = 0;
 
 	/* Allow hardware batchbuffers unless told otherwise.
 	 */
@@ -222,11 +217,6 @@ static int i915_dma_resume(struct drm_device * dev)
 
 	DRM_DEBUG("%s\n", __func__);
 
-	if (!dev_priv->sarea) {
-		DRM_ERROR("can not find sarea!\n");
-		return -EINVAL;
-	}
-
 	if (dev_priv->ring.map.handle == NULL) {
 		DRM_ERROR("can not ioremap virtual address for"
 			  " ring buffer\n");
@@ -435,13 +425,14 @@ i915_emit_box(struct drm_device *dev,
 static void i915_emit_breadcrumb(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	RING_LOCALS;
 
 	dev_priv->counter++;
 	if (dev_priv->counter > 0x7FFFFFFFUL)
 		dev_priv->counter = 0;
-	if (dev_priv->sarea_priv)
-		dev_priv->sarea_priv->last_enqueue = dev_priv->counter;
+	if (master_priv->sarea_priv)
+		master_priv->sarea_priv->last_enqueue = dev_priv->counter;
 
 	BEGIN_LP_RING(4);
 	OUT_RING(MI_STORE_DWORD_INDEX);
@@ -537,15 +528,17 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev,
 static int i915_dispatch_flip(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_master_private *master_priv =
+		dev->primary->master->driver_priv;
 	RING_LOCALS;
 
-	if (!dev_priv->sarea_priv)
+	if (!master_priv->sarea_priv)
 		return -EINVAL;
 
 	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
 		  __func__,
 		  dev_priv->current_page,
-		  dev_priv->sarea_priv->pf_current_page);
+		  master_priv->sarea_priv->pf_current_page);
 
 	i915_kernel_lost_context(dev);
 
@@ -572,7 +565,7 @@ static int i915_dispatch_flip(struct drm_device * dev)
 	OUT_RING(0);
 	ADVANCE_LP_RING();
 
-	dev_priv->sarea_priv->last_enqueue = dev_priv->counter++;
+	master_priv->sarea_priv->last_enqueue = dev_priv->counter++;
 
 	BEGIN_LP_RING(4);
 	OUT_RING(MI_STORE_DWORD_INDEX);
@@ -581,7 +574,7 @@ static int i915_dispatch_flip(struct drm_device * dev)
 	OUT_RING(0);
 	ADVANCE_LP_RING();
 
-	dev_priv->sarea_priv->pf_current_page = dev_priv->current_page;
+	master_priv->sarea_priv->pf_current_page = dev_priv->current_page;
 	return 0;
 }
 
@@ -611,8 +604,9 @@ static int i915_batchbuffer(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	drm_i915_sarea_t *sarea_priv = (drm_i915_sarea_t *)
-	    dev_priv->sarea_priv;
+	    master_priv->sarea_priv;
 	drm_i915_batchbuffer_t *batch = data;
 	int ret;
 
@@ -644,8 +638,9 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	drm_i915_sarea_t *sarea_priv = (drm_i915_sarea_t *)
-	    dev_priv->sarea_priv;
+	    master_priv->sarea_priv;
 	drm_i915_cmdbuffer_t *cmdbuf = data;
 	int ret;
 
@@ -802,6 +797,30 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int i915_master_create(struct drm_device *dev, struct drm_master *master)
+{
+	struct drm_i915_master_private *master_priv;
+
+	master_priv = drm_calloc(1, sizeof(*master_priv), DRM_MEM_DRIVER);
+	if (!master_priv)
+		return -ENOMEM;
+
+	master->driver_priv = master_priv;
+	return 0;
+}
+
+void i915_master_destroy(struct drm_device *dev, struct drm_master *master)
+{
+	struct drm_i915_master_private *master_priv = master->driver_priv;
+
+	if (!master_priv)
+		return;
+
+	drm_free(master_priv, sizeof(*master_priv), DRM_MEM_DRIVER);
+
+	master->driver_priv = NULL;
+}
+
 int i915_driver_load(struct drm_device *dev, unsigned long flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a80ead215282..c91648320a8b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -107,6 +107,8 @@ static struct drm_driver driver = {
 	.reclaim_buffers = drm_core_reclaim_buffers,
 	.get_map_ofs = drm_core_get_map_ofs,
 	.get_reg_ofs = drm_core_get_reg_ofs,
+	.master_create = i915_master_create,
+	.master_destroy = i915_master_destroy,
 	.proc_init = i915_gem_proc_init,
 	.proc_cleanup = i915_gem_proc_cleanup,
 	.gem_init_object = i915_gem_init_object,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b3cc4731aa7c..ba096f9a7641 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -103,15 +103,18 @@ struct intel_opregion {
 	int enabled;
 };
 
+struct drm_i915_master_private {
+	drm_local_map_t *sarea;
+	struct _drm_i915_sarea *sarea_priv;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
 	int has_gem;
 
 	void __iomem *regs;
-	drm_local_map_t *sarea;
 
-	drm_i915_sarea_t *sarea_priv;
 	drm_i915_ring_buffer_t ring;
 
 	drm_dma_handle_t *status_page_dmah;
@@ -417,6 +420,9 @@ struct drm_i915_file_private {
 extern struct drm_ioctl_desc i915_ioctls[];
 extern int i915_max_ioctl;
 
+extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
+extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
+
 				/* i915_dma.c */
 extern void i915_kernel_lost_context(struct drm_device * dev);
 extern int i915_driver_load(struct drm_device *, unsigned long flags);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 69b9a42da95e..9b673d2f912b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -168,6 +168,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	struct drm_i915_master_private *master_priv;
 	u32 iir, new_iir;
 	u32 pipea_stats, pipeb_stats;
 	u32 vblank_status;
@@ -222,9 +223,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 		I915_WRITE(IIR, iir);
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
-		if (dev_priv->sarea_priv)
-			dev_priv->sarea_priv->last_dispatch =
-				READ_BREADCRUMB(dev_priv);
+		if (dev->primary->master) {
+			master_priv = dev->primary->master->driver_priv;
+			if (master_priv->sarea_priv)
+				master_priv->sarea_priv->last_dispatch =
+					READ_BREADCRUMB(dev_priv);
+		}
 
 		if (iir & I915_USER_INTERRUPT) {
 			dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
@@ -269,6 +273,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 static int i915_emit_irq(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	RING_LOCALS;
 
 	i915_kernel_lost_context(dev);
@@ -278,8 +283,8 @@ static int i915_emit_irq(struct drm_device * dev)
 	dev_priv->counter++;
 	if (dev_priv->counter > 0x7FFFFFFFUL)
 		dev_priv->counter = 1;
-	if (dev_priv->sarea_priv)
-		dev_priv->sarea_priv->last_enqueue = dev_priv->counter;
+	if (master_priv->sarea_priv)
+		master_priv->sarea_priv->last_enqueue = dev_priv->counter;
 
 	BEGIN_LP_RING(4);
 	OUT_RING(MI_STORE_DWORD_INDEX);
@@ -317,21 +322,20 @@ void i915_user_irq_put(struct drm_device *dev)
 static int i915_wait_irq(struct drm_device * dev, int irq_nr)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	int ret = 0;
 
 	DRM_DEBUG("irq_nr=%d breadcrumb=%d\n", irq_nr,
 		  READ_BREADCRUMB(dev_priv));
 
 	if (READ_BREADCRUMB(dev_priv) >= irq_nr) {
-		if (dev_priv->sarea_priv) {
-			dev_priv->sarea_priv->last_dispatch =
-				READ_BREADCRUMB(dev_priv);
-		}
+		if (master_priv->sarea_priv)
+			master_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv);
 		return 0;
 	}
 
-	if (dev_priv->sarea_priv)
-		dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
+	if (master_priv->sarea_priv)
+		master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
 
 	i915_user_irq_get(dev);
 	DRM_WAIT_ON(ret, dev_priv->irq_queue, 3 * DRM_HZ,
@@ -343,10 +347,6 @@ static int i915_wait_irq(struct drm_device * dev, int irq_nr)
 			  READ_BREADCRUMB(dev_priv), (int)dev_priv->counter);
 	}
 
-	if (dev_priv->sarea_priv)
-		dev_priv->sarea_priv->last_dispatch =
-			READ_BREADCRUMB(dev_priv);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_mem.c b/drivers/gpu/drm/i915/i915_mem.c
index 6126a60dc9cb..96e271986d2a 100644
--- a/drivers/gpu/drm/i915/i915_mem.c
+++ b/drivers/gpu/drm/i915/i915_mem.c
@@ -46,7 +46,8 @@
 static void mark_block(struct drm_device * dev, struct mem_block *p, int in_use)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	drm_i915_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
+	drm_i915_sarea_t *sarea_priv = master_priv->sarea_priv;
 	struct drm_tex_region *list;
 	unsigned shift, nr;
 	unsigned start;
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index 4b27d9abb7bc..cace3964feeb 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -860,12 +860,12 @@ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
  * be careful about how this function is called.
  */
-static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
+static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 {
-	drm_radeon_private_t *dev_priv = dev->dev_private;
 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
 
-	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
+	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
 	buf->pending = 1;
 	buf->used = 0;
 }
@@ -1027,6 +1027,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 		      drm_radeon_kcmd_buffer_t *cmdbuf)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf *buf = NULL;
 	int emit_dispatch_age = 0;
@@ -1134,7 +1135,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 			}
 
 			emit_dispatch_age = 1;
-			r300_discard_buffer(dev, buf);
+			r300_discard_buffer(dev, file_priv->master, buf);
 			break;
 
 		case R300_CMD_WAIT:
@@ -1189,7 +1190,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
 		/* Emit the vertex buffer age */
 		BEGIN_RING(2);
-		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
+		RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
 		ADVANCE_RING();
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index dcebb4bee7aa..7b37a4906377 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -31,6 +31,7 @@
 
 #include "drmP.h"
 #include "drm.h"
+#include "drm_sarea.h"
 #include "radeon_drm.h"
 #include "radeon_drv.h"
 #include "r300_reg.h"
@@ -667,15 +668,14 @@ static void radeon_cp_init_ring_buffer(struct drm_device * dev,
 		RADEON_WRITE(RADEON_BUS_CNTL, tmp);
 	} /* PCIE cards appears to not need this */
 
-	dev_priv->sarea_priv->last_frame = dev_priv->scratch[0] = 0;
-	RADEON_WRITE(RADEON_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame);
+	dev_priv->scratch[0] = 0;
+	RADEON_WRITE(RADEON_LAST_FRAME_REG, 0);
 
-	dev_priv->sarea_priv->last_dispatch = dev_priv->scratch[1] = 0;
-	RADEON_WRITE(RADEON_LAST_DISPATCH_REG,
-		     dev_priv->sarea_priv->last_dispatch);
+	dev_priv->scratch[1] = 0;
+	RADEON_WRITE(RADEON_LAST_DISPATCH_REG, 0);
 
-	dev_priv->sarea_priv->last_clear = dev_priv->scratch[2] = 0;
-	RADEON_WRITE(RADEON_LAST_CLEAR_REG, dev_priv->sarea_priv->last_clear);
+	dev_priv->scratch[2] = 0;
+	RADEON_WRITE(RADEON_LAST_CLEAR_REG, 0);
 
 	radeon_do_wait_for_idle(dev_priv);
 
@@ -871,9 +871,11 @@ static void radeon_set_pcigart(drm_radeon_private_t * dev_priv, int on)
 	}
 }
 
-static int radeon_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init)
+static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
+			     struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
 	DRM_DEBUG("\n");
 
@@ -998,8 +1000,8 @@ static int radeon_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init)
 	dev_priv->buffers_offset = init->buffers_offset;
 	dev_priv->gart_textures_offset = init->gart_textures_offset;
 
-	dev_priv->sarea = drm_getsarea(dev);
-	if (!dev_priv->sarea) {
+	master_priv->sarea = drm_getsarea(dev);
+	if (!master_priv->sarea) {
 		DRM_ERROR("could not find sarea!\n");
 		radeon_do_cleanup_cp(dev);
 		return -EINVAL;
@@ -1035,10 +1037,6 @@ static int radeon_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init)
 		}
 	}
 
-	dev_priv->sarea_priv =
-	    (drm_radeon_sarea_t *) ((u8 *) dev_priv->sarea->handle +
-				    init->sarea_priv_offset);
-
 #if __OS_HAS_AGP
 	if (dev_priv->flags & RADEON_IS_AGP) {
 		drm_core_ioremap(dev_priv->cp_ring, dev);
@@ -1329,7 +1327,7 @@ int radeon_cp_init(struct drm_device *dev, void *data, struct drm_file *file_pri
 	case RADEON_INIT_CP:
 	case RADEON_INIT_R200_CP:
 	case RADEON_INIT_R300_CP:
-		return radeon_do_init_cp(dev, init);
+		return radeon_do_init_cp(dev, init, file_priv);
 	case RADEON_CLEANUP_CP:
 		return radeon_do_cleanup_cp(dev);
 	}
@@ -1768,6 +1766,51 @@ int radeon_driver_load(struct drm_device *dev, unsigned long flags)
 	return ret;
 }
 
+int radeon_master_create(struct drm_device *dev, struct drm_master *master)
+{
+	struct drm_radeon_master_private *master_priv;
+	unsigned long sareapage;
+	int ret;
+
+	master_priv = drm_calloc(1, sizeof(*master_priv), DRM_MEM_DRIVER);
+	if (!master_priv)
+		return -ENOMEM;
+
+	/* prebuild the SAREA */
+	sareapage = max(SAREA_MAX, PAGE_SIZE);
+	ret = drm_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK|_DRM_DRIVER,
+			 &master_priv->sarea);
+	if (ret) {
+		DRM_ERROR("SAREA setup failed\n");
+		return ret;
+	}
+	master_priv->sarea_priv = master_priv->sarea->handle + sizeof(struct drm_sarea);
+	master_priv->sarea_priv->pfCurrentPage = 0;
+
+	master->driver_priv = master_priv;
+	return 0;
+}
+
+void radeon_master_destroy(struct drm_device *dev, struct drm_master *master)
+{
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+
+	if (!master_priv)
+		return;
+
+	if (master_priv->sarea_priv &&
+	    master_priv->sarea_priv->pfCurrentPage != 0)
+		radeon_cp_dispatch_flip(dev, master);
+
+	master_priv->sarea_priv = NULL;
+	if (master_priv->sarea)
+		drm_rmmap(dev, master_priv->sarea);
+
+	drm_free(master_priv, sizeof(*master_priv), DRM_MEM_DRIVER);
+
+	master->driver_priv = NULL;
+}
+
 /* Create mappings for registers and framebuffer so userland doesn't necessarily
  * have to find them.
  */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 3bbb871b25d5..490bc7ceef60 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -226,9 +226,13 @@ struct radeon_virt_surface {
 #define RADEON_FLUSH_EMITED	(1 < 0)
 #define RADEON_PURGE_EMITED	(1 < 1)
 
+struct drm_radeon_master_private {
+	drm_local_map_t *sarea;
+	drm_radeon_sarea_t *sarea_priv;
+};
+
 typedef struct drm_radeon_private {
 	drm_radeon_ring_buffer_t ring;
-	drm_radeon_sarea_t *sarea_priv;
 
 	u32 fb_location;
 	u32 fb_size;
@@ -409,6 +413,9 @@ extern int radeon_driver_open(struct drm_device *dev,
 extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd,
 				unsigned long arg);
 
+extern int radeon_master_create(struct drm_device *dev, struct drm_master *master);
+extern void radeon_master_destroy(struct drm_device *dev, struct drm_master *master);
+extern void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master);
 /* r300_cmdbuf.c */
 extern void r300_init_reg_flags(struct drm_device *dev);
 
@@ -1335,8 +1342,9 @@ do {									\
 } while (0)
 
 #define VB_AGE_TEST_WITH_RETURN( dev_priv )				\
-do {									\
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;		\
+do {								\
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;	\
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;	\
 	if ( sarea_priv->last_dispatch >= RADEON_MAX_VB_AGE ) {		\
 		int __ret = radeon_do_cp_idle( dev_priv );		\
 		if ( __ret ) return __ret;				\
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 5d7153fcc7b0..ef940a079dcb 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -742,13 +742,14 @@ static struct {
  */
 
 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
+			     struct drm_radeon_master_private *master_priv,
 			     int x, int y, int w, int h, int r, int g, int b)
 {
 	u32 color;
 	RING_LOCALS;
 
-	x += dev_priv->sarea_priv->boxes[0].x1;
-	y += dev_priv->sarea_priv->boxes[0].y1;
+	x += master_priv->sarea_priv->boxes[0].x1;
+	y += master_priv->sarea_priv->boxes[0].y1;
 
 	switch (dev_priv->color_fmt) {
 	case RADEON_COLOR_FORMAT_RGB565:
@@ -776,7 +777,7 @@ static void radeon_clear_box(drm_radeon_private_t * dev_priv,
 		 RADEON_GMC_SRC_DATATYPE_COLOR |
 		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
 
-	if (dev_priv->sarea_priv->pfCurrentPage == 1) {
+	if (master_priv->sarea_priv->pfCurrentPage == 1) {
 		OUT_RING(dev_priv->front_pitch_offset);
 	} else {
 		OUT_RING(dev_priv->back_pitch_offset);
@@ -790,7 +791,7 @@ static void radeon_clear_box(drm_radeon_private_t * dev_priv,
 	ADVANCE_RING();
 }
 
-static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
+static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
 {
 	/* Collapse various things into a wait flag -- trying to
 	 * guess if userspase slept -- better just to have them tell us.
@@ -807,12 +808,12 @@ static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
 	/* Purple box for page flipping
 	 */
 	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
-		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
+		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
 
 	/* Red box if we have to wait for idle at any point
 	 */
 	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
-		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
+		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
 
 	/* Blue box: lost context?
 	 */
@@ -820,12 +821,12 @@ static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
 	/* Yellow box for texture swaps
 	 */
 	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
-		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
+		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
 
 	/* Green box if hardware never idles (as far as we can tell)
 	 */
 	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
-		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
+		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
 
 	/* Draw bars indicating number of buffers allocated
 	 * (not a great measure, easily confused)
@@ -834,7 +835,7 @@ static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
 		if (dev_priv->stats.requested_bufs > 100)
 			dev_priv->stats.requested_bufs = 100;
 
-		radeon_clear_box(dev_priv, 4, 16,
+		radeon_clear_box(dev_priv, master_priv, 4, 16,
 				 dev_priv->stats.requested_bufs, 4,
 				 196, 128, 128);
 	}
@@ -848,11 +849,13 @@ static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
  */
 
 static void radeon_cp_dispatch_clear(struct drm_device * dev,
+				     struct drm_master *master,
 				     drm_radeon_clear_t * clear,
 				     drm_radeon_clear_rect_t * depth_boxes)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
 	int nbox = sarea_priv->nbox;
 	struct drm_clip_rect *pbox = sarea_priv->boxes;
@@ -864,7 +867,7 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev,
 
 	dev_priv->stats.clears++;
 
-	if (dev_priv->sarea_priv->pfCurrentPage == 1) {
+	if (sarea_priv->pfCurrentPage == 1) {
 		unsigned int tmp = flags;
 
 		flags &= ~(RADEON_FRONT | RADEON_BACK);
@@ -890,7 +893,7 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev,
 
 		/* Make sure we restore the 3D state next time.
 		 */
-		dev_priv->sarea_priv->ctx_owner = 0;
+		sarea_priv->ctx_owner = 0;
 
 		for (i = 0; i < nbox; i++) {
 			int x = pbox[i].x1;
@@ -967,7 +970,7 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev,
 		/* Make sure we restore the 3D state next time.
 		 * we haven't touched any "normal" state - still need this?
 		 */
-		dev_priv->sarea_priv->ctx_owner = 0;
+		sarea_priv->ctx_owner = 0;
 
 		if ((dev_priv->flags & RADEON_HAS_HIERZ)
 		    && (flags & RADEON_USE_HIERZ)) {
@@ -1214,7 +1217,7 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev,
 
 		/* Make sure we restore the 3D state next time.
 		 */
-		dev_priv->sarea_priv->ctx_owner = 0;
+		sarea_priv->ctx_owner = 0;
 
 		for (i = 0; i < nbox; i++) {
 
@@ -1285,7 +1288,7 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev,
 
 		/* Make sure we restore the 3D state next time.
 		 */
-		dev_priv->sarea_priv->ctx_owner = 0;
+		sarea_priv->ctx_owner = 0;
 
 		for (i = 0; i < nbox; i++) {
 
@@ -1328,20 +1331,21 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev,
 	 * wait on this value before performing the clear ioctl.  We
 	 * need this because the card's so damned fast...
 	 */
-	dev_priv->sarea_priv->last_clear++;
+	sarea_priv->last_clear++;
 
 	BEGIN_RING(4);
 
-	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
+	RADEON_CLEAR_AGE(sarea_priv->last_clear);
 	RADEON_WAIT_UNTIL_IDLE();
 
 	ADVANCE_RING();
 }
 
-static void radeon_cp_dispatch_swap(struct drm_device * dev)
+static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 	int nbox = sarea_priv->nbox;
 	struct drm_clip_rect *pbox = sarea_priv->boxes;
 	int i;
@@ -1351,7 +1355,7 @@ static void radeon_cp_dispatch_swap(struct drm_device * dev)
 	/* Do some trivial performance monitoring...
 	 */
 	if (dev_priv->do_boxes)
-		radeon_cp_performance_boxes(dev_priv);
+		radeon_cp_performance_boxes(dev_priv, master_priv);
 
 	/* Wait for the 3D stream to idle before dispatching the bitblt.
 	 * This will prevent data corruption between the two streams.
@@ -1385,7 +1389,7 @@ static void radeon_cp_dispatch_swap(struct drm_device * dev)
 		/* Make this work even if front & back are flipped:
 		 */
 		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
-		if (dev_priv->sarea_priv->pfCurrentPage == 0) {
+		if (sarea_priv->pfCurrentPage == 0) {
 			OUT_RING(dev_priv->back_pitch_offset);
 			OUT_RING(dev_priv->front_pitch_offset);
 		} else {
@@ -1405,31 +1409,32 @@ static void radeon_cp_dispatch_swap(struct drm_device * dev)
 	 * throttle the framerate by waiting for this value before
 	 * performing the swapbuffer ioctl.
 	 */
-	dev_priv->sarea_priv->last_frame++;
+	sarea_priv->last_frame++;
 
 	BEGIN_RING(4);
 
-	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
+	RADEON_FRAME_AGE(sarea_priv->last_frame);
 	RADEON_WAIT_UNTIL_2D_IDLE();
 
 	ADVANCE_RING();
 }
 
-static void radeon_cp_dispatch_flip(struct drm_device * dev)
+void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
-	int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+	struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
+	int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
 	    ? dev_priv->front_offset : dev_priv->back_offset;
 	RING_LOCALS;
 	DRM_DEBUG("pfCurrentPage=%d\n",
-		  dev_priv->sarea_priv->pfCurrentPage);
+		  master_priv->sarea_priv->pfCurrentPage);
 
 	/* Do some trivial performance monitoring...
 	 */
 	if (dev_priv->do_boxes) {
 		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
-		radeon_cp_performance_boxes(dev_priv);
+		radeon_cp_performance_boxes(dev_priv, master_priv);
 	}
 
 	/* Update the frame offsets for both CRTCs
@@ -1441,7 +1446,7 @@ static void radeon_cp_dispatch_flip(struct drm_device * dev)
 		     ((sarea->frame.y * dev_priv->front_pitch +
 		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
 		     + offset);
-	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
+	OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
 		     + offset);
 
 	ADVANCE_RING();
@@ -1450,13 +1455,13 @@ static void radeon_cp_dispatch_flip(struct drm_device * dev)
 	 * throttle the framerate by waiting for this value before
 	 * performing the swapbuffer ioctl.
 	 */
-	dev_priv->sarea_priv->last_frame++;
-	dev_priv->sarea_priv->pfCurrentPage =
-		1 - dev_priv->sarea_priv->pfCurrentPage;
+	master_priv->sarea_priv->last_frame++;
+	master_priv->sarea_priv->pfCurrentPage =
+		1 - master_priv->sarea_priv->pfCurrentPage;
 
 	BEGIN_RING(2);
 
-	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
+	RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
 
 	ADVANCE_RING();
 }
@@ -1494,11 +1499,13 @@ typedef struct {
 } drm_radeon_tcl_prim_t;
 
 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
+				      struct drm_file *file_priv,
 				      struct drm_buf * buf,
 				      drm_radeon_tcl_prim_t * prim)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
 	int numverts = (int)prim->numverts;
 	int nbox = sarea_priv->nbox;
@@ -1539,13 +1546,14 @@ static void radeon_cp_dispatch_vertex(struct drm_device * dev,
 	} while (i < nbox);
 }
 
-static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
+static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 	RING_LOCALS;
 
-	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
+	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
 
 	/* Emit the vertex buffer age */
 	BEGIN_RING(2);
@@ -1590,12 +1598,14 @@ static void radeon_cp_dispatch_indirect(struct drm_device * dev,
 	}
 }
 
-static void radeon_cp_dispatch_indices(struct drm_device * dev,
+static void radeon_cp_dispatch_indices(struct drm_device *dev,
+				       struct drm_master *master,
 				       struct drm_buf * elt_buf,
 				       drm_radeon_tcl_prim_t * prim)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 	int offset = dev_priv->gart_buffers_offset + prim->offset;
 	u32 *data;
 	int dwords;
@@ -1870,7 +1880,7 @@ static int radeon_cp_dispatch_texture(struct drm_device * dev,
 		ADVANCE_RING();
 		COMMIT_RING();
 
-		radeon_cp_discard_buffer(dev, buf);
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
 		/* Update the input parameters for next time */
 		image->y += height;
@@ -2110,7 +2120,8 @@ static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_fi
 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 	drm_radeon_clear_t *clear = data;
 	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
 	DRM_DEBUG("\n");
@@ -2126,7 +2137,7 @@ static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *
 			       sarea_priv->nbox * sizeof(depth_boxes[0])))
 		return -EFAULT;
 
-	radeon_cp_dispatch_clear(dev, clear, depth_boxes);
+	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
 
 	COMMIT_RING();
 	return 0;
@@ -2134,9 +2145,10 @@ static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *
 
 /* Not sure why this isn't set all the time:
  */
-static int radeon_do_init_pageflip(struct drm_device * dev)
+static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
 	RING_LOCALS;
 
 	DRM_DEBUG("\n");
@@ -2153,8 +2165,8 @@ static int radeon_do_init_pageflip(struct drm_device * dev)
 
 	dev_priv->page_flipping = 1;
 
-	if (dev_priv->sarea_priv->pfCurrentPage != 1)
-		dev_priv->sarea_priv->pfCurrentPage = 0;
+	if (master_priv->sarea_priv->pfCurrentPage != 1)
+		master_priv->sarea_priv->pfCurrentPage = 0;
 
 	return 0;
 }
@@ -2172,9 +2184,9 @@ static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *f
 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
 	if (!dev_priv->page_flipping)
-		radeon_do_init_pageflip(dev);
+		radeon_do_init_pageflip(dev, file_priv->master);
 
-	radeon_cp_dispatch_flip(dev);
+	radeon_cp_dispatch_flip(dev, file_priv->master);
 
 	COMMIT_RING();
 	return 0;
@@ -2183,7 +2195,9 @@ static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *f
 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
+
 	DRM_DEBUG("\n");
 
 	LOCK_TEST_WITH_RETURN(dev, file_priv);
@@ -2193,8 +2207,8 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f
 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
-	radeon_cp_dispatch_swap(dev);
-	dev_priv->sarea_priv->ctx_owner = 0;
+	radeon_cp_dispatch_swap(dev, file_priv->master);
+	sarea_priv->ctx_owner = 0;
 
 	COMMIT_RING();
 	return 0;
@@ -2203,7 +2217,8 @@ static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *f
 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv;
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf *buf;
 	drm_radeon_vertex_t *vertex = data;
@@ -2211,6 +2226,8 @@ static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file
 
 	LOCK_TEST_WITH_RETURN(dev, file_priv);
 
+	sarea_priv = master_priv->sarea_priv;
+
 	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
 		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
 
@@ -2263,13 +2280,13 @@ static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file
 		prim.finish = vertex->count;	/* unused */
 		prim.prim = vertex->prim;
 		prim.numverts = vertex->count;
-		prim.vc_format = dev_priv->sarea_priv->vc_format;
+		prim.vc_format = sarea_priv->vc_format;
 
-		radeon_cp_dispatch_vertex(dev, buf, &prim);
+		radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
 	}
 
 	if (vertex->discard) {
-		radeon_cp_discard_buffer(dev, buf);
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
 	}
 
 	COMMIT_RING();
@@ -2279,7 +2296,8 @@ static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file
 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv;
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf *buf;
 	drm_radeon_indices_t *elts = data;
@@ -2288,6 +2306,8 @@ static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file
 
 	LOCK_TEST_WITH_RETURN(dev, file_priv);
 
+	sarea_priv = master_priv->sarea_priv;
+
 	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
 		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
 		  elts->discard);
@@ -2353,11 +2373,11 @@ static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file
 	prim.prim = elts->prim;
 	prim.offset = 0;	/* offset from start of dma buffers */
 	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
-	prim.vc_format = dev_priv->sarea_priv->vc_format;
+	prim.vc_format = sarea_priv->vc_format;
 
-	radeon_cp_dispatch_indices(dev, buf, &prim);
+	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
 	if (elts->discard) {
-		radeon_cp_discard_buffer(dev, buf);
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
 	}
 
 	COMMIT_RING();
@@ -2468,7 +2488,7 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil
 	 */
 	radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
 	if (indirect->discard) {
-		radeon_cp_discard_buffer(dev, buf);
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
 	}
 
 	COMMIT_RING();
@@ -2478,7 +2498,8 @@ static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_fil
 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
-	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
+	drm_radeon_sarea_t *sarea_priv;
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf *buf;
 	drm_radeon_vertex2_t *vertex = data;
@@ -2487,6 +2508,8 @@ static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file
 
 	LOCK_TEST_WITH_RETURN(dev, file_priv);
 
+	sarea_priv = master_priv->sarea_priv;
+
 	DRM_DEBUG("pid=%d index=%d discard=%d\n",
 		  DRM_CURRENTPID, vertex->idx, vertex->discard);
 
@@ -2547,12 +2570,12 @@ static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file
 			tclprim.offset = prim.numverts * 64;
 			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
 
-			radeon_cp_dispatch_indices(dev, buf, &tclprim);
+			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
 		} else {
 			tclprim.numverts = prim.numverts;
 			tclprim.offset = 0;	/* not used */
 
-			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
+			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
 		}
 
 		if (sarea_priv->nbox == 1)
@@ -2560,7 +2583,7 @@ static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file
 	}
 
 	if (vertex->discard) {
-		radeon_cp_discard_buffer(dev, buf);
+		radeon_cp_discard_buffer(dev, file_priv->master, buf);
 	}
 
 	COMMIT_RING();
@@ -2909,7 +2932,7 @@ static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file
 				goto err;
 			}
 
-			radeon_cp_discard_buffer(dev, buf);
+			radeon_cp_discard_buffer(dev, file_priv->master, buf);
 			break;
 
 		case RADEON_CMD_PACKET3:
@@ -3020,7 +3043,7 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil
 		 */
 	case RADEON_PARAM_SAREA_HANDLE:
 		/* The lock is the first dword in the sarea. */
-		value = (long)dev->lock.hw_lock;
+		/* no users of this parameter */
 		break;
 #endif
 	case RADEON_PARAM_GART_TEX_HANDLE:
@@ -3064,6 +3087,7 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil
 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 	drm_radeon_setparam_t *sp = data;
 	struct drm_radeon_driver_file_fields *radeon_priv;
 
@@ -3078,12 +3102,14 @@ static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_fil
 			DRM_DEBUG("color tiling disabled\n");
 			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
 			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
-			dev_priv->sarea_priv->tiling_enabled = 0;
+			if (master_priv->sarea_priv)
+				master_priv->sarea_priv->tiling_enabled = 0;
 		} else if (sp->value == 1) {
 			DRM_DEBUG("color tiling enabled\n");
 			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
 			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
-			dev_priv->sarea_priv->tiling_enabled = 1;
+			if (master_priv->sarea_priv)
+				master_priv->sarea_priv->tiling_enabled = 1;
 		}
 		break;
 	case RADEON_SETPARAM_PCIGART_LOCATION:
@@ -3129,14 +3155,6 @@ void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
 
 void radeon_driver_lastclose(struct drm_device *dev)
 {
-	if (dev->dev_private) {
-		drm_radeon_private_t *dev_priv = dev->dev_private;
-
-		if (dev_priv->sarea_priv &&
-		    dev_priv->sarea_priv->pfCurrentPage != 0)
-			radeon_cp_dispatch_flip(dev);
-	}
-
 	radeon_do_release(dev);
 }
 
diff --git a/include/drm/drm.h b/include/drm/drm.h
index f46ba4b57da4..3fb173c5af3e 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -634,6 +634,9 @@ struct drm_gem_open {
 #define DRM_IOCTL_SET_SAREA_CTX		DRM_IOW( 0x1c, struct drm_ctx_priv_map)
 #define DRM_IOCTL_GET_SAREA_CTX 	DRM_IOWR(0x1d, struct drm_ctx_priv_map)
 
+#define DRM_IOCTL_SET_MASTER            DRM_IO(0x1e)
+#define DRM_IOCTL_DROP_MASTER           DRM_IO(0x1f)
+
 #define DRM_IOCTL_ADD_CTX		DRM_IOWR(0x20, struct drm_ctx)
 #define DRM_IOCTL_RM_CTX		DRM_IOWR(0x21, struct drm_ctx)
 #define DRM_IOCTL_MOD_CTX		DRM_IOW( 0x22, struct drm_ctx)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 08b8539e7b3c..4c6e8298b424 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -238,11 +238,11 @@ struct drm_device;
  */
 #define LOCK_TEST_WITH_RETURN( dev, file_priv )				\
 do {									\
-	if ( !_DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ) ||		\
-	     dev->lock.file_priv != file_priv )	{			\
+	if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock) ||		\
+	    file_priv->master->lock.file_priv != file_priv)	{			\
 		DRM_ERROR( "%s called without lock held, held  %d owner %p %p\n",\
-			   __func__, _DRM_LOCK_IS_HELD( dev->lock.hw_lock->lock ),\
-			   dev->lock.file_priv, file_priv );		\
+			   __func__, _DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock),\
+			   file_priv->master->lock.file_priv, file_priv);		\
 		return -EINVAL;						\
 	}								\
 } while (0)
@@ -379,21 +379,25 @@ struct drm_buf_entry {
 /** File private data */
 struct drm_file {
 	int authenticated;
-	int master;
 	pid_t pid;
 	uid_t uid;
 	drm_magic_t magic;
 	unsigned long ioctl_count;
 	struct list_head lhead;
 	struct drm_minor *minor;
-	int remove_auth_on_close;
 	unsigned long lock_count;
+
 	/** Mapping of mm object handles to object pointers. */
 	struct idr object_idr;
 	/** Lock for synchronization of access to object_idr. */
 	spinlock_t table_lock;
+
 	struct file *filp;
 	void *driver_priv;
+
+	int is_master; /* this file private is a master for a minor */
+	struct drm_master *master; /* master this node is currently associated with
+				      N.B. not always minor->master */
 };
 
 /** Wait queue */
@@ -523,6 +527,7 @@ struct drm_map_list {
 	struct drm_hash_item hash;
 	struct drm_map *map;			/**< mapping */
 	uint64_t user_token;
+	struct drm_master *master;
 };
 
 typedef struct drm_map drm_local_map_t;
@@ -612,6 +617,30 @@ struct drm_gem_object {
 	void *driver_private;
 };
 
+/* per-master structure */
+struct drm_master {
+
+	struct kref refcount; /* refcount for this master */
+
+	struct list_head head; /**< each minor contains a list of masters */
+	struct drm_minor *minor; /**< link back to minor we are a master for */
+
+	char *unique;			/**< Unique identifier: e.g., busid */
+	int unique_len;			/**< Length of unique field */
+
+	int blocked;			/**< Blocked due to VC switch? */
+
+	/** \name Authentication */
+	/*@{ */
+	struct drm_open_hash magiclist;
+	struct list_head magicfree;
+	/*@} */
+
+	struct drm_lock_data lock;	/**< Information on hardware lock */
+
+	void *driver_priv; /**< Private structure for driver to use */
+};
+
 /**
  * DRM driver structure. This structure represent the common code for
  * a family of cards. There will one drm_device for each card present
@@ -712,6 +741,10 @@ struct drm_driver {
 	void (*set_version) (struct drm_device *dev,
 			     struct drm_set_version *sv);
 
+	/* Master routines */
+	int (*master_create)(struct drm_device *dev, struct drm_master *master);
+	void (*master_destroy)(struct drm_device *dev, struct drm_master *master);
+
 	int (*proc_init)(struct drm_minor *minor);
 	void (*proc_cleanup)(struct drm_minor *minor);
 
@@ -754,6 +787,8 @@ struct drm_minor {
 	struct device kdev;		/**< Linux device */
 	struct drm_device *dev;
 	struct proc_dir_entry *dev_root;  /**< proc directory entry */
+	struct drm_master *master; /* currently active master for this node */
+	struct list_head master_list;
 };
 
 /**
@@ -762,13 +797,9 @@ struct drm_minor {
  */
 struct drm_device {
 	struct list_head driver_item;	/**< list of devices per driver */
-	char *unique;			/**< Unique identifier: e.g., busid */
-	int unique_len;			/**< Length of unique field */
 	char *devname;			/**< For /proc/interrupts */
 	int if_version;			/**< Highest interface version set */
 
-	int blocked;			/**< Blocked due to VC switch? */
-
 	/** \name Locks */
 	/*@{ */
 	spinlock_t count_lock;		/**< For inuse, drm_device::open_count, drm_device::buf_use */
@@ -791,12 +822,7 @@ struct drm_device {
 	atomic_t counts[15];
 	/*@} */
 
-	/** \name Authentication */
-	/*@{ */
 	struct list_head filelist;
-	struct drm_open_hash magiclist;	/**< magic hash table */
-	struct list_head magicfree;
-	/*@} */
 
 	/** \name Memory management */
 	/*@{ */
@@ -813,7 +839,6 @@ struct drm_device {
 	struct idr ctx_idr;
 
 	struct list_head vmalist;	/**< List of vmas (for debugging) */
-	struct drm_lock_data lock;	/**< Information on hardware lock */
 	/*@} */
 
 	/** \name DMA queues (contexts) */
@@ -1192,6 +1217,13 @@ extern int drm_agp_unbind_memory(DRM_AGP_MEM * handle);
 extern void drm_agp_chipset_flush(struct drm_device *dev);
 
 				/* Stub support (drm_stub.h) */
+extern int drm_setmaster_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
+extern int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+struct drm_master *drm_master_create(struct drm_minor *minor);
+extern struct drm_master *drm_master_get(struct drm_master *master);
+extern void drm_master_put(struct drm_master **master);
 extern int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
 		       struct drm_driver *driver);
 extern int drm_put_dev(struct drm_device *dev);
diff --git a/include/drm/drm_sarea.h b/include/drm/drm_sarea.h
index 480037331e4e..ee5389d22c64 100644
--- a/include/drm/drm_sarea.h
+++ b/include/drm/drm_sarea.h
@@ -36,12 +36,12 @@
 
 /* SAREA area needs to be at least a page */
 #if defined(__alpha__)
-#define SAREA_MAX                       0x2000
+#define SAREA_MAX                       0x2000U
 #elif defined(__ia64__)
-#define SAREA_MAX                       0x10000	/* 64kB */
+#define SAREA_MAX                       0x10000U	/* 64kB */
 #else
 /* Intel 830M driver needs at least 8k SAREA */
-#define SAREA_MAX                       0x2000
+#define SAREA_MAX                       0x2000U
 #endif
 
 /** Maximum number of drawables in the SAREA */
-- 
cgit v1.2.3


From 1147c9cdd0f60f09a98702a9f865176af18a989f Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 2 Dec 2008 13:38:47 +1000
Subject: drm: fix leak of uninitialized data to userspace

...so drm_getunique() is trying to copy some uninitialized data to
userspace. The ECX register contains the number of words that are
left to copy -- so there are 5 * 4 = 20 bytes left. The offset of the
first uninitialized byte (counting from the start of the string) is
also 20 (i.e. 0xf65d2294&((1 << 5)-1) == 20). So somebody tried to
copy 40 bytes when the string was only 19 long.

In drm_set_busid() we have this code:

        dev->unique_len = 40;
        dev->unique = drm_alloc(dev->unique_len + 1, DRM_MEM_DRIVER);
      ...
        len = snprintf(dev->unique, dev->unique_len, pci:%04x:%02x:%02x.%d",

...so it seems that dev->unique is never updated to reflect the
actual length of the string. The remaining bytes (20 in this case)
are random uninitialized bytes that are copied into userspace.

This patch fixes the problem by setting dev->unique_len after the
snprintf().

airlied- I've had to fix this up to store the alloced size so
we have it for drm_free later.

Reported-by: Sitsofe Wheeler <sitsofe@yahoo.com>
Signed-off-by: Vegard Nossum <vegardno@thuin.ifi.uio.no>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_ioctl.c | 10 +++++++---
 drivers/gpu/drm/drm_stub.c  |  2 +-
 include/drm/drmP.h          |  1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index e35126a35093..1fad76289e66 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -92,7 +92,8 @@ int drm_setunique(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	master->unique_len = u->unique_len;
-	master->unique = drm_alloc(u->unique_len + 1, DRM_MEM_DRIVER);
+	master->unique_size = u->unique_len + 1;
+	master->unique = drm_alloc(master->unique_size, DRM_MEM_DRIVER);
 	if (!master->unique)
 		return -ENOMEM;
 	if (copy_from_user(master->unique, u->unique, master->unique_len))
@@ -136,7 +137,8 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 		return -EBUSY;
 
 	master->unique_len = 40;
-	master->unique = drm_alloc(master->unique_len + 1, DRM_MEM_DRIVER);
+	master->unique_size = master->unique_len;
+	master->unique = drm_alloc(master->unique_size, DRM_MEM_DRIVER);
 	if (master->unique == NULL)
 		return -ENOMEM;
 
@@ -145,8 +147,10 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 		       dev->pdev->bus->number,
 		       PCI_SLOT(dev->pdev->devfn),
 		       PCI_FUNC(dev->pdev->devfn));
-	if (len > master->unique_len)
+	if (len >= master->unique_len)
 		DRM_ERROR("buffer overflow");
+	else
+		master->unique_len = len;
 
 	dev->devname =
 	    drm_alloc(strlen(dev->driver->pci_driver.name) + master->unique_len +
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 0f24c2dcd517..f7985c303cb0 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -117,7 +117,7 @@ static void drm_master_destroy(struct kref *kref)
 		dev->driver->master_destroy(dev, master);
 
 	if (master->unique) {
-		drm_free(master->unique, strlen(master->unique) + 1, DRM_MEM_DRIVER);
+		drm_free(master->unique, master->unique_size, DRM_MEM_DRIVER);
 		master->unique = NULL;
 		master->unique_len = 0;
 	}
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 4c6e8298b424..c9cc618dbcfc 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -627,6 +627,7 @@ struct drm_master {
 
 	char *unique;			/**< Unique identifier: e.g., busid */
 	int unique_len;			/**< Length of unique field */
+	int unique_size;		/**< amount allocated */
 
 	int blocked;			/**< Blocked due to VC switch? */
 
-- 
cgit v1.2.3


From a2c0a97b784f837300f7b0869c82ab712c600952 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Wed, 5 Nov 2008 10:31:53 -0800
Subject: drm: GEM mmap support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add core support for mapping of GEM objects.  Drivers should provide a
vm_operations_struct if they want to support page faulting of objects.
The code for handling GEM object offsets was taken from TTM, which was
written by Thomas Hellström.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_bufs.c    |   6 +++
 drivers/gpu/drm/drm_drv.c     |   6 +++
 drivers/gpu/drm/drm_fops.c    |  12 ++++-
 drivers/gpu/drm/drm_gem.c     | 109 ++++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/drm_hashtab.c |   2 +
 drivers/gpu/drm/drm_vm.c      |   7 ++-
 include/drm/drm.h             |   1 +
 include/drm/drmP.h            |  21 ++++++++
 8 files changed, 161 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index dc3ce3e0a0a4..7fb690bcd492 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -261,6 +261,9 @@ static int drm_addmap_core(struct drm_device * dev, unsigned int offset,
 		}
 		DRM_DEBUG("AGP offset = 0x%08lx, size = 0x%08lx\n", map->offset, map->size);
 
+		break;
+	case _DRM_GEM:
+		DRM_ERROR("tried to rmmap GEM object\n");
 		break;
 	}
 	case _DRM_SCATTER_GATHER:
@@ -429,6 +432,9 @@ int drm_rmmap_locked(struct drm_device *dev, drm_local_map_t *map)
 		dmah.size = map->size;
 		__drm_pci_free(dev, &dmah);
 		break;
+	case _DRM_GEM:
+		DRM_ERROR("tried to rmmap GEM object\n");
+		break;
 	}
 	drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 9f04ca37df6d..98a781375f60 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -209,6 +209,7 @@ int drm_lastclose(struct drm_device * dev)
 	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		drm_dma_takedown(dev);
 
+	dev->dev_mapping = NULL;
 	mutex_unlock(&dev->struct_mutex);
 
 	DRM_DEBUG("lastclose completed\n");
@@ -273,6 +274,8 @@ EXPORT_SYMBOL(drm_init);
  */
 static void drm_cleanup(struct drm_device * dev)
 {
+	struct drm_driver *driver = dev->driver;
+
 	DRM_DEBUG("\n");
 
 	if (!dev) {
@@ -304,6 +307,9 @@ static void drm_cleanup(struct drm_device * dev)
 	drm_ht_remove(&dev->map_hash);
 	drm_ctxbitmap_cleanup(dev);
 
+	if (driver->driver_features & DRIVER_GEM)
+		drm_gem_destroy(dev);
+
 	drm_put_minor(&dev->primary);
 	if (drm_put_dev(dev))
 		DRM_ERROR("Cannot unload module\n");
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index f2285237df49..3a6c439652a5 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -133,11 +133,21 @@ int drm_open(struct inode *inode, struct file *filp)
 		spin_lock(&dev->count_lock);
 		if (!dev->open_count++) {
 			spin_unlock(&dev->count_lock);
-			return drm_setup(dev);
+			retcode = drm_setup(dev);
+			goto out;
 		}
 		spin_unlock(&dev->count_lock);
 	}
 
+out:
+	mutex_lock(&dev->struct_mutex);
+	if (dev->dev_mapping == NULL)
+		dev->dev_mapping = inode->i_mapping;
+	else if (dev->dev_mapping != inode->i_mapping)
+		WARN(1, "dev->dev_mapping not inode mapping (%p expected %p)\n",
+		     dev->dev_mapping, inode->i_mapping);
+	mutex_unlock(&dev->struct_mutex);
+
 	return retcode;
 }
 EXPORT_SYMBOL(drm_open);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ccd1afdede02..b3939de6affd 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -64,6 +64,13 @@
  * up at a later date, and as our interface with shmfs for memory allocation.
  */
 
+/*
+ * We make up offsets for buffer objects so we can recognize them at
+ * mmap time.
+ */
+#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
+#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
+
 /**
  * Initialize the GEM device fields
  */
@@ -71,6 +78,8 @@
 int
 drm_gem_init(struct drm_device *dev)
 {
+	struct drm_gem_mm *mm;
+
 	spin_lock_init(&dev->object_name_lock);
 	idr_init(&dev->object_name_idr);
 	atomic_set(&dev->object_count, 0);
@@ -79,9 +88,41 @@ drm_gem_init(struct drm_device *dev)
 	atomic_set(&dev->pin_memory, 0);
 	atomic_set(&dev->gtt_count, 0);
 	atomic_set(&dev->gtt_memory, 0);
+
+	mm = drm_calloc(1, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	if (!mm) {
+		DRM_ERROR("out of memory\n");
+		return -ENOMEM;
+	}
+
+	dev->mm_private = mm;
+
+	if (drm_ht_create(&mm->offset_hash, 19)) {
+		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		return -ENOMEM;
+	}
+
+	if (drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START,
+			DRM_FILE_PAGE_OFFSET_SIZE)) {
+		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		drm_ht_remove(&mm->offset_hash);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
+void
+drm_gem_destroy(struct drm_device *dev)
+{
+	struct drm_gem_mm *mm = dev->mm_private;
+
+	drm_mm_takedown(&mm->offset_manager);
+	drm_ht_remove(&mm->offset_hash);
+	drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	dev->mm_private = NULL;
+}
+
 /**
  * Allocate a GEM object of the specified size with shmfs backing store
  */
@@ -419,3 +460,71 @@ drm_gem_object_handle_free(struct kref *kref)
 }
 EXPORT_SYMBOL(drm_gem_object_handle_free);
 
+/**
+ * drm_gem_mmap - memory map routine for GEM objects
+ * @filp: DRM file pointer
+ * @vma: VMA for the area to be mapped
+ *
+ * If a driver supports GEM object mapping, mmap calls on the DRM file
+ * descriptor will end up here.
+ *
+ * If we find the object based on the offset passed in (vma->vm_pgoff will
+ * contain the fake offset we created when the GTT map ioctl was called on
+ * the object), we set up the driver fault handler so that any accesses
+ * to the object can be trapped, to perform migration, GTT binding, surface
+ * register allocation, or performance monitoring.
+ */
+int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map *map = NULL;
+	struct drm_gem_object *obj;
+	struct drm_hash_item *hash;
+	unsigned long prot;
+	int ret = 0;
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+		mutex_unlock(&dev->struct_mutex);
+		return drm_mmap(filp, vma);
+	}
+
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	if (!map ||
+	    ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
+		ret =  -EPERM;
+		goto out_unlock;
+	}
+
+	/* Check for valid size. */
+	if (map->size < vma->vm_end - vma->vm_start) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	obj = map->handle;
+	if (!obj->dev->driver->gem_vm_ops) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_ops = obj->dev->driver->gem_vm_ops;
+	vma->vm_private_data = map->handle;
+	/* FIXME: use pgprot_writecombine when available */
+	prot = pgprot_val(vma->vm_page_prot);
+	prot |= _PAGE_CACHE_WC;
+	vma->vm_page_prot = __pgprot(prot);
+
+	vma->vm_file = filp;	/* Needed for drm_vm_open() */
+	drm_vm_open_locked(vma);
+
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_mmap);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 33160673a7b7..af539f7d87dd 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -127,6 +127,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_insert_item);
 
 /*
  * Just insert an item and return any "bits" bit key that hasn't been
@@ -188,6 +189,7 @@ int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 	ht->fill--;
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_remove_item);
 
 void drm_ht_remove(struct drm_open_hash *ht)
 {
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index c234c6f24a8d..3ffae021d280 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -267,6 +267,9 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 				dmah.size = map->size;
 				__drm_pci_free(dev, &dmah);
 				break;
+			case _DRM_GEM:
+				DRM_ERROR("tried to rmmap GEM object\n");
+				break;
 			}
 			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 		}
@@ -399,7 +402,7 @@ static struct vm_operations_struct drm_vm_sg_ops = {
  * Create a new drm_vma_entry structure as the \p vma private data entry and
  * add it to drm_device::vmalist.
  */
-static void drm_vm_open_locked(struct vm_area_struct *vma)
+void drm_vm_open_locked(struct vm_area_struct *vma)
 {
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
@@ -540,7 +543,7 @@ EXPORT_SYMBOL(drm_core_get_reg_ofs);
  * according to the mapping type and remaps the pages. Finally sets the file
  * pointer and calls vm_open().
  */
-static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
+int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_file *priv = filp->private_data;
 	struct drm_device *dev = priv->minor->dev;
diff --git a/include/drm/drm.h b/include/drm/drm.h
index 3fb173c5af3e..3a66252456ba 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -173,6 +173,7 @@ enum drm_map_type {
 	_DRM_AGP = 3,		  /**< AGP/GART */
 	_DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
 	_DRM_CONSISTENT = 5,	  /**< Consistent memory for PCI DMA */
+	_DRM_GEM = 6,		  /**< GEM object */
 };
 
 /**
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index c9cc618dbcfc..ae42a6a5c24e 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -528,6 +528,7 @@ struct drm_map_list {
 	struct drm_map *map;			/**< mapping */
 	uint64_t user_token;
 	struct drm_master *master;
+	struct drm_mm_node *file_offset_node;	/**< fake offset */
 };
 
 typedef struct drm_map drm_local_map_t;
@@ -567,6 +568,14 @@ struct drm_ati_pcigart_info {
 	int table_size;
 };
 
+/**
+ * GEM specific mm private for tracking GEM objects
+ */
+struct drm_gem_mm {
+	struct drm_mm offset_manager;	/**< Offset mgmt for buffer objects */
+	struct drm_open_hash offset_hash; /**< User token hash table for maps */
+};
+
 /**
  * This structure defines the drm_mm memory object, which will be used by the
  * DRM for its buffer objects.
@@ -584,6 +593,9 @@ struct drm_gem_object {
 	/** File representing the shmem storage */
 	struct file *filp;
 
+	/* Mapping info for this object */
+	struct drm_map_list map_list;
+
 	/**
 	 * Size of the object, in bytes.  Immutable over the object's
 	 * lifetime.
@@ -758,6 +770,9 @@ struct drm_driver {
 	int (*gem_init_object) (struct drm_gem_object *obj);
 	void (*gem_free_object) (struct drm_gem_object *obj);
 
+	/* Driver private ops for this object */
+	struct vm_operations_struct *gem_vm_ops;
+
 	int major;
 	int minor;
 	int patchlevel;
@@ -910,6 +925,8 @@ struct drm_device {
 	struct drm_sg_mem *sg;	/**< Scatter gather memory */
 	int num_crtcs;                  /**< Number of CRTCs on this device */
 	void *dev_private;		/**< device private data */
+	void *mm_private;
+	struct address_space *dev_mapping;
 	struct drm_sigdata sigdata;	   /**< For block_all_signals */
 	sigset_t sigmask;
 
@@ -1026,6 +1043,8 @@ extern int drm_release(struct inode *inode, struct file *filp);
 
 				/* Mapping support (drm_vm.h) */
 extern int drm_mmap(struct file *filp, struct vm_area_struct *vma);
+extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
+extern void drm_vm_open_locked(struct vm_area_struct *vma);
 extern unsigned long drm_core_get_map_ofs(struct drm_map * map);
 extern unsigned long drm_core_get_reg_ofs(struct drm_device *dev);
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -1287,10 +1306,12 @@ extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
 
 /* Graphics Execution Manager library functions (drm_gem.c) */
 int drm_gem_init(struct drm_device *dev);
+void drm_gem_destroy(struct drm_device *dev);
 void drm_gem_object_free(struct kref *kref);
 struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
 					    size_t size);
 void drm_gem_object_handle_free(struct kref *kref);
+int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
-- 
cgit v1.2.3


From de151cf67ce52ed2d88083daa5e60c7858947329 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Wed, 12 Nov 2008 10:03:55 -0800
Subject: drm/i915: add GEM GTT mapping support

Use the new core GEM object mapping code to allow GTT mapping of GEM
objects on i915.  The fault handler will make sure a fence register is
allocated too, if the object in question is tiled.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/i915_dma.c        |   1 +
 drivers/gpu/drm/i915/i915_drv.c        |   7 +-
 drivers/gpu/drm/i915/i915_drv.h        |  35 +++
 drivers/gpu/drm/i915/i915_gem.c        | 488 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_tiling.c |   1 +
 drivers/gpu/drm/i915/i915_reg.h        |  20 +-
 include/drm/i915_drm.h                 |  14 +
 7 files changed, 562 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index dacdf3c577cb..1b81b6a6d81b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -991,6 +991,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, 0),
+	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c91648320a8b..e0d996ed9026 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -81,6 +81,10 @@ static int i915_resume(struct drm_device *dev)
 	return 0;
 }
 
+static struct vm_operations_struct i915_gem_vm_ops = {
+	.fault = i915_gem_fault,
+};
+
 static struct drm_driver driver = {
 	/* don't use mtrr's here, the Xserver or user space app should
 	 * deal with them for intel hardware.
@@ -113,13 +117,14 @@ static struct drm_driver driver = {
 	.proc_cleanup = i915_gem_proc_cleanup,
 	.gem_init_object = i915_gem_init_object,
 	.gem_free_object = i915_gem_free_object,
+	.gem_vm_ops = &i915_gem_vm_ops,
 	.ioctls = i915_ioctls,
 	.fops = {
 		 .owner = THIS_MODULE,
 		 .open = drm_open,
 		 .release = drm_release,
 		 .ioctl = drm_ioctl,
-		 .mmap = drm_mmap,
+		 .mmap = drm_gem_mmap,
 		 .poll = drm_poll,
 		 .fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ba096f9a7641..85a072e80637 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -107,6 +107,11 @@ struct drm_i915_master_private {
 	drm_local_map_t *sarea;
 	struct _drm_i915_sarea *sarea_priv;
 };
+#define I915_FENCE_REG_NONE -1
+
+struct drm_i915_fence_reg {
+	struct drm_gem_object *obj;
+};
 
 typedef struct drm_i915_private {
 	struct drm_device *dev;
@@ -149,6 +154,10 @@ typedef struct drm_i915_private {
 
 	struct intel_opregion opregion;
 
+	struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
+	int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
+	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+
 	/* Register state */
 	u8 saveLBB;
 	u32 saveDSPACNTR;
@@ -367,6 +376,21 @@ struct drm_i915_gem_object {
 	 * This is the same as gtt_space->start
 	 */
 	uint32_t gtt_offset;
+	/**
+	 * Required alignment for the object
+	 */
+	uint32_t gtt_alignment;
+	/**
+	 * Fake offset for use by mmap(2)
+	 */
+	uint64_t mmap_offset;
+
+	/**
+	 * Fence register bits (if any) for this object.  Will be set
+	 * as needed when mapped into the GTT.
+	 * Protected by dev->struct_mutex.
+	 */
+	int fence_reg;
 
 	/** Boolean whether this object has a valid gtt offset. */
 	int gtt_bound;
@@ -379,6 +403,7 @@ struct drm_i915_gem_object {
 
 	/** Current tiling mode for the object. */
 	uint32_t tiling_mode;
+	uint32_t stride;
 
 	/** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
 	uint32_t agp_type;
@@ -493,6 +518,8 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv);
 int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
 int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
@@ -529,6 +556,7 @@ uint32_t i915_get_gem_seqno(struct drm_device *dev);
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_work_handler(struct work_struct *work);
 void i915_gem_clflush_object(struct drm_gem_object *obj);
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
@@ -584,6 +612,13 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; }
 #define I915_WRITE16(reg, val)	writel(val, dev_priv->regs + (reg))
 #define I915_READ8(reg)		readb(dev_priv->regs + (reg))
 #define I915_WRITE8(reg, val)	writeb(val, dev_priv->regs + (reg))
+#ifdef writeq
+#define I915_WRITE64(reg, val)	writeq(val, dev_priv->regs + (reg))
+#else
+#define I915_WRITE64(reg, val)	(writel(val, dev_priv->regs + (reg)), \
+				 writel(upper_32_bits(val), dev_priv->regs + \
+					(reg) + 4))
+#endif
 
 #define I915_VERBOSE 0
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 24fe8c10b4b2..0ac977112f72 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -51,6 +51,11 @@ static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *o
 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
+static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
+					   unsigned alignment);
+static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
+static int i915_gem_evict_something(struct drm_device *dev);
 
 static void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev);
@@ -529,6 +534,252 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * vma: VMA in question
+ * vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ */
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	pgoff_t page_offset;
+	unsigned long pfn;
+	int ret = 0;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
+		PAGE_SHIFT;
+
+	/* Now bind it into the GTT if needed */
+	mutex_lock(&dev->struct_mutex);
+	if (!obj_priv->gtt_space) {
+		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return VM_FAULT_SIGBUS;
+		}
+		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+	}
+
+	/* Need a new fence register? */
+	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+	    obj_priv->tiling_mode != I915_TILING_NONE)
+		i915_gem_object_get_fence_reg(obj);
+
+	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
+		page_offset;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case -ENOMEM:
+	case -EAGAIN:
+		return VM_FAULT_OOM;
+	case -EFAULT:
+	case -EBUSY:
+		DRM_ERROR("can't insert pfn??  fault or busy...\n");
+		return VM_FAULT_SIGBUS;
+	default:
+		return VM_FAULT_NOPAGE;
+	}
+}
+
+/**
+ * i915_gem_create_mmap_offset - create a fake mmap offset for an object
+ * @obj: obj in question
+ *
+ * GEM memory mapping works by handing back to userspace a fake mmap offset
+ * it can use in a subsequent mmap(2) call.  The DRM core code then looks
+ * up the object based on the offset and sets up the various memory mapping
+ * structures.
+ *
+ * This routine allocates and attaches a fake offset for @obj.
+ */
+static int
+i915_gem_create_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	struct drm_map_list *list;
+	struct drm_map *map;
+	int ret = 0;
+
+	/* Set the object up for mmap'ing */
+	list = &obj->map_list;
+	list->map = drm_calloc(1, sizeof(struct drm_map_list),
+			       DRM_MEM_DRIVER);
+	if (!list->map)
+		return -ENOMEM;
+
+	map = list->map;
+	map->type = _DRM_GEM;
+	map->size = obj->size;
+	map->handle = obj;
+
+	/* Get a DRM GEM mmap offset allocated... */
+	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+						    obj->size / PAGE_SIZE, 0, 0);
+	if (!list->file_offset_node) {
+		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+						  obj->size / PAGE_SIZE, 0);
+	if (!list->file_offset_node) {
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->hash.key = list->file_offset_node->start;
+	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
+		DRM_ERROR("failed to add to map hash\n");
+		goto out_free_mm;
+	}
+
+	/* By now we should be all set, any drm_mmap request on the offset
+	 * below will get to our mmap & fault handler */
+	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+
+	return 0;
+
+out_free_mm:
+	drm_mm_put_block(list->file_offset_node);
+out_free_list:
+	drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
+
+	return ret;
+}
+
+/**
+ * i915_gem_get_gtt_alignment - return required GTT alignment for an object
+ * @obj: object to check
+ *
+ * Return the required GTT alignment for an object, taking into account
+ * potential fence register mapping if needed.
+ */
+static uint32_t
+i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int start, i;
+
+	/*
+	 * Minimum alignment is 4k (GTT page size), but might be greater
+	 * if a fence register is needed for the object.
+	 */
+	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
+		return 4096;
+
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	if (IS_I9XX(dev))
+		start = 1024*1024;
+	else
+		start = 512*1024;
+
+	for (i = start; i < obj->size; i <<= 1)
+		;
+
+	return i;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file_priv: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj;
+	struct drm_i915_gem_object *obj_priv;
+	int ret;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (obj == NULL)
+		return -EBADF;
+
+	mutex_lock(&dev->struct_mutex);
+
+	obj_priv = obj->driver_private;
+
+	if (!obj_priv->mmap_offset) {
+		ret = i915_gem_create_mmap_offset(obj);
+		if (ret)
+			return ret;
+	}
+
+	args->offset = obj_priv->mmap_offset;
+
+	obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
+
+	/* Make sure the alignment is correct for fence regs etc */
+	if (obj_priv->agp_mem &&
+	    (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
+		drm_gem_object_unreference(obj);
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+
+	/*
+	 * Pull it into the GTT so that we have a page list (makes the
+	 * initial fault faster and any subsequent flushing possible).
+	 */
+	if (!obj_priv->agp_mem) {
+		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+		if (ret) {
+			drm_gem_object_unreference(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+	}
+
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
 static void
 i915_gem_object_free_page_list(struct drm_gem_object *obj)
 {
@@ -726,6 +977,7 @@ i915_gem_retire_request(struct drm_device *dev,
 		 */
 		if (obj_priv->last_rendering_seqno != request->seqno)
 			return;
+
 #if WATCH_LRU
 		DRM_INFO("%s: retire %d moves to inactive list %p\n",
 			 __func__, request->seqno, obj);
@@ -956,6 +1208,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	loff_t offset;
 	int ret = 0;
 
 #if WATCH_BUF
@@ -991,6 +1244,13 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	BUG_ON(obj_priv->active);
 
+	/* blow away mappings if mapped through GTT */
+	offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
+	unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
+
+	if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+		i915_gem_clear_fence_reg(obj);
+
 	i915_gem_object_free_page_list(obj);
 
 	if (obj_priv->gtt_space) {
@@ -1149,6 +1409,203 @@ i915_gem_object_get_page_list(struct drm_gem_object *obj)
 	return 0;
 }
 
+static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+	struct drm_gem_object *obj = reg->obj;
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int regnum = obj_priv->fence_reg;
+	uint64_t val;
+
+	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+		    0xfffff000) << 32;
+	val |= obj_priv->gtt_offset & 0xfffff000;
+	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+	val |= I965_FENCE_REG_VALID;
+
+	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
+}
+
+static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+	struct drm_gem_object *obj = reg->obj;
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int regnum = obj_priv->fence_reg;
+	uint32_t val;
+	uint32_t pitch_val;
+
+	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
+	    (obj_priv->gtt_offset & (obj->size - 1))) {
+		WARN(1, "%s: object not 1M or size aligned\n", __FUNCTION__);
+		return;
+	}
+
+	if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) ||
+						       IS_I945GM(dev) ||
+						       IS_G33(dev)))
+		pitch_val = (obj_priv->stride / 128) - 1;
+	else
+		pitch_val = (obj_priv->stride / 512) - 1;
+
+	val = obj_priv->gtt_offset;
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+	val |= I915_FENCE_SIZE_BITS(obj->size);
+	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+	val |= I830_FENCE_REG_VALID;
+
+	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
+}
+
+static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+	struct drm_gem_object *obj = reg->obj;
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int regnum = obj_priv->fence_reg;
+	uint32_t val;
+	uint32_t pitch_val;
+
+	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
+	    (obj_priv->gtt_offset & (obj->size - 1))) {
+		WARN(1, "%s: object not 1M or size aligned\n", __FUNCTION__);
+		return;
+	}
+
+	pitch_val = (obj_priv->stride / 128) - 1;
+
+	val = obj_priv->gtt_offset;
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
+	val |= I830_FENCE_SIZE_BITS(obj->size);
+	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+	val |= I830_FENCE_REG_VALID;
+
+	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
+
+}
+
+/**
+ * i915_gem_object_get_fence_reg - set up a fence reg for an object
+ * @obj: object to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ *
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ */
+static void
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	struct drm_i915_fence_reg *reg = NULL;
+	int i, ret;
+
+	switch (obj_priv->tiling_mode) {
+	case I915_TILING_NONE:
+		WARN(1, "allocating a fence for non-tiled object?\n");
+		break;
+	case I915_TILING_X:
+		WARN(obj_priv->stride & (512 - 1),
+		     "object is X tiled but has non-512B pitch\n");
+		break;
+	case I915_TILING_Y:
+		WARN(obj_priv->stride & (128 - 1),
+		     "object is Y tiled but has non-128B pitch\n");
+		break;
+	}
+
+	/* First try to find a free reg */
+	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+		reg = &dev_priv->fence_regs[i];
+		if (!reg->obj)
+			break;
+	}
+
+	/* None available, try to steal one or wait for a user to finish */
+	if (i == dev_priv->num_fence_regs) {
+		struct drm_i915_gem_object *old_obj_priv = NULL;
+		loff_t offset;
+
+try_again:
+		/* Could try to use LRU here instead... */
+		for (i = dev_priv->fence_reg_start;
+		     i < dev_priv->num_fence_regs; i++) {
+			reg = &dev_priv->fence_regs[i];
+			old_obj_priv = reg->obj->driver_private;
+			if (!old_obj_priv->pin_count)
+				break;
+		}
+
+		/*
+		 * Now things get ugly... we have to wait for one of the
+		 * objects to finish before trying again.
+		 */
+		if (i == dev_priv->num_fence_regs) {
+			ret = i915_gem_object_wait_rendering(reg->obj);
+			if (ret) {
+				WARN(ret, "wait_rendering failed: %d\n", ret);
+				return;
+			}
+			goto try_again;
+		}
+
+		/*
+		 * Zap this virtual mapping so we can set up a fence again
+		 * for this object next time we need it.
+		 */
+		offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
+		unmap_mapping_range(dev->dev_mapping, offset,
+				    reg->obj->size, 1);
+		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	}
+
+	obj_priv->fence_reg = i;
+	reg->obj = obj;
+
+	if (IS_I965G(dev))
+		i965_write_fence_reg(reg);
+	else if (IS_I9XX(dev))
+		i915_write_fence_reg(reg);
+	else
+		i830_write_fence_reg(reg);
+}
+
+/**
+ * i915_gem_clear_fence_reg - clear out fence register info
+ * @obj: object to clear
+ *
+ * Zeroes out the fence register itself and clears out the associated
+ * data structures in dev_priv and obj_priv.
+ */
+static void
+i915_gem_clear_fence_reg(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+	if (IS_I965G(dev))
+		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
+	else
+		I915_WRITE(FENCE_REG_830_0 + (obj_priv->fence_reg * 4), 0);
+
+	dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
+	obj_priv->fence_reg = I915_FENCE_REG_NONE;
+}
+
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
@@ -2351,12 +2808,18 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 
 	obj->driver_private = obj_priv;
 	obj_priv->obj = obj;
+	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+
 	return 0;
 }
 
 void i915_gem_free_object(struct drm_gem_object *obj)
 {
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list;
+	struct drm_map *map;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
 	while (obj_priv->pin_count > 0)
@@ -2364,6 +2827,20 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
 	i915_gem_object_unbind(obj);
 
+	list = &obj->map_list;
+	drm_ht_remove_item(&mm->offset_hash, &list->hash);
+
+	if (list->file_offset_node) {
+		drm_mm_put_block(list->file_offset_node);
+		list->file_offset_node = NULL;
+	}
+
+	map = list->map;
+	if (map) {
+		drm_free(map, sizeof(*map), DRM_MEM_DRIVER);
+		list->map = NULL;
+	}
+
 	drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
 	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
@@ -2432,8 +2909,7 @@ i915_gem_idle(struct drm_device *dev)
 	 */
 	i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
 		       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-	seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU |
-					I915_GEM_DOMAIN_GTT));
+	seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
 
 	if (seqno == 0) {
 		mutex_unlock(&dev->struct_mutex);
@@ -2758,5 +3234,13 @@ i915_gem_load(struct drm_device *dev)
 			  i915_gem_retire_work_handler);
 	dev_priv->mm.next_gem_seqno = 1;
 
+	/* Old X drivers will take 0-2 for front, back, depth buffers */
+	dev_priv->fence_reg_start = 3;
+
+	if (IS_I965G(dev))
+		dev_priv->num_fence_regs = 16;
+	else
+		dev_priv->num_fence_regs = 8;
+
 	i915_gem_detect_bit_6_swizzle(dev);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a8cb69469c64..241f39b7f460 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -208,6 +208,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		}
 	}
 	obj_priv->tiling_mode = args->tiling_mode;
+	obj_priv->stride = args->stride;
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9d24aaeb8a45..47e6bafeb743 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -175,9 +175,26 @@
 #define   DISPLAY_PLANE_B           (1<<20)
 
 /*
- * Instruction and interrupt control regs
+ * Fence registers
  */
+#define FENCE_REG_830_0			0x2000
+#define   I830_FENCE_START_MASK		0x07f80000
+#define   I830_FENCE_TILING_Y_SHIFT	12
+#define   I830_FENCE_SIZE_BITS(size)	((get_order(size >> 19) - 1) << 8)
+#define   I830_FENCE_PITCH_SHIFT	4
+#define   I830_FENCE_REG_VALID		(1<<0)
+
+#define   I915_FENCE_START_MASK		0x0ff00000
+#define   I915_FENCE_SIZE_BITS(size)	((get_order(size >> 20) - 1) << 8)
 
+#define FENCE_REG_965_0			0x03000
+#define   I965_FENCE_PITCH_SHIFT	2
+#define   I965_FENCE_TILING_Y_SHIFT	1
+#define   I965_FENCE_REG_VALID		(1<<0)
+
+/*
+ * Instruction and interrupt control regs
+ */
 #define PRB0_TAIL	0x02030
 #define PRB0_HEAD	0x02034
 #define PRB0_START	0x02038
@@ -245,6 +262,7 @@
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
 
+
 /*
  * Framebuffer compression (915+ only)
  */
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 152b34da927c..3f663ecc3dbb 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -160,6 +160,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_TILING	0x21
 #define DRM_I915_GEM_GET_TILING	0x22
 #define DRM_I915_GEM_GET_APERTURE 0x23
+#define DRM_I915_GEM_MMAP_GTT	0x24
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -187,6 +188,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_PREAD	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
 #define DRM_IOCTL_I915_GEM_PWRITE	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
 #define DRM_IOCTL_I915_GEM_MMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
+#define DRM_IOCTL_I915_GEM_MMAP_GTT	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
 #define DRM_IOCTL_I915_GEM_SET_DOMAIN	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
 #define DRM_IOCTL_I915_GEM_SW_FINISH	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
 #define DRM_IOCTL_I915_GEM_SET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
@@ -382,6 +384,18 @@ struct drm_i915_gem_mmap {
 	uint64_t addr_ptr;
 };
 
+struct drm_i915_gem_mmap_gtt {
+	/** Handle for the object being mapped. */
+	uint32_t handle;
+	uint32_t pad;
+	/**
+	 * Fake offset to use for subsequent mmap call
+	 *
+	 * This is a fixed-size type for 32/64 compatibility.
+	 */
+	uint64_t offset;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	uint32_t handle;
-- 
cgit v1.2.3


From f453ba0460742ad027ae0c4c7d61e62817b3e7ef Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 7 Nov 2008 14:05:41 -0800
Subject: DRM: add mode setting support

Add mode setting support to the DRM layer.

This is a fairly big chunk of work that allows DRM drivers to provide
full output control and configuration capabilities to userspace.  It was
motivated by several factors:
  - the fb layer's APIs aren't suited for anything but simple
    configurations
  - coordination between the fb layer, DRM layer, and various userspace
    drivers is poor to non-existent (radeonfb excepted)
  - user level mode setting drivers makes displaying panic & oops
    messages more difficult
  - suspend/resume of graphics state is possible in many more
    configurations with kernel level support

This commit just adds the core DRM part of the mode setting APIs.
Driver specific commits using these new structure and APIs will follow.

Co-authors: Jesse Barnes <jbarnes@virtuousgeek.org>, Jakob Bornecrantz <jakob@tungstengraphics.com>
Contributors: Alan Hourihane <alanh@tungstengraphics.com>, Maarten Maathuis <madman2003@gmail.com>

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Makefile          |    3 +-
 drivers/gpu/drm/drm_crtc.c        | 2497 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/drm_crtc_helper.c |  822 ++++++++++++
 drivers/gpu/drm/drm_drv.c         |   34 +-
 drivers/gpu/drm/drm_edid.c        |  732 +++++++++++
 drivers/gpu/drm/drm_fops.c        |   22 +-
 drivers/gpu/drm/drm_irq.c         |   64 +-
 drivers/gpu/drm/drm_mm.c          |    1 +
 drivers/gpu/drm/drm_modes.c       |  576 +++++++++
 drivers/gpu/drm/drm_stub.c        |   30 +-
 drivers/gpu/drm/drm_sysfs.c       |  329 ++++-
 drivers/video/console/vgacon.c    |   17 +
 include/drm/Kbuild                |    2 +-
 include/drm/drm.h                 |   21 +
 include/drm/drmP.h                |   18 +
 include/drm/drm_crtc.h            |  737 +++++++++++
 include/drm/drm_crtc_helper.h     |  121 ++
 include/drm/drm_edid.h            |  202 +++
 include/drm/drm_mode.h            |  278 +++++
 include/linux/console.h           |    4 +
 20 files changed, 6469 insertions(+), 41 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_crtc.c
 create mode 100644 drivers/gpu/drm/drm_crtc_helper.c
 create mode 100644 drivers/gpu/drm/drm_edid.c
 create mode 100644 drivers/gpu/drm/drm_modes.c
 create mode 100644 include/drm/drm_crtc.h
 create mode 100644 include/drm/drm_crtc_helper.h
 create mode 100644 include/drm/drm_edid.h
 create mode 100644 include/drm/drm_mode.h

(limited to 'include')

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 74da99495e21..30022c4a5c12 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -9,7 +9,8 @@ drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
 		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
 		drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \
-		drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o
+		drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \
+		drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
new file mode 100644
index 000000000000..2e880240477e
--- /dev/null
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -0,0 +1,2497 @@
+/*
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2008 Red Hat Inc.
+ *
+ * DRM core CRTC related functions
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Authors:
+ *      Keith Packard
+ *	Eric Anholt <eric@anholt.net>
+ *      Dave Airlie <airlied@linux.ie>
+ *      Jesse Barnes <jesse.barnes@intel.com>
+ */
+#include <linux/list.h>
+#include "drm.h"
+#include "drmP.h"
+#include "drm_crtc.h"
+
+struct drm_prop_enum_list {
+	int type;
+	char *name;
+};
+
+/* Avoid boilerplate.  I'm tired of typing. */
+#define DRM_ENUM_NAME_FN(fnname, list)				\
+	char *fnname(int val)					\
+	{							\
+		int i;						\
+		for (i = 0; i < ARRAY_SIZE(list); i++) {	\
+			if (list[i].type == val)		\
+				return list[i].name;		\
+		}						\
+		return "(unknown)";				\
+	}
+
+/*
+ * Global properties
+ */
+static struct drm_prop_enum_list drm_dpms_enum_list[] =
+{	{ DRM_MODE_DPMS_ON, "On" },
+	{ DRM_MODE_DPMS_STANDBY, "Standby" },
+	{ DRM_MODE_DPMS_SUSPEND, "Suspend" },
+	{ DRM_MODE_DPMS_OFF, "Off" }
+};
+
+DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list)
+
+/*
+ * Optional properties
+ */
+static struct drm_prop_enum_list drm_scaling_mode_enum_list[] =
+{
+	{ DRM_MODE_SCALE_NON_GPU, "Non-GPU" },
+	{ DRM_MODE_SCALE_FULLSCREEN, "Fullscreen" },
+	{ DRM_MODE_SCALE_NO_SCALE, "No scale" },
+	{ DRM_MODE_SCALE_ASPECT, "Aspect" },
+};
+
+static struct drm_prop_enum_list drm_dithering_mode_enum_list[] =
+{
+	{ DRM_MODE_DITHERING_OFF, "Off" },
+	{ DRM_MODE_DITHERING_ON, "On" },
+};
+
+/*
+ * Non-global properties, but "required" for certain connectors.
+ */
+static struct drm_prop_enum_list drm_dvi_i_select_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Automatic, "Automatic" }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_DVID,      "DVI-D"     }, /* DVI-I  */
+	{ DRM_MODE_SUBCONNECTOR_DVIA,      "DVI-A"     }, /* DVI-I  */
+};
+
+DRM_ENUM_NAME_FN(drm_get_dvi_i_select_name, drm_dvi_i_select_enum_list)
+
+static struct drm_prop_enum_list drm_dvi_i_subconnector_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_DVID,      "DVI-D"     }, /* DVI-I  */
+	{ DRM_MODE_SUBCONNECTOR_DVIA,      "DVI-A"     }, /* DVI-I  */
+};
+
+DRM_ENUM_NAME_FN(drm_get_dvi_i_subconnector_name,
+		 drm_dvi_i_subconnector_enum_list)
+
+static struct drm_prop_enum_list drm_tv_select_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Automatic, "Automatic" }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_SVIDEO,    "SVIDEO"    }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */
+};
+
+DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list)
+
+static struct drm_prop_enum_list drm_tv_subconnector_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_SVIDEO,    "SVIDEO"    }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */
+};
+
+DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name,
+		 drm_tv_subconnector_enum_list)
+
+struct drm_conn_prop_enum_list {
+	int type;
+	char *name;
+	int count;
+};
+
+/*
+ * Connector and encoder types.
+ */
+static struct drm_conn_prop_enum_list drm_connector_enum_list[] =
+{	{ DRM_MODE_CONNECTOR_Unknown, "Unknown", 0 },
+	{ DRM_MODE_CONNECTOR_VGA, "VGA", 0 },
+	{ DRM_MODE_CONNECTOR_DVII, "DVI-I", 0 },
+	{ DRM_MODE_CONNECTOR_DVID, "DVI-D", 0 },
+	{ DRM_MODE_CONNECTOR_DVIA, "DVI-A", 0 },
+	{ DRM_MODE_CONNECTOR_Composite, "Composite", 0 },
+	{ DRM_MODE_CONNECTOR_SVIDEO, "SVIDEO", 0 },
+	{ DRM_MODE_CONNECTOR_LVDS, "LVDS", 0 },
+	{ DRM_MODE_CONNECTOR_Component, "Component", 0 },
+	{ DRM_MODE_CONNECTOR_9PinDIN, "9-pin DIN", 0 },
+	{ DRM_MODE_CONNECTOR_DisplayPort, "DisplayPort", 0 },
+	{ DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 },
+	{ DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 },
+};
+
+static struct drm_prop_enum_list drm_encoder_enum_list[] =
+{	{ DRM_MODE_ENCODER_NONE, "None" },
+	{ DRM_MODE_ENCODER_DAC, "DAC" },
+	{ DRM_MODE_ENCODER_TMDS, "TMDS" },
+	{ DRM_MODE_ENCODER_LVDS, "LVDS" },
+	{ DRM_MODE_ENCODER_TVDAC, "TV" },
+};
+
+char *drm_get_encoder_name(struct drm_encoder *encoder)
+{
+	static char buf[32];
+
+	snprintf(buf, 32, "%s-%d",
+		 drm_encoder_enum_list[encoder->encoder_type].name,
+		 encoder->base.id);
+	return buf;
+}
+
+char *drm_get_connector_name(struct drm_connector *connector)
+{
+	static char buf[32];
+
+	snprintf(buf, 32, "%s-%d",
+		 drm_connector_enum_list[connector->connector_type].name,
+		 connector->connector_type_id);
+	return buf;
+}
+EXPORT_SYMBOL(drm_get_connector_name);
+
+char *drm_get_connector_status_name(enum drm_connector_status status)
+{
+	if (status == connector_status_connected)
+		return "connected";
+	else if (status == connector_status_disconnected)
+		return "disconnected";
+	else
+		return "unknown";
+}
+
+/**
+ * drm_mode_object_get - allocate a new identifier
+ * @dev: DRM device
+ * @ptr: object pointer, used to generate unique ID
+ * @type: object type
+ *
+ * LOCKING:
+ * Caller must hold DRM mode_config lock.
+ *
+ * Create a unique identifier based on @ptr in @dev's identifier space.  Used
+ * for tracking modes, CRTCs and connectors.
+ *
+ * RETURNS:
+ * New unique (relative to other objects in @dev) integer identifier for the
+ * object.
+ */
+static int drm_mode_object_get(struct drm_device *dev,
+			       struct drm_mode_object *obj, uint32_t obj_type)
+{
+	int new_id = 0;
+	int ret;
+
+	WARN(!mutex_is_locked(&dev->mode_config.mutex),
+	     "%s called w/o mode_config lock\n", __FUNCTION__);
+again:
+	if (idr_pre_get(&dev->mode_config.crtc_idr, GFP_KERNEL) == 0) {
+		DRM_ERROR("Ran out memory getting a mode number\n");
+		return -EINVAL;
+	}
+
+	ret = idr_get_new_above(&dev->mode_config.crtc_idr, obj, 1, &new_id);
+	if (ret == -EAGAIN)
+		goto again;
+
+	obj->id = new_id;
+	obj->type = obj_type;
+	return 0;
+}
+
+/**
+ * drm_mode_object_put - free an identifer
+ * @dev: DRM device
+ * @id: ID to free
+ *
+ * LOCKING:
+ * Caller must hold DRM mode_config lock.
+ *
+ * Free @id from @dev's unique identifier pool.
+ */
+static void drm_mode_object_put(struct drm_device *dev,
+				struct drm_mode_object *object)
+{
+	idr_remove(&dev->mode_config.crtc_idr, object->id);
+}
+
+void *drm_mode_object_find(struct drm_device *dev, uint32_t id, uint32_t type)
+{
+	struct drm_mode_object *obj;
+
+	obj = idr_find(&dev->mode_config.crtc_idr, id);
+	if (!obj || (obj->type != type) || (obj->id != id))
+		return NULL;
+
+	return obj;
+}
+EXPORT_SYMBOL(drm_mode_object_find);
+
+/**
+ * drm_crtc_from_fb - find the CRTC structure associated with an fb
+ * @dev: DRM device
+ * @fb: framebuffer in question
+ *
+ * LOCKING:
+ * Caller must hold mode_config lock.
+ *
+ * Find CRTC in the mode_config structure that matches @fb.
+ *
+ * RETURNS:
+ * Pointer to the CRTC or NULL if it wasn't found.
+ */
+struct drm_crtc *drm_crtc_from_fb(struct drm_device *dev,
+				  struct drm_framebuffer *fb)
+{
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->fb == fb)
+			return crtc;
+	}
+	return NULL;
+}
+
+/**
+ * drm_framebuffer_init - initialize a framebuffer
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Allocates an ID for the framebuffer's parent mode object, sets its mode
+ * functions & device file and adds it to the master fd list.
+ *
+ * RETURNS:
+ * Zero on success, error code on falure.
+ */
+int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
+			 const struct drm_framebuffer_funcs *funcs)
+{
+	int ret;
+
+	ret = drm_mode_object_get(dev, &fb->base, DRM_MODE_OBJECT_FB);
+	if (ret) {
+		return ret;
+	}
+
+	fb->dev = dev;
+	fb->funcs = funcs;
+	dev->mode_config.num_fb++;
+	list_add(&fb->head, &dev->mode_config.fb_list);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_framebuffer_init);
+
+/**
+ * drm_framebuffer_cleanup - remove a framebuffer object
+ * @fb: framebuffer to remove
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Scans all the CRTCs in @dev's mode_config.  If they're using @fb, removes
+ * it, setting it to NULL.
+ */
+void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
+{
+	struct drm_device *dev = fb->dev;
+	struct drm_crtc *crtc;
+
+	/* remove from any CRTC */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->fb == fb)
+			crtc->fb = NULL;
+	}
+
+	drm_mode_object_put(dev, &fb->base);
+	list_del(&fb->head);
+	dev->mode_config.num_fb--;
+}
+EXPORT_SYMBOL(drm_framebuffer_cleanup);
+
+/**
+ * drm_crtc_init - Initialise a new CRTC object
+ * @dev: DRM device
+ * @crtc: CRTC object to init
+ * @funcs: callbacks for the new CRTC
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Inits a new object created as base part of an driver crtc object.
+ */
+void drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+		   const struct drm_crtc_funcs *funcs)
+{
+	crtc->dev = dev;
+	crtc->funcs = funcs;
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_object_get(dev, &crtc->base, DRM_MODE_OBJECT_CRTC);
+
+	list_add_tail(&crtc->head, &dev->mode_config.crtc_list);
+	dev->mode_config.num_crtc++;
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_crtc_init);
+
+/**
+ * drm_crtc_cleanup - Cleans up the core crtc usage.
+ * @crtc: CRTC to cleanup
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Cleanup @crtc. Removes from drm modesetting space
+ * does NOT free object, caller does that.
+ */
+void drm_crtc_cleanup(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+
+	if (crtc->gamma_store) {
+		kfree(crtc->gamma_store);
+		crtc->gamma_store = NULL;
+	}
+
+	drm_mode_object_put(dev, &crtc->base);
+	list_del(&crtc->head);
+	dev->mode_config.num_crtc--;
+}
+EXPORT_SYMBOL(drm_crtc_cleanup);
+
+/**
+ * drm_mode_probed_add - add a mode to a connector's probed mode list
+ * @connector: connector the new mode
+ * @mode: mode data
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Add @mode to @connector's mode list for later use.
+ */
+void drm_mode_probed_add(struct drm_connector *connector,
+			 struct drm_display_mode *mode)
+{
+	list_add(&mode->head, &connector->probed_modes);
+}
+EXPORT_SYMBOL(drm_mode_probed_add);
+
+/**
+ * drm_mode_remove - remove and free a mode
+ * @connector: connector list to modify
+ * @mode: mode to remove
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Remove @mode from @connector's mode list, then free it.
+ */
+void drm_mode_remove(struct drm_connector *connector,
+		     struct drm_display_mode *mode)
+{
+	list_del(&mode->head);
+	kfree(mode);
+}
+EXPORT_SYMBOL(drm_mode_remove);
+
+/**
+ * drm_connector_init - Init a preallocated connector
+ * @dev: DRM device
+ * @connector: the connector to init
+ * @funcs: callbacks for this connector
+ * @name: user visible name of the connector
+ *
+ * LOCKING:
+ * Caller must hold @dev's mode_config lock.
+ *
+ * Initialises a preallocated connector. Connectors should be
+ * subclassed as part of driver connector objects.
+ */
+void drm_connector_init(struct drm_device *dev,
+		     struct drm_connector *connector,
+		     const struct drm_connector_funcs *funcs,
+		     int connector_type)
+{
+	mutex_lock(&dev->mode_config.mutex);
+
+	connector->dev = dev;
+	connector->funcs = funcs;
+	drm_mode_object_get(dev, &connector->base, DRM_MODE_OBJECT_CONNECTOR);
+	connector->connector_type = connector_type;
+	connector->connector_type_id =
+		++drm_connector_enum_list[connector_type].count; /* TODO */
+	INIT_LIST_HEAD(&connector->user_modes);
+	INIT_LIST_HEAD(&connector->probed_modes);
+	INIT_LIST_HEAD(&connector->modes);
+	connector->edid_blob_ptr = NULL;
+
+	list_add_tail(&connector->head, &dev->mode_config.connector_list);
+	dev->mode_config.num_connector++;
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.edid_property, 0);
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.dpms_property, 0);
+
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_connector_init);
+
+/**
+ * drm_connector_cleanup - cleans up an initialised connector
+ * @connector: connector to cleanup
+ *
+ * LOCKING:
+ * Caller must hold @dev's mode_config lock.
+ *
+ * Cleans up the connector but doesn't free the object.
+ */
+void drm_connector_cleanup(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_display_mode *mode, *t;
+
+	list_for_each_entry_safe(mode, t, &connector->probed_modes, head)
+		drm_mode_remove(connector, mode);
+
+	list_for_each_entry_safe(mode, t, &connector->modes, head)
+		drm_mode_remove(connector, mode);
+
+	list_for_each_entry_safe(mode, t, &connector->user_modes, head)
+		drm_mode_remove(connector, mode);
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_object_put(dev, &connector->base);
+	list_del(&connector->head);
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_connector_cleanup);
+
+void drm_encoder_init(struct drm_device *dev,
+		      struct drm_encoder *encoder,
+		      const struct drm_encoder_funcs *funcs,
+		      int encoder_type)
+{
+	mutex_lock(&dev->mode_config.mutex);
+
+	encoder->dev = dev;
+
+	drm_mode_object_get(dev, &encoder->base, DRM_MODE_OBJECT_ENCODER);
+	encoder->encoder_type = encoder_type;
+	encoder->funcs = funcs;
+
+	list_add_tail(&encoder->head, &dev->mode_config.encoder_list);
+	dev->mode_config.num_encoder++;
+
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_encoder_init);
+
+void drm_encoder_cleanup(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_object_put(dev, &encoder->base);
+	list_del(&encoder->head);
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_encoder_cleanup);
+
+/**
+ * drm_mode_create - create a new display mode
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold DRM mode_config lock.
+ *
+ * Create a new drm_display_mode, give it an ID, and return it.
+ *
+ * RETURNS:
+ * Pointer to new mode on success, NULL on error.
+ */
+struct drm_display_mode *drm_mode_create(struct drm_device *dev)
+{
+	struct drm_display_mode *nmode;
+
+	nmode = kzalloc(sizeof(struct drm_display_mode), GFP_KERNEL);
+	if (!nmode)
+		return NULL;
+
+	drm_mode_object_get(dev, &nmode->base, DRM_MODE_OBJECT_MODE);
+	return nmode;
+}
+EXPORT_SYMBOL(drm_mode_create);
+
+/**
+ * drm_mode_destroy - remove a mode
+ * @dev: DRM device
+ * @mode: mode to remove
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Free @mode's unique identifier, then free it.
+ */
+void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode)
+{
+	drm_mode_object_put(dev, &mode->base);
+
+	kfree(mode);
+}
+EXPORT_SYMBOL(drm_mode_destroy);
+
+static int drm_mode_create_standard_connector_properties(struct drm_device *dev)
+{
+	struct drm_property *edid;
+	struct drm_property *dpms;
+	int i;
+
+	/*
+	 * Standard properties (apply to all connectors)
+	 */
+	edid = drm_property_create(dev, DRM_MODE_PROP_BLOB |
+				   DRM_MODE_PROP_IMMUTABLE,
+				   "EDID", 0);
+	dev->mode_config.edid_property = edid;
+
+	dpms = drm_property_create(dev, DRM_MODE_PROP_ENUM,
+				   "DPMS", ARRAY_SIZE(drm_dpms_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dpms_enum_list); i++)
+		drm_property_add_enum(dpms, i, drm_dpms_enum_list[i].type,
+				      drm_dpms_enum_list[i].name);
+	dev->mode_config.dpms_property = dpms;
+
+	return 0;
+}
+
+/**
+ * drm_mode_create_dvi_i_properties - create DVI-I specific connector properties
+ * @dev: DRM device
+ *
+ * Called by a driver the first time a DVI-I connector is made.
+ */
+int drm_mode_create_dvi_i_properties(struct drm_device *dev)
+{
+	struct drm_property *dvi_i_selector;
+	struct drm_property *dvi_i_subconnector;
+	int i;
+
+	if (dev->mode_config.dvi_i_select_subconnector_property)
+		return 0;
+
+	dvi_i_selector =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM,
+				    "select subconnector",
+				    ARRAY_SIZE(drm_dvi_i_select_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dvi_i_select_enum_list); i++)
+		drm_property_add_enum(dvi_i_selector, i,
+				      drm_dvi_i_select_enum_list[i].type,
+				      drm_dvi_i_select_enum_list[i].name);
+	dev->mode_config.dvi_i_select_subconnector_property = dvi_i_selector;
+
+	dvi_i_subconnector =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM |
+				    DRM_MODE_PROP_IMMUTABLE,
+				    "subconnector",
+				    ARRAY_SIZE(drm_dvi_i_subconnector_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dvi_i_subconnector_enum_list); i++)
+		drm_property_add_enum(dvi_i_subconnector, i,
+				      drm_dvi_i_subconnector_enum_list[i].type,
+				      drm_dvi_i_subconnector_enum_list[i].name);
+	dev->mode_config.dvi_i_subconnector_property = dvi_i_subconnector;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_dvi_i_properties);
+
+/**
+ * drm_create_tv_properties - create TV specific connector properties
+ * @dev: DRM device
+ * @num_modes: number of different TV formats (modes) supported
+ * @modes: array of pointers to strings containing name of each format
+ *
+ * Called by a driver's TV initialization routine, this function creates
+ * the TV specific connector properties for a given device.  Caller is
+ * responsible for allocating a list of format names and passing them to
+ * this routine.
+ */
+int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes,
+				  char *modes[])
+{
+	struct drm_property *tv_selector;
+	struct drm_property *tv_subconnector;
+	int i;
+
+	if (dev->mode_config.tv_select_subconnector_property)
+		return 0;
+
+	/*
+	 * Basic connector properties
+	 */
+	tv_selector = drm_property_create(dev, DRM_MODE_PROP_ENUM,
+					  "select subconnector",
+					  ARRAY_SIZE(drm_tv_select_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_tv_select_enum_list); i++)
+		drm_property_add_enum(tv_selector, i,
+				      drm_tv_select_enum_list[i].type,
+				      drm_tv_select_enum_list[i].name);
+	dev->mode_config.tv_select_subconnector_property = tv_selector;
+
+	tv_subconnector =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM |
+				    DRM_MODE_PROP_IMMUTABLE, "subconnector",
+				    ARRAY_SIZE(drm_tv_subconnector_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_tv_subconnector_enum_list); i++)
+		drm_property_add_enum(tv_subconnector, i,
+				      drm_tv_subconnector_enum_list[i].type,
+				      drm_tv_subconnector_enum_list[i].name);
+	dev->mode_config.tv_subconnector_property = tv_subconnector;
+
+	/*
+	 * Other, TV specific properties: margins & TV modes.
+	 */
+	dev->mode_config.tv_left_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "left margin", 2);
+	dev->mode_config.tv_left_margin_property->values[0] = 0;
+	dev->mode_config.tv_left_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_right_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "right margin", 2);
+	dev->mode_config.tv_right_margin_property->values[0] = 0;
+	dev->mode_config.tv_right_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_top_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "top margin", 2);
+	dev->mode_config.tv_top_margin_property->values[0] = 0;
+	dev->mode_config.tv_top_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_bottom_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "bottom margin", 2);
+	dev->mode_config.tv_bottom_margin_property->values[0] = 0;
+	dev->mode_config.tv_bottom_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_mode_property =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM,
+				    "mode", num_modes);
+	for (i = 0; i < num_modes; i++)
+		drm_property_add_enum(dev->mode_config.tv_mode_property, i,
+				      i, modes[i]);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_tv_properties);
+
+/**
+ * drm_mode_create_scaling_mode_property - create scaling mode property
+ * @dev: DRM device
+ *
+ * Called by a driver the first time it's needed, must be attached to desired
+ * connectors.
+ */
+int drm_mode_create_scaling_mode_property(struct drm_device *dev)
+{
+	struct drm_property *scaling_mode;
+	int i;
+
+	if (dev->mode_config.scaling_mode_property)
+		return 0;
+
+	scaling_mode =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM, "scaling mode",
+				    ARRAY_SIZE(drm_scaling_mode_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_scaling_mode_enum_list); i++)
+		drm_property_add_enum(scaling_mode, i,
+				      drm_scaling_mode_enum_list[i].type,
+				      drm_scaling_mode_enum_list[i].name);
+
+	dev->mode_config.scaling_mode_property = scaling_mode;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_scaling_mode_property);
+
+/**
+ * drm_mode_create_dithering_property - create dithering property
+ * @dev: DRM device
+ *
+ * Called by a driver the first time it's needed, must be attached to desired
+ * connectors.
+ */
+int drm_mode_create_dithering_property(struct drm_device *dev)
+{
+	struct drm_property *dithering_mode;
+	int i;
+
+	if (dev->mode_config.dithering_mode_property)
+		return 0;
+
+	dithering_mode =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM, "dithering",
+				    ARRAY_SIZE(drm_dithering_mode_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dithering_mode_enum_list); i++)
+		drm_property_add_enum(dithering_mode, i,
+				      drm_dithering_mode_enum_list[i].type,
+				      drm_dithering_mode_enum_list[i].name);
+	dev->mode_config.dithering_mode_property = dithering_mode;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_dithering_property);
+
+/**
+ * drm_mode_config_init - initialize DRM mode_configuration structure
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * None, should happen single threaded at init time.
+ *
+ * Initialize @dev's mode_config structure, used for tracking the graphics
+ * configuration of @dev.
+ */
+void drm_mode_config_init(struct drm_device *dev)
+{
+	mutex_init(&dev->mode_config.mutex);
+	INIT_LIST_HEAD(&dev->mode_config.fb_list);
+	INIT_LIST_HEAD(&dev->mode_config.fb_kernel_list);
+	INIT_LIST_HEAD(&dev->mode_config.crtc_list);
+	INIT_LIST_HEAD(&dev->mode_config.connector_list);
+	INIT_LIST_HEAD(&dev->mode_config.encoder_list);
+	INIT_LIST_HEAD(&dev->mode_config.property_list);
+	INIT_LIST_HEAD(&dev->mode_config.property_blob_list);
+	idr_init(&dev->mode_config.crtc_idr);
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_create_standard_connector_properties(dev);
+	mutex_unlock(&dev->mode_config.mutex);
+
+	/* Just to be sure */
+	dev->mode_config.num_fb = 0;
+	dev->mode_config.num_connector = 0;
+	dev->mode_config.num_crtc = 0;
+	dev->mode_config.num_encoder = 0;
+	dev->mode_config.hotplug_counter = 0;
+}
+EXPORT_SYMBOL(drm_mode_config_init);
+
+int drm_mode_group_init(struct drm_device *dev, struct drm_mode_group *group)
+{
+	uint32_t total_objects = 0;
+
+	total_objects += dev->mode_config.num_crtc;
+	total_objects += dev->mode_config.num_connector;
+	total_objects += dev->mode_config.num_encoder;
+
+	if (total_objects == 0)
+		return -EINVAL;
+
+	group->id_list = kzalloc(total_objects * sizeof(uint32_t), GFP_KERNEL);
+	if (!group->id_list)
+		return -ENOMEM;
+
+	group->num_crtcs = 0;
+	group->num_connectors = 0;
+	group->num_encoders = 0;
+	return 0;
+}
+
+int drm_mode_group_init_legacy_group(struct drm_device *dev,
+				     struct drm_mode_group *group)
+{
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	int ret;
+
+	if ((ret = drm_mode_group_init(dev, group)))
+		return ret;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+		group->id_list[group->num_crtcs++] = crtc->base.id;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
+		group->id_list[group->num_crtcs + group->num_encoders++] =
+		encoder->base.id;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		group->id_list[group->num_crtcs + group->num_encoders +
+			       group->num_connectors++] = connector->base.id;
+
+	return 0;
+}
+
+/**
+ * drm_mode_config_cleanup - free up DRM mode_config info
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Free up all the connectors and CRTCs associated with this DRM device, then
+ * free up the framebuffers and associated buffer objects.
+ *
+ * FIXME: cleanup any dangling user buffer objects too
+ */
+void drm_mode_config_cleanup(struct drm_device *dev)
+{
+	struct drm_connector *connector, *ot;
+	struct drm_crtc *crtc, *ct;
+	struct drm_encoder *encoder, *enct;
+	struct drm_framebuffer *fb, *fbt;
+	struct drm_property *property, *pt;
+
+	list_for_each_entry_safe(encoder, enct, &dev->mode_config.encoder_list,
+				 head) {
+		encoder->funcs->destroy(encoder);
+	}
+
+	list_for_each_entry_safe(connector, ot,
+				 &dev->mode_config.connector_list, head) {
+		connector->funcs->destroy(connector);
+	}
+
+	list_for_each_entry_safe(property, pt, &dev->mode_config.property_list,
+				 head) {
+		drm_property_destroy(dev, property);
+	}
+
+	list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) {
+		fb->funcs->destroy(fb);
+	}
+
+	list_for_each_entry_safe(crtc, ct, &dev->mode_config.crtc_list, head) {
+		crtc->funcs->destroy(crtc);
+	}
+
+}
+EXPORT_SYMBOL(drm_mode_config_cleanup);
+
+int drm_mode_hotplug_ioctl(struct drm_device *dev,
+			   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_hotplug *arg = data;
+
+	arg->counter = dev->mode_config.hotplug_counter;
+
+	return 0;
+}
+
+/**
+ * drm_crtc_convert_to_umode - convert a drm_display_mode into a modeinfo
+ * @out: drm_mode_modeinfo struct to return to the user
+ * @in: drm_display_mode to use
+ *
+ * LOCKING:
+ * None.
+ *
+ * Convert a drm_display_mode into a drm_mode_modeinfo structure to return to
+ * the user.
+ */
+void drm_crtc_convert_to_umode(struct drm_mode_modeinfo *out,
+			       struct drm_display_mode *in)
+{
+	out->clock = in->clock;
+	out->hdisplay = in->hdisplay;
+	out->hsync_start = in->hsync_start;
+	out->hsync_end = in->hsync_end;
+	out->htotal = in->htotal;
+	out->hskew = in->hskew;
+	out->vdisplay = in->vdisplay;
+	out->vsync_start = in->vsync_start;
+	out->vsync_end = in->vsync_end;
+	out->vtotal = in->vtotal;
+	out->vscan = in->vscan;
+	out->vrefresh = in->vrefresh;
+	out->flags = in->flags;
+	out->type = in->type;
+	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
+	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+}
+
+/**
+ * drm_crtc_convert_to_umode - convert a modeinfo into a drm_display_mode
+ * @out: drm_display_mode to return to the user
+ * @in: drm_mode_modeinfo to use
+ *
+ * LOCKING:
+ * None.
+ *
+ * Convert a drm_mode_modeinfo into a drm_display_mode structure to return to
+ * the caller.
+ */
+void drm_crtc_convert_umode(struct drm_display_mode *out,
+			    struct drm_mode_modeinfo *in)
+{
+	out->clock = in->clock;
+	out->hdisplay = in->hdisplay;
+	out->hsync_start = in->hsync_start;
+	out->hsync_end = in->hsync_end;
+	out->htotal = in->htotal;
+	out->hskew = in->hskew;
+	out->vdisplay = in->vdisplay;
+	out->vsync_start = in->vsync_start;
+	out->vsync_end = in->vsync_end;
+	out->vtotal = in->vtotal;
+	out->vscan = in->vscan;
+	out->vrefresh = in->vrefresh;
+	out->flags = in->flags;
+	out->type = in->type;
+	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
+	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+}
+
+/**
+ * drm_mode_getresources - get graphics configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Construct a set of configuration description structures and return
+ * them to the user, including CRTC, connector and framebuffer configuration.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getresources(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_mode_card_res *card_res = data;
+	struct list_head *lh;
+	struct drm_framebuffer *fb;
+	struct drm_connector *connector;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int ret = 0;
+	int connector_count = 0;
+	int crtc_count = 0;
+	int fb_count = 0;
+	int encoder_count = 0;
+	int copied = 0, i;
+	uint32_t __user *fb_id;
+	uint32_t __user *crtc_id;
+	uint32_t __user *connector_id;
+	uint32_t __user *encoder_id;
+	struct drm_mode_group *mode_group;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	/*
+	 * For the non-control nodes we need to limit the list of resources
+	 * by IDs in the group list for this node
+	 */
+	list_for_each(lh, &file_priv->fbs)
+		fb_count++;
+
+	mode_group = &file_priv->master->minor->mode_group;
+	if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+
+		list_for_each(lh, &dev->mode_config.crtc_list)
+			crtc_count++;
+
+		list_for_each(lh, &dev->mode_config.connector_list)
+			connector_count++;
+
+		list_for_each(lh, &dev->mode_config.encoder_list)
+			encoder_count++;
+	} else {
+
+		crtc_count = mode_group->num_crtcs;
+		connector_count = mode_group->num_connectors;
+		encoder_count = mode_group->num_encoders;
+	}
+
+	card_res->max_height = dev->mode_config.max_height;
+	card_res->min_height = dev->mode_config.min_height;
+	card_res->max_width = dev->mode_config.max_width;
+	card_res->min_width = dev->mode_config.min_width;
+
+	/* handle this in 4 parts */
+	/* FBs */
+	if (card_res->count_fbs >= fb_count) {
+		copied = 0;
+		fb_id = (uint32_t __user *)(unsigned long)card_res->fb_id_ptr;
+		list_for_each_entry(fb, &file_priv->fbs, head) {
+			if (put_user(fb->base.id, fb_id + copied)) {
+				ret = -EFAULT;
+				goto out;
+			}
+			copied++;
+		}
+	}
+	card_res->count_fbs = fb_count;
+
+	/* CRTCs */
+	if (card_res->count_crtcs >= crtc_count) {
+		copied = 0;
+		crtc_id = (uint32_t __user *)(unsigned long)card_res->crtc_id_ptr;
+		if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+			list_for_each_entry(crtc, &dev->mode_config.crtc_list,
+					    head) {
+				DRM_DEBUG("CRTC ID is %d\n", crtc->base.id);
+				if (put_user(crtc->base.id, crtc_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		} else {
+			for (i = 0; i < mode_group->num_crtcs; i++) {
+				if (put_user(mode_group->id_list[i],
+					     crtc_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	card_res->count_crtcs = crtc_count;
+
+	/* Encoders */
+	if (card_res->count_encoders >= encoder_count) {
+		copied = 0;
+		encoder_id = (uint32_t __user *)(unsigned long)card_res->encoder_id_ptr;
+		if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+			list_for_each_entry(encoder,
+					    &dev->mode_config.encoder_list,
+					    head) {
+				DRM_DEBUG("ENCODER ID is %d\n",
+					  encoder->base.id);
+				if (put_user(encoder->base.id, encoder_id +
+					     copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		} else {
+			for (i = mode_group->num_crtcs; i < mode_group->num_crtcs + mode_group->num_encoders; i++) {
+				if (put_user(mode_group->id_list[i],
+					     encoder_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+
+		}
+	}
+	card_res->count_encoders = encoder_count;
+
+	/* Connectors */
+	if (card_res->count_connectors >= connector_count) {
+		copied = 0;
+		connector_id = (uint32_t __user *)(unsigned long)card_res->connector_id_ptr;
+		if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+			list_for_each_entry(connector,
+					    &dev->mode_config.connector_list,
+					    head) {
+				DRM_DEBUG("CONNECTOR ID is %d\n",
+					  connector->base.id);
+				if (put_user(connector->base.id,
+					     connector_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		} else {
+			int start = mode_group->num_crtcs +
+				mode_group->num_encoders;
+			for (i = start; i < start + mode_group->num_connectors; i++) {
+				if (put_user(mode_group->id_list[i],
+					     connector_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	card_res->count_connectors = connector_count;
+
+	DRM_DEBUG("Counted %d %d %d\n", card_res->count_crtcs,
+		  card_res->count_connectors, card_res->count_encoders);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_getcrtc - get CRTC configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Construct a CRTC configuration structure to return to the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getcrtc(struct drm_device *dev,
+		     void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_crtc *crtc_resp = data;
+	struct drm_crtc *crtc;
+	struct drm_mode_object *obj;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
+				   DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	crtc_resp->x = crtc->x;
+	crtc_resp->y = crtc->y;
+	crtc_resp->gamma_size = crtc->gamma_size;
+	if (crtc->fb)
+		crtc_resp->fb_id = crtc->fb->base.id;
+	else
+		crtc_resp->fb_id = 0;
+
+	if (crtc->enabled) {
+
+		drm_crtc_convert_to_umode(&crtc_resp->mode, &crtc->mode);
+		crtc_resp->mode_valid = 1;
+
+	} else {
+		crtc_resp->mode_valid = 0;
+	}
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_getconnector - get connector configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Construct a connector configuration structure to return to the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getconnector(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_mode_get_connector *out_resp = data;
+	struct drm_mode_object *obj;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	int mode_count = 0;
+	int props_count = 0;
+	int encoders_count = 0;
+	int ret = 0;
+	int copied = 0;
+	int i;
+	struct drm_mode_modeinfo u_mode;
+	struct drm_mode_modeinfo __user *mode_ptr;
+	uint32_t __user *prop_ptr;
+	uint64_t __user *prop_values;
+	uint32_t __user *encoder_ptr;
+
+	memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo));
+
+	DRM_DEBUG("connector id %d:\n", out_resp->connector_id);
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, out_resp->connector_id,
+				   DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] != 0) {
+			props_count++;
+		}
+	}
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] != 0) {
+			encoders_count++;
+		}
+	}
+
+	if (out_resp->count_modes == 0) {
+		connector->funcs->fill_modes(connector,
+					     dev->mode_config.max_width,
+					     dev->mode_config.max_height);
+	}
+
+	/* delayed so we get modes regardless of pre-fill_modes state */
+	list_for_each_entry(mode, &connector->modes, head)
+		mode_count++;
+
+	out_resp->connector_id = connector->base.id;
+	out_resp->connector_type = connector->connector_type;
+	out_resp->connector_type_id = connector->connector_type_id;
+	out_resp->mm_width = connector->display_info.width_mm;
+	out_resp->mm_height = connector->display_info.height_mm;
+	out_resp->subpixel = connector->display_info.subpixel_order;
+	out_resp->connection = connector->status;
+	if (connector->encoder)
+		out_resp->encoder_id = connector->encoder->base.id;
+	else
+		out_resp->encoder_id = 0;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
+	 */
+	if ((out_resp->count_modes >= mode_count) && mode_count) {
+		copied = 0;
+		mode_ptr = (struct drm_mode_modeinfo *)(unsigned long)out_resp->modes_ptr;
+		list_for_each_entry(mode, &connector->modes, head) {
+			drm_crtc_convert_to_umode(&u_mode, mode);
+			if (copy_to_user(mode_ptr + copied,
+					 &u_mode, sizeof(u_mode))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			copied++;
+		}
+	}
+	out_resp->count_modes = mode_count;
+
+	if ((out_resp->count_props >= props_count) && props_count) {
+		copied = 0;
+		prop_ptr = (uint32_t *)(unsigned long)(out_resp->props_ptr);
+		prop_values = (uint64_t *)(unsigned long)(out_resp->prop_values_ptr);
+		for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+			if (connector->property_ids[i] != 0) {
+				if (put_user(connector->property_ids[i],
+					     prop_ptr + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+
+				if (put_user(connector->property_values[i],
+					     prop_values + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	out_resp->count_props = props_count;
+
+	if ((out_resp->count_encoders >= encoders_count) && encoders_count) {
+		copied = 0;
+		encoder_ptr = (uint32_t *)(unsigned long)(out_resp->encoders_ptr);
+		for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+			if (connector->encoder_ids[i] != 0) {
+				if (put_user(connector->encoder_ids[i],
+					     encoder_ptr + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	out_resp->count_encoders = encoders_count;
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+int drm_mode_getencoder(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_mode_get_encoder *enc_resp = data;
+	struct drm_mode_object *obj;
+	struct drm_encoder *encoder;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, enc_resp->encoder_id,
+				   DRM_MODE_OBJECT_ENCODER);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	encoder = obj_to_encoder(obj);
+
+	if (encoder->crtc)
+		enc_resp->crtc_id = encoder->crtc->base.id;
+	else
+		enc_resp->crtc_id = 0;
+	enc_resp->encoder_type = encoder->encoder_type;
+	enc_resp->encoder_id = encoder->base.id;
+	enc_resp->possible_crtcs = encoder->possible_crtcs;
+	enc_resp->possible_clones = encoder->possible_clones;
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_setcrtc - set CRTC configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Build a new CRTC configuration based on user request.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_setcrtc(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_mode_crtc *crtc_req = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc, *crtcfb;
+	struct drm_connector **connector_set = NULL, *connector;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_display_mode *mode = NULL;
+	struct drm_mode_set set;
+	uint32_t __user *set_connectors_ptr;
+	int ret = 0;
+	int i;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, crtc_req->crtc_id,
+				   DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		DRM_DEBUG("Unknown CRTC ID %d\n", crtc_req->crtc_id);
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	if (crtc_req->mode_valid) {
+		/* If we have a mode we need a framebuffer. */
+		/* If we pass -1, set the mode with the currently bound fb */
+		if (crtc_req->fb_id == -1) {
+			list_for_each_entry(crtcfb,
+					    &dev->mode_config.crtc_list, head) {
+				if (crtcfb == crtc) {
+					DRM_DEBUG("Using current fb for setmode\n");
+					fb = crtc->fb;
+				}
+			}
+		} else {
+			obj = drm_mode_object_find(dev, crtc_req->fb_id,
+						   DRM_MODE_OBJECT_FB);
+			if (!obj) {
+				DRM_DEBUG("Unknown FB ID%d\n", crtc_req->fb_id);
+				ret = -EINVAL;
+				goto out;
+			}
+			fb = obj_to_fb(obj);
+		}
+
+		mode = drm_mode_create(dev);
+		drm_crtc_convert_umode(mode, &crtc_req->mode);
+		drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+	}
+
+	if (crtc_req->count_connectors == 0 && mode) {
+		DRM_DEBUG("Count connectors is 0 but mode set\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (crtc_req->count_connectors > 0 && !mode && !fb) {
+		DRM_DEBUG("Count connectors is %d but no mode or fb set\n",
+			  crtc_req->count_connectors);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (crtc_req->count_connectors > 0) {
+		u32 out_id;
+
+		/* Avoid unbounded kernel memory allocation */
+		if (crtc_req->count_connectors > config->num_connector) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		connector_set = kmalloc(crtc_req->count_connectors *
+					sizeof(struct drm_connector *),
+					GFP_KERNEL);
+		if (!connector_set) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		for (i = 0; i < crtc_req->count_connectors; i++) {
+			set_connectors_ptr = (uint32_t *)(unsigned long)crtc_req->set_connectors_ptr;
+			if (get_user(out_id, &set_connectors_ptr[i])) {
+				ret = -EFAULT;
+				goto out;
+			}
+
+			obj = drm_mode_object_find(dev, out_id,
+						   DRM_MODE_OBJECT_CONNECTOR);
+			if (!obj) {
+				DRM_DEBUG("Connector id %d unknown\n", out_id);
+				ret = -EINVAL;
+				goto out;
+			}
+			connector = obj_to_connector(obj);
+
+			connector_set[i] = connector;
+		}
+	}
+
+	set.crtc = crtc;
+	set.x = crtc_req->x;
+	set.y = crtc_req->y;
+	set.mode = mode;
+	set.connectors = connector_set;
+	set.num_connectors = crtc_req->count_connectors;
+	set.fb =fb;
+	ret = crtc->funcs->set_config(&set);
+
+out:
+	kfree(connector_set);
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+int drm_mode_cursor_ioctl(struct drm_device *dev,
+			void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_cursor *req = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	DRM_DEBUG("\n");
+
+	if (!req->flags) {
+		DRM_ERROR("no operation set\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, req->crtc, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		DRM_DEBUG("Unknown CRTC ID %d\n", req->crtc);
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	if (req->flags & DRM_MODE_CURSOR_BO) {
+		if (!crtc->funcs->cursor_set) {
+			DRM_ERROR("crtc does not support cursor\n");
+			ret = -ENXIO;
+			goto out;
+		}
+		/* Turns off the cursor if handle is 0 */
+		ret = crtc->funcs->cursor_set(crtc, file_priv, req->handle,
+					      req->width, req->height);
+	}
+
+	if (req->flags & DRM_MODE_CURSOR_MOVE) {
+		if (crtc->funcs->cursor_move) {
+			ret = crtc->funcs->cursor_move(crtc, req->x, req->y);
+		} else {
+			DRM_ERROR("crtc does not support cursor\n");
+			ret = -EFAULT;
+			goto out;
+		}
+	}
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_addfb - add an FB to the graphics configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Add a new FB to the specified CRTC, given a user request.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_addfb(struct drm_device *dev,
+		   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_fb_cmd *r = data;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_framebuffer *fb;
+	int ret = 0;
+
+	if ((config->min_width > r->width) || (r->width > config->max_width)) {
+		DRM_ERROR("mode new framebuffer width not within limits\n");
+		return -EINVAL;
+	}
+	if ((config->min_height > r->height) || (r->height > config->max_height)) {
+		DRM_ERROR("mode new framebuffer height not within limits\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	/* TODO check buffer is sufficently large */
+	/* TODO setup destructor callback */
+
+	fb = dev->mode_config.funcs->fb_create(dev, file_priv, r);
+	if (!fb) {
+		DRM_ERROR("could not create framebuffer\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	r->buffer_id = fb->base.id;
+	list_add(&fb->filp_head, &file_priv->fbs);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_rmfb - remove an FB from the configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Remove the FB specified by the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_rmfb(struct drm_device *dev,
+		   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_framebuffer *fbl = NULL;
+	uint32_t *id = data;
+	int ret = 0;
+	int found = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB);
+	/* TODO check that we realy get a framebuffer back. */
+	if (!obj) {
+		DRM_ERROR("mode invalid framebuffer id\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	fb = obj_to_fb(obj);
+
+	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
+		if (fb == fbl)
+			found = 1;
+
+	if (!found) {
+		DRM_ERROR("tried to remove a fb that we didn't own\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* TODO release all crtc connected to the framebuffer */
+	/* TODO unhock the destructor from the buffer object */
+
+	list_del(&fb->filp_head);
+	fb->funcs->destroy(fb);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_getfb - get FB info
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Lookup the FB given its ID and return info about it.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getfb(struct drm_device *dev,
+		   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_fb_cmd *r = data;
+	struct drm_mode_object *obj;
+	struct drm_framebuffer *fb;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, r->buffer_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("invalid framebuffer id\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	fb = obj_to_fb(obj);
+
+	r->height = fb->height;
+	r->width = fb->width;
+	r->depth = fb->depth;
+	r->bpp = fb->bits_per_pixel;
+	r->pitch = fb->pitch;
+	fb->funcs->create_handle(fb, file_priv, &r->handle);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_fb_release - remove and free the FBs on this file
+ * @filp: file * from the ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Destroy all the FBs associated with @filp.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+void drm_fb_release(struct file *filp)
+{
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_framebuffer *fb, *tfb;
+
+	mutex_lock(&dev->mode_config.mutex);
+	list_for_each_entry_safe(fb, tfb, &priv->fbs, filp_head) {
+		list_del(&fb->filp_head);
+		fb->funcs->destroy(fb);
+	}
+	mutex_unlock(&dev->mode_config.mutex);
+}
+
+/**
+ * drm_mode_attachmode - add a mode to the user mode list
+ * @dev: DRM device
+ * @connector: connector to add the mode to
+ * @mode: mode to add
+ *
+ * Add @mode to @connector's user mode list.
+ */
+static int drm_mode_attachmode(struct drm_device *dev,
+			       struct drm_connector *connector,
+			       struct drm_display_mode *mode)
+{
+	int ret = 0;
+
+	list_add_tail(&mode->head, &connector->user_modes);
+	return ret;
+}
+
+int drm_mode_attachmode_crtc(struct drm_device *dev, struct drm_crtc *crtc,
+			     struct drm_display_mode *mode)
+{
+	struct drm_connector *connector;
+	int ret = 0;
+	struct drm_display_mode *dup_mode;
+	int need_dup = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (!connector->encoder)
+			break;
+		if (connector->encoder->crtc == crtc) {
+			if (need_dup)
+				dup_mode = drm_mode_duplicate(dev, mode);
+			else
+				dup_mode = mode;
+			ret = drm_mode_attachmode(dev, connector, dup_mode);
+			if (ret)
+				return ret;
+			need_dup = 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_attachmode_crtc);
+
+static int drm_mode_detachmode(struct drm_device *dev,
+			       struct drm_connector *connector,
+			       struct drm_display_mode *mode)
+{
+	int found = 0;
+	int ret = 0;
+	struct drm_display_mode *match_mode, *t;
+
+	list_for_each_entry_safe(match_mode, t, &connector->user_modes, head) {
+		if (drm_mode_equal(match_mode, mode)) {
+			list_del(&match_mode->head);
+			drm_mode_destroy(dev, match_mode);
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+int drm_mode_detachmode_crtc(struct drm_device *dev, struct drm_display_mode *mode)
+{
+	struct drm_connector *connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		drm_mode_detachmode(dev, connector, mode);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_detachmode_crtc);
+
+/**
+ * drm_fb_attachmode - Attach a user mode to an connector
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * This attaches a user specified mode to an connector.
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_attachmode_ioctl(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_mode_cmd *mode_cmd = data;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	struct drm_mode_object *obj;
+	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	mode = drm_mode_create(dev);
+	if (!mode) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	drm_crtc_convert_umode(mode, umode);
+
+	ret = drm_mode_attachmode(dev, connector, mode);
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+
+/**
+ * drm_fb_detachmode - Detach a user specified mode from an connector
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_detachmode_ioctl(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_mode_mode_cmd *mode_cmd = data;
+	struct drm_connector *connector;
+	struct drm_display_mode mode;
+	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	drm_crtc_convert_umode(&mode, umode);
+	ret = drm_mode_detachmode(dev, connector, &mode);
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+struct drm_property *drm_property_create(struct drm_device *dev, int flags,
+					 const char *name, int num_values)
+{
+	struct drm_property *property = NULL;
+
+	property = kzalloc(sizeof(struct drm_property), GFP_KERNEL);
+	if (!property)
+		return NULL;
+
+	if (num_values) {
+		property->values = kzalloc(sizeof(uint64_t)*num_values, GFP_KERNEL);
+		if (!property->values)
+			goto fail;
+	}
+
+	drm_mode_object_get(dev, &property->base, DRM_MODE_OBJECT_PROPERTY);
+	property->flags = flags;
+	property->num_values = num_values;
+	INIT_LIST_HEAD(&property->enum_blob_list);
+
+	if (name)
+		strncpy(property->name, name, DRM_PROP_NAME_LEN);
+
+	list_add_tail(&property->head, &dev->mode_config.property_list);
+	return property;
+fail:
+	kfree(property);
+	return NULL;
+}
+EXPORT_SYMBOL(drm_property_create);
+
+int drm_property_add_enum(struct drm_property *property, int index,
+			  uint64_t value, const char *name)
+{
+	struct drm_property_enum *prop_enum;
+
+	if (!(property->flags & DRM_MODE_PROP_ENUM))
+		return -EINVAL;
+
+	if (!list_empty(&property->enum_blob_list)) {
+		list_for_each_entry(prop_enum, &property->enum_blob_list, head) {
+			if (prop_enum->value == value) {
+				strncpy(prop_enum->name, name, DRM_PROP_NAME_LEN);
+				prop_enum->name[DRM_PROP_NAME_LEN-1] = '\0';
+				return 0;
+			}
+		}
+	}
+
+	prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL);
+	if (!prop_enum)
+		return -ENOMEM;
+
+	strncpy(prop_enum->name, name, DRM_PROP_NAME_LEN);
+	prop_enum->name[DRM_PROP_NAME_LEN-1] = '\0';
+	prop_enum->value = value;
+
+	property->values[index] = value;
+	list_add_tail(&prop_enum->head, &property->enum_blob_list);
+	return 0;
+}
+EXPORT_SYMBOL(drm_property_add_enum);
+
+void drm_property_destroy(struct drm_device *dev, struct drm_property *property)
+{
+	struct drm_property_enum *prop_enum, *pt;
+
+	list_for_each_entry_safe(prop_enum, pt, &property->enum_blob_list, head) {
+		list_del(&prop_enum->head);
+		kfree(prop_enum);
+	}
+
+	if (property->num_values)
+		kfree(property->values);
+	drm_mode_object_put(dev, &property->base);
+	list_del(&property->head);
+	kfree(property);
+}
+EXPORT_SYMBOL(drm_property_destroy);
+
+int drm_connector_attach_property(struct drm_connector *connector,
+			       struct drm_property *property, uint64_t init_val)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == 0) {
+			connector->property_ids[i] = property->base.id;
+			connector->property_values[i] = init_val;
+			break;
+		}
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_attach_property);
+
+int drm_connector_property_set_value(struct drm_connector *connector,
+				  struct drm_property *property, uint64_t value)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == property->base.id) {
+			connector->property_values[i] = value;
+			break;
+		}
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_property_set_value);
+
+int drm_connector_property_get_value(struct drm_connector *connector,
+				  struct drm_property *property, uint64_t *val)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == property->base.id) {
+			*val = connector->property_values[i];
+			break;
+		}
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_property_get_value);
+
+int drm_mode_getproperty_ioctl(struct drm_device *dev,
+			       void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_mode_get_property *out_resp = data;
+	struct drm_property *property;
+	int enum_count = 0;
+	int blob_count = 0;
+	int value_count = 0;
+	int ret = 0, i;
+	int copied;
+	struct drm_property_enum *prop_enum;
+	struct drm_mode_property_enum __user *enum_ptr;
+	struct drm_property_blob *prop_blob;
+	uint32_t *blob_id_ptr;
+	uint64_t __user *values_ptr;
+	uint32_t __user *blob_length_ptr;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
+	if (!obj) {
+		ret = -EINVAL;
+		goto done;
+	}
+	property = obj_to_property(obj);
+
+	if (property->flags & DRM_MODE_PROP_ENUM) {
+		list_for_each_entry(prop_enum, &property->enum_blob_list, head)
+			enum_count++;
+	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+		list_for_each_entry(prop_blob, &property->enum_blob_list, head)
+			blob_count++;
+	}
+
+	value_count = property->num_values;
+
+	strncpy(out_resp->name, property->name, DRM_PROP_NAME_LEN);
+	out_resp->name[DRM_PROP_NAME_LEN-1] = 0;
+	out_resp->flags = property->flags;
+
+	if ((out_resp->count_values >= value_count) && value_count) {
+		values_ptr = (uint64_t *)(unsigned long)out_resp->values_ptr;
+		for (i = 0; i < value_count; i++) {
+			if (copy_to_user(values_ptr + i, &property->values[i], sizeof(uint64_t))) {
+				ret = -EFAULT;
+				goto done;
+			}
+		}
+	}
+	out_resp->count_values = value_count;
+
+	if (property->flags & DRM_MODE_PROP_ENUM) {
+		if ((out_resp->count_enum_blobs >= enum_count) && enum_count) {
+			copied = 0;
+			enum_ptr = (struct drm_mode_property_enum *)(unsigned long)out_resp->enum_blob_ptr;
+			list_for_each_entry(prop_enum, &property->enum_blob_list, head) {
+
+				if (copy_to_user(&enum_ptr[copied].value, &prop_enum->value, sizeof(uint64_t))) {
+					ret = -EFAULT;
+					goto done;
+				}
+
+				if (copy_to_user(&enum_ptr[copied].name,
+						 &prop_enum->name, DRM_PROP_NAME_LEN)) {
+					ret = -EFAULT;
+					goto done;
+				}
+				copied++;
+			}
+		}
+		out_resp->count_enum_blobs = enum_count;
+	}
+
+	if (property->flags & DRM_MODE_PROP_BLOB) {
+		if ((out_resp->count_enum_blobs >= blob_count) && blob_count) {
+			copied = 0;
+			blob_id_ptr = (uint32_t *)(unsigned long)out_resp->enum_blob_ptr;
+			blob_length_ptr = (uint32_t *)(unsigned long)out_resp->values_ptr;
+
+			list_for_each_entry(prop_blob, &property->enum_blob_list, head) {
+				if (put_user(prop_blob->base.id, blob_id_ptr + copied)) {
+					ret = -EFAULT;
+					goto done;
+				}
+
+				if (put_user(prop_blob->length, blob_length_ptr + copied)) {
+					ret = -EFAULT;
+					goto done;
+				}
+
+				copied++;
+			}
+		}
+		out_resp->count_enum_blobs = blob_count;
+	}
+done:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+static struct drm_property_blob *drm_property_create_blob(struct drm_device *dev, int length,
+							  void *data)
+{
+	struct drm_property_blob *blob;
+
+	if (!length || !data)
+		return NULL;
+
+	blob = kzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
+	if (!blob)
+		return NULL;
+
+	blob->data = (void *)((char *)blob + sizeof(struct drm_property_blob));
+	blob->length = length;
+
+	memcpy(blob->data, data, length);
+
+	drm_mode_object_get(dev, &blob->base, DRM_MODE_OBJECT_BLOB);
+
+	list_add_tail(&blob->head, &dev->mode_config.property_blob_list);
+	return blob;
+}
+
+static void drm_property_destroy_blob(struct drm_device *dev,
+			       struct drm_property_blob *blob)
+{
+	drm_mode_object_put(dev, &blob->base);
+	list_del(&blob->head);
+	kfree(blob);
+}
+
+int drm_mode_getblob_ioctl(struct drm_device *dev,
+			   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_mode_get_blob *out_resp = data;
+	struct drm_property_blob *blob;
+	int ret = 0;
+	void *blob_ptr;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
+	if (!obj) {
+		ret = -EINVAL;
+		goto done;
+	}
+	blob = obj_to_blob(obj);
+
+	if (out_resp->length == blob->length) {
+		blob_ptr = (void *)(unsigned long)out_resp->data;
+		if (copy_to_user(blob_ptr, blob->data, blob->length)){
+			ret = -EFAULT;
+			goto done;
+		}
+	}
+	out_resp->length = blob->length;
+
+done:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+int drm_mode_connector_update_edid_property(struct drm_connector *connector,
+					    struct edid *edid)
+{
+	struct drm_device *dev = connector->dev;
+	int ret = 0;
+
+	if (connector->edid_blob_ptr)
+		drm_property_destroy_blob(dev, connector->edid_blob_ptr);
+
+	/* Delete edid, when there is none. */
+	if (!edid) {
+		connector->edid_blob_ptr = NULL;
+		ret = drm_connector_property_set_value(connector, dev->mode_config.edid_property, 0);
+		return ret;
+	}
+
+	connector->edid_blob_ptr = drm_property_create_blob(connector->dev, 128, edid);
+
+	ret = drm_connector_property_set_value(connector,
+					       dev->mode_config.edid_property,
+					       connector->edid_blob_ptr->base.id);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_mode_connector_update_edid_property);
+
+int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
+				       void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_connector_set_property *out_resp = data;
+	struct drm_mode_object *obj;
+	struct drm_property *property;
+	struct drm_connector *connector;
+	int ret = -EINVAL;
+	int i;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, out_resp->connector_id, DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == out_resp->prop_id)
+			break;
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY) {
+		goto out;
+	}
+
+	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
+	if (!obj) {
+		goto out;
+	}
+	property = obj_to_property(obj);
+
+	if (property->flags & DRM_MODE_PROP_IMMUTABLE)
+		goto out;
+
+	if (property->flags & DRM_MODE_PROP_RANGE) {
+		if (out_resp->value < property->values[0])
+			goto out;
+
+		if (out_resp->value > property->values[1])
+			goto out;
+	} else {
+		int found = 0;
+		for (i = 0; i < property->num_values; i++) {
+			if (property->values[i] == out_resp->value) {
+				found = 1;
+				break;
+			}
+		}
+		if (!found) {
+			goto out;
+		}
+	}
+
+	if (connector->funcs->set_property)
+		ret = connector->funcs->set_property(connector, property, out_resp->value);
+
+	/* store the property value if succesful */
+	if (!ret)
+		drm_connector_property_set_value(connector, property, out_resp->value);
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+
+int drm_mode_replacefb(struct drm_device *dev,
+		       void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_fb_cmd *r = data;
+	struct drm_mode_object *obj;
+	struct drm_framebuffer *fb;
+	int found = 0;
+	struct drm_framebuffer *fbl = NULL;
+	int ret = 0;
+
+	/* right replace the current bo attached to this fb with a new bo */
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, r->buffer_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	fb = obj_to_fb(obj);
+
+	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
+		if (fb == fbl)
+			found = 1;
+
+	if (!found) {
+		DRM_ERROR("tried to replace an fb we didn't own\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (dev->mode_config.funcs->resize_fb)
+		ret = dev->mode_config.funcs->resize_fb(dev, file_priv, fb, r);
+	else
+		ret = -EINVAL;
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+
+}
+
+int drm_mode_connector_attach_encoder(struct drm_connector *connector,
+				      struct drm_encoder *encoder)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0) {
+			connector->encoder_ids[i] = encoder->base.id;
+			return 0;
+		}
+	}
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(drm_mode_connector_attach_encoder);
+
+void drm_mode_connector_detach_encoder(struct drm_connector *connector,
+				    struct drm_encoder *encoder)
+{
+	int i;
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == encoder->base.id) {
+			connector->encoder_ids[i] = 0;
+			if (connector->encoder == encoder)
+				connector->encoder = NULL;
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL(drm_mode_connector_detach_encoder);
+
+bool drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc,
+				  int gamma_size)
+{
+	crtc->gamma_size = gamma_size;
+
+	crtc->gamma_store = kzalloc(gamma_size * sizeof(uint16_t) * 3, GFP_KERNEL);
+	if (!crtc->gamma_store) {
+		crtc->gamma_size = 0;
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(drm_mode_crtc_set_gamma_size);
+
+int drm_mode_gamma_set_ioctl(struct drm_device *dev,
+			     void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_crtc_lut *crtc_lut = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	void *r_base, *g_base, *b_base;
+	int size;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	/* memcpy into gamma store */
+	if (crtc_lut->gamma_size != crtc->gamma_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	size = crtc_lut->gamma_size * (sizeof(uint16_t));
+	r_base = crtc->gamma_store;
+	if (copy_from_user(r_base, (void __user *)(unsigned long)crtc_lut->red, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	g_base = r_base + size;
+	if (copy_from_user(g_base, (void __user *)(unsigned long)crtc_lut->green, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	b_base = g_base + size;
+	if (copy_from_user(b_base, (void __user *)(unsigned long)crtc_lut->blue, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	crtc->funcs->gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+
+}
+
+int drm_mode_gamma_get_ioctl(struct drm_device *dev,
+			     void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_crtc_lut *crtc_lut = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	void *r_base, *g_base, *b_base;
+	int size;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	/* memcpy into gamma store */
+	if (crtc_lut->gamma_size != crtc->gamma_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	size = crtc_lut->gamma_size * (sizeof(uint16_t));
+	r_base = crtc->gamma_store;
+	if (copy_to_user((void __user *)(unsigned long)crtc_lut->red, r_base, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	g_base = r_base + size;
+	if (copy_to_user((void __user *)(unsigned long)crtc_lut->green, g_base, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	b_base = g_base + size;
+	if (copy_to_user((void __user *)(unsigned long)crtc_lut->blue, b_base, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
new file mode 100644
index 000000000000..887ed33b0694
--- /dev/null
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -0,0 +1,822 @@
+/*
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ *
+ * DRM core CRTC related functions
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Authors:
+ *      Keith Packard
+ *	Eric Anholt <eric@anholt.net>
+ *      Dave Airlie <airlied@linux.ie>
+ *      Jesse Barnes <jesse.barnes@intel.com>
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+/*
+ * Detailed mode info for a standard 640x480@60Hz monitor
+ */
+static struct drm_display_mode std_mode[] = {
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 25200, 640, 656,
+		   752, 800, 0, 480, 490, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+};
+
+/**
+ * drm_helper_probe_connector_modes - get complete set of display modes
+ * @dev: DRM device
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Based on @dev's mode_config layout, scan all the connectors and try to detect
+ * modes on them.  Modes will first be added to the connector's probed_modes
+ * list, then culled (based on validity and the @maxX, @maxY parameters) and
+ * put into the normal modes list.
+ *
+ * Intended to be used either at bootup time or when major configuration
+ * changes have occurred.
+ *
+ * FIXME: take into account monitor limits
+ */
+void drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+					     uint32_t maxX, uint32_t maxY)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_display_mode *mode, *t;
+	struct drm_connector_helper_funcs *connector_funcs =
+		connector->helper_private;
+	int ret;
+
+	DRM_DEBUG("%s\n", drm_get_connector_name(connector));
+	/* set all modes to the unverified state */
+	list_for_each_entry_safe(mode, t, &connector->modes, head)
+		mode->status = MODE_UNVERIFIED;
+
+	connector->status = connector->funcs->detect(connector);
+
+	if (connector->status == connector_status_disconnected) {
+		DRM_DEBUG("%s is disconnected\n",
+			  drm_get_connector_name(connector));
+		/* TODO set EDID to NULL */
+		return;
+	}
+
+	ret = (*connector_funcs->get_modes)(connector);
+
+	if (ret) {
+		drm_mode_connector_list_update(connector);
+	}
+
+	if (maxX && maxY)
+		drm_mode_validate_size(dev, &connector->modes, maxX,
+				       maxY, 0);
+	list_for_each_entry_safe(mode, t, &connector->modes, head) {
+		if (mode->status == MODE_OK)
+			mode->status = connector_funcs->mode_valid(connector,
+								   mode);
+	}
+
+
+	drm_mode_prune_invalid(dev, &connector->modes, true);
+
+	if (list_empty(&connector->modes)) {
+		struct drm_display_mode *stdmode;
+
+		DRM_DEBUG("No valid modes on %s\n",
+			  drm_get_connector_name(connector));
+
+		/* Should we do this here ???
+		 * When no valid EDID modes are available we end up
+		 * here and bailed in the past, now we add a standard
+		 * 640x480@60Hz mode and carry on.
+		 */
+		stdmode = drm_mode_duplicate(dev, &std_mode[0]);
+		drm_mode_probed_add(connector, stdmode);
+		drm_mode_list_concat(&connector->probed_modes,
+				     &connector->modes);
+
+		DRM_DEBUG("Adding standard 640x480 @ 60Hz to %s\n",
+			  drm_get_connector_name(connector));
+	}
+
+	drm_mode_sort(&connector->modes);
+
+	DRM_DEBUG("Probed modes for %s\n", drm_get_connector_name(connector));
+	list_for_each_entry_safe(mode, t, &connector->modes, head) {
+		mode->vrefresh = drm_mode_vrefresh(mode);
+
+		drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+		drm_mode_debug_printmodeline(mode);
+	}
+}
+EXPORT_SYMBOL(drm_helper_probe_single_connector_modes);
+
+void drm_helper_probe_connector_modes(struct drm_device *dev, uint32_t maxX,
+				      uint32_t maxY)
+{
+	struct drm_connector *connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		drm_helper_probe_single_connector_modes(connector, maxX, maxY);
+	}
+}
+EXPORT_SYMBOL(drm_helper_probe_connector_modes);
+
+
+/**
+ * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
+ * @crtc: CRTC to check
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Walk @crtc's DRM device's mode_config and see if it's in use.
+ *
+ * RETURNS:
+ * True if @crtc is part of the mode_config, false otherwise.
+ */
+bool drm_helper_crtc_in_use(struct drm_crtc *crtc)
+{
+	struct drm_encoder *encoder;
+	struct drm_device *dev = crtc->dev;
+	/* FIXME: Locking around list access? */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
+		if (encoder->crtc == crtc)
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(drm_helper_crtc_in_use);
+
+/**
+ * drm_disable_unused_functions - disable unused objects
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * If an connector or CRTC isn't part of @dev's mode_config, it can be disabled
+ * by calling its dpms function, which should power it off.
+ */
+void drm_helper_disable_unused_functions(struct drm_device *dev)
+{
+	struct drm_encoder *encoder;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		encoder_funcs = encoder->helper_private;
+		if (!encoder->crtc)
+			(*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
+	}
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+		crtc->enabled = drm_helper_crtc_in_use(crtc);
+		if (!crtc->enabled) {
+			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
+			crtc->fb = NULL;
+		}
+	}
+}
+EXPORT_SYMBOL(drm_helper_disable_unused_functions);
+
+static struct drm_display_mode *drm_has_preferred_mode(struct drm_connector *connector, int width, int height)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (drm_mode_width(mode) > width ||
+		    drm_mode_height(mode) > height)
+			continue;
+		if (mode->type & DRM_MODE_TYPE_PREFERRED)
+			return mode;
+	}
+	return NULL;
+}
+
+static bool drm_connector_enabled(struct drm_connector *connector, bool strict)
+{
+	bool enable;
+
+	if (strict) {
+		enable = connector->status == connector_status_connected;
+	} else {
+		enable = connector->status != connector_status_disconnected;
+	}
+	return enable;
+}
+
+static void drm_enable_connectors(struct drm_device *dev, bool *enabled)
+{
+	bool any_enabled = false;
+	struct drm_connector *connector;
+	int i = 0;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		enabled[i] = drm_connector_enabled(connector, true);
+		any_enabled |= enabled[i];
+		i++;
+	}
+
+	if (any_enabled)
+		return;
+
+	i = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		enabled[i] = drm_connector_enabled(connector, false);
+		i++;
+	}
+}
+
+static bool drm_target_preferred(struct drm_device *dev,
+				 struct drm_display_mode **modes,
+				 bool *enabled, int width, int height)
+{
+	struct drm_connector *connector;
+	int i = 0;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+
+		if (enabled[i] == false) {
+			i++;
+			continue;
+		}
+
+		modes[i] = drm_has_preferred_mode(connector, width, height);
+		if (!modes[i]) {
+			list_for_each_entry(modes[i], &connector->modes, head)
+				break;
+		}
+		i++;
+	}
+	return true;
+}
+
+static int drm_pick_crtcs(struct drm_device *dev,
+			  struct drm_crtc **best_crtcs,
+			  struct drm_display_mode **modes,
+			  int n, int width, int height)
+{
+	int c, o;
+	struct drm_connector *connector;
+	struct drm_connector_helper_funcs *connector_funcs;
+	struct drm_encoder *encoder;
+	struct drm_crtc *best_crtc;
+	int my_score, best_score, score;
+	struct drm_crtc **crtcs, *crtc;
+
+	if (n == dev->mode_config.num_connector)
+		return 0;
+	c = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (c == n)
+			break;
+		c++;
+	}
+
+	best_crtcs[n] = NULL;
+	best_crtc = NULL;
+	best_score = drm_pick_crtcs(dev, best_crtcs, modes, n+1, width, height);
+	if (modes[n] == NULL)
+		return best_score;
+
+	crtcs = kmalloc(dev->mode_config.num_connector *
+			sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!crtcs)
+		return best_score;
+
+	my_score = 1;
+	if (connector->status == connector_status_connected)
+		my_score++;
+	if (drm_has_preferred_mode(connector, width, height))
+		my_score++;
+
+	connector_funcs = connector->helper_private;
+	encoder = connector_funcs->best_encoder(connector);
+	if (!encoder)
+		goto out;
+
+	connector->encoder = encoder;
+
+	/* select a crtc for this connector and then attempt to configure
+	   remaining connectors */
+	c = 0;
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+
+		if ((connector->encoder->possible_crtcs & (1 << c)) == 0) {
+			c++;
+			continue;
+		}
+
+		for (o = 0; o < n; o++)
+			if (best_crtcs[o] == crtc)
+				break;
+
+		if (o < n) {
+			/* ignore cloning for now */
+			c++;
+			continue;
+		}
+
+		crtcs[n] = crtc;
+		memcpy(crtcs, best_crtcs, n * sizeof(struct drm_crtc *));
+		score = my_score + drm_pick_crtcs(dev, crtcs, modes, n + 1,
+						  width, height);
+		if (score > best_score) {
+			best_crtc = crtc;
+			best_score = score;
+			memcpy(best_crtcs, crtcs,
+			       dev->mode_config.num_connector *
+			       sizeof(struct drm_crtc *));
+		}
+		c++;
+	}
+out:
+	kfree(crtcs);
+	return best_score;
+}
+
+static void drm_setup_crtcs(struct drm_device *dev)
+{
+	struct drm_crtc **crtcs;
+	struct drm_display_mode **modes;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	bool *enabled;
+	int width, height;
+	int i, ret;
+
+	width = dev->mode_config.max_width;
+	height = dev->mode_config.max_height;
+
+	/* clean out all the encoder/crtc combos */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		encoder->crtc = NULL;
+	}
+
+	crtcs = kcalloc(dev->mode_config.num_connector,
+			sizeof(struct drm_crtc *), GFP_KERNEL);
+	modes = kcalloc(dev->mode_config.num_connector,
+			sizeof(struct drm_display_mode *), GFP_KERNEL);
+	enabled = kcalloc(dev->mode_config.num_connector,
+			  sizeof(bool), GFP_KERNEL);
+
+	drm_enable_connectors(dev, enabled);
+
+	ret = drm_target_preferred(dev, modes, enabled, width, height);
+	if (!ret)
+		DRM_ERROR("Unable to find initial modes\n");
+
+	drm_pick_crtcs(dev, crtcs, modes, 0, width, height);
+
+	i = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		struct drm_display_mode *mode = modes[i];
+		struct drm_crtc *crtc = crtcs[i];
+
+		if (connector->encoder == NULL) {
+			i++;
+			continue;
+		}
+
+		if (mode && crtc) {
+			crtc->desired_mode = mode;
+			connector->encoder->crtc = crtc;
+		} else
+			connector->encoder->crtc = NULL;
+		i++;
+	}
+
+	kfree(crtcs);
+	kfree(modes);
+	kfree(enabled);
+}
+/**
+ * drm_crtc_set_mode - set a mode
+ * @crtc: CRTC to program
+ * @mode: mode to use
+ * @x: width of mode
+ * @y: height of mode
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Try to set @mode on @crtc.  Give @crtc and its associated connectors a chance
+ * to fixup or reject the mode prior to trying to set it.
+ *
+ * RETURNS:
+ * True if the mode was set successfully, or false otherwise.
+ */
+bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
+			      struct drm_display_mode *mode,
+			      int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_display_mode *adjusted_mode, saved_mode;
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	int saved_x, saved_y;
+	struct drm_encoder *encoder;
+	bool ret = true;
+
+	adjusted_mode = drm_mode_duplicate(dev, mode);
+
+	crtc->enabled = drm_helper_crtc_in_use(crtc);
+
+	if (!crtc->enabled)
+		return true;
+
+	saved_mode = crtc->mode;
+	saved_x = crtc->x;
+	saved_y = crtc->y;
+
+	/* Update crtc values up front so the driver can rely on them for mode
+	 * setting.
+	 */
+	crtc->mode = *mode;
+	crtc->x = x;
+	crtc->y = y;
+
+	if (drm_mode_equal(&saved_mode, &crtc->mode)) {
+		if (saved_x != crtc->x || saved_y != crtc->y) {
+			crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y);
+			goto done;
+		}
+	}
+
+	/* Pass our mode to the connectors and the CRTC to give them a chance to
+	 * adjust it according to limitations or connector properties, and also
+	 * a chance to reject the mode entirely.
+	 */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+		encoder_funcs = encoder->helper_private;
+		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
+						      adjusted_mode))) {
+			goto done;
+		}
+	}
+
+	if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) {
+		goto done;
+	}
+
+	/* Prepare the encoders and CRTCs before setting the mode. */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+		encoder_funcs = encoder->helper_private;
+		/* Disable the encoders as the first thing we do. */
+		encoder_funcs->prepare(encoder);
+	}
+
+	crtc_funcs->prepare(crtc);
+
+	/* Set up the DPLL and any encoders state that needs to adjust or depend
+	 * on the DPLL.
+	 */
+	crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y);
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+
+		DRM_INFO("%s: set mode %s %x\n", drm_get_encoder_name(encoder),
+			 mode->name, mode->base.id);
+		encoder_funcs = encoder->helper_private;
+		encoder_funcs->mode_set(encoder, mode, adjusted_mode);
+	}
+
+	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
+	crtc_funcs->commit(crtc);
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+
+		encoder_funcs = encoder->helper_private;
+		encoder_funcs->commit(encoder);
+
+	}
+
+	/* XXX free adjustedmode */
+	drm_mode_destroy(dev, adjusted_mode);
+	/* FIXME: add subpixel order */
+done:
+	if (!ret) {
+		crtc->mode = saved_mode;
+		crtc->x = saved_x;
+		crtc->y = saved_y;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_crtc_helper_set_mode);
+
+
+/**
+ * drm_crtc_helper_set_config - set a new config from userspace
+ * @crtc: CRTC to setup
+ * @crtc_info: user provided configuration
+ * @new_mode: new mode to set
+ * @connector_set: set of connectors for the new config
+ * @fb: new framebuffer
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Setup a new configuration, provided by the user in @crtc_info, and enable
+ * it.
+ *
+ * RETURNS:
+ * Zero. (FIXME)
+ */
+int drm_crtc_helper_set_config(struct drm_mode_set *set)
+{
+	struct drm_device *dev;
+	struct drm_crtc **save_crtcs, *new_crtc;
+	struct drm_encoder **save_encoders, *new_encoder;
+	bool save_enabled;
+	bool changed = false;
+	bool flip_or_move = false;
+	struct drm_connector *connector;
+	int count = 0, ro, fail = 0;
+	struct drm_crtc_helper_funcs *crtc_funcs;
+	int ret = 0;
+
+	DRM_DEBUG("\n");
+
+	if (!set)
+		return -EINVAL;
+
+	if (!set->crtc)
+		return -EINVAL;
+
+	if (!set->crtc->helper_private)
+		return -EINVAL;
+
+	crtc_funcs = set->crtc->helper_private;
+
+	DRM_DEBUG("crtc: %p %d fb: %p connectors: %p num_connectors: %d (x, y) (%i, %i)\n",
+		  set->crtc, set->crtc->base.id, set->fb, set->connectors,
+		  (int)set->num_connectors, set->x, set->y);
+
+	dev = set->crtc->dev;
+
+	/* save previous config */
+	save_enabled = set->crtc->enabled;
+
+	/* this is meant to be num_connector not num_crtc */
+	save_crtcs = kzalloc(dev->mode_config.num_connector *
+			     sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!save_crtcs)
+		return -ENOMEM;
+
+	save_encoders = kzalloc(dev->mode_config.num_connector *
+				sizeof(struct drm_encoders *), GFP_KERNEL);
+	if (!save_encoders) {
+		kfree(save_crtcs);
+		return -ENOMEM;
+	}
+
+	/* We should be able to check here if the fb has the same properties
+	 * and then just flip_or_move it */
+	if (set->crtc->fb != set->fb) {
+		/* if we have no fb then its a change not a flip */
+		if (set->crtc->fb == NULL)
+			changed = true;
+		else
+			flip_or_move = true;
+	}
+
+	if (set->x != set->crtc->x || set->y != set->crtc->y)
+		flip_or_move = true;
+
+	if (set->mode && !drm_mode_equal(set->mode, &set->crtc->mode)) {
+		DRM_DEBUG("modes are different\n");
+		drm_mode_debug_printmodeline(&set->crtc->mode);
+		drm_mode_debug_printmodeline(set->mode);
+		changed = true;
+	}
+
+	/* a) traverse passed in connector list and get encoders for them */
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		struct drm_connector_helper_funcs *connector_funcs =
+			connector->helper_private;
+		save_encoders[count++] = connector->encoder;
+		new_encoder = connector->encoder;
+		for (ro = 0; ro < set->num_connectors; ro++) {
+			if (set->connectors[ro] == connector) {
+				new_encoder = connector_funcs->best_encoder(connector);
+				/* if we can't get an encoder for a connector
+				   we are setting now - then fail */
+				if (new_encoder == NULL)
+					/* don't break so fail path works correct */
+					fail = 1;
+				break;
+			}
+		}
+
+		if (new_encoder != connector->encoder) {
+			changed = true;
+			connector->encoder = new_encoder;
+		}
+	}
+
+	if (fail) {
+		ret = -EINVAL;
+		goto fail_no_encoder;
+	}
+
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (!connector->encoder)
+			continue;
+
+		save_crtcs[count++] = connector->encoder->crtc;
+
+		if (connector->encoder->crtc == set->crtc)
+			new_crtc = NULL;
+		else
+			new_crtc = connector->encoder->crtc;
+
+		for (ro = 0; ro < set->num_connectors; ro++) {
+			if (set->connectors[ro] == connector)
+				new_crtc = set->crtc;
+		}
+		if (new_crtc != connector->encoder->crtc) {
+			changed = true;
+			connector->encoder->crtc = new_crtc;
+		}
+	}
+
+	/* mode_set_base is not a required function */
+	if (flip_or_move && !crtc_funcs->mode_set_base)
+		changed = true;
+
+	if (changed) {
+		set->crtc->fb = set->fb;
+		set->crtc->enabled = (set->mode != NULL);
+		if (set->mode != NULL) {
+			DRM_DEBUG("attempting to set mode from userspace\n");
+			drm_mode_debug_printmodeline(set->mode);
+			if (!drm_crtc_helper_set_mode(set->crtc, set->mode,
+						      set->x, set->y)) {
+				ret = -EINVAL;
+				goto fail_set_mode;
+			}
+			/* TODO are these needed? */
+			set->crtc->desired_x = set->x;
+			set->crtc->desired_y = set->y;
+			set->crtc->desired_mode = set->mode;
+		}
+		drm_helper_disable_unused_functions(dev);
+	} else if (flip_or_move) {
+		if (set->crtc->fb != set->fb)
+			set->crtc->fb = set->fb;
+		crtc_funcs->mode_set_base(set->crtc, set->x, set->y);
+	}
+
+	kfree(save_encoders);
+	kfree(save_crtcs);
+	return 0;
+
+fail_set_mode:
+	set->crtc->enabled = save_enabled;
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		connector->encoder->crtc = save_crtcs[count++];
+fail_no_encoder:
+	kfree(save_crtcs);
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		connector->encoder = save_encoders[count++];
+	}
+	kfree(save_encoders);
+	return ret;
+}
+EXPORT_SYMBOL(drm_crtc_helper_set_config);
+
+bool drm_helper_plugged_event(struct drm_device *dev)
+{
+	DRM_DEBUG("\n");
+
+	drm_helper_probe_connector_modes(dev, dev->mode_config.max_width,
+					 dev->mode_config.max_height);
+
+	drm_setup_crtcs(dev);
+
+	/* alert the driver fb layer */
+	dev->mode_config.funcs->fb_changed(dev);
+
+	/* FIXME: send hotplug event */
+	return true;
+}
+/**
+ * drm_initial_config - setup a sane initial connector configuration
+ * @dev: DRM device
+ * @can_grow: this configuration is growable
+ *
+ * LOCKING:
+ * Called at init time, must take mode config lock.
+ *
+ * Scan the CRTCs and connectors and try to put together an initial setup.
+ * At the moment, this is a cloned configuration across all heads with
+ * a new framebuffer object as the backing store.
+ *
+ * RETURNS:
+ * Zero if everything went ok, nonzero otherwise.
+ */
+bool drm_helper_initial_config(struct drm_device *dev, bool can_grow)
+{
+	int ret = false;
+
+	drm_helper_plugged_event(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_helper_initial_config);
+
+/**
+ * drm_hotplug_stage_two
+ * @dev DRM device
+ * @connector hotpluged connector
+ *
+ * LOCKING.
+ * Caller must hold mode config lock, function might grab struct lock.
+ *
+ * Stage two of a hotplug.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_helper_hotplug_stage_two(struct drm_device *dev)
+{
+	dev->mode_config.hotplug_counter++;
+
+	drm_helper_plugged_event(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_helper_hotplug_stage_two);
+
+int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
+				   struct drm_mode_fb_cmd *mode_cmd)
+{
+	fb->width = mode_cmd->width;
+	fb->height = mode_cmd->height;
+	fb->pitch = mode_cmd->pitch;
+	fb->bits_per_pixel = mode_cmd->bpp;
+	fb->depth = mode_cmd->depth;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_helper_mode_fill_fb_struct);
+
+int drm_helper_resume_force_mode(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
+	int ret;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+
+		if (!crtc->enabled)
+			continue;
+
+		ret = drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
+					       crtc->y);
+
+		if (ret == false)
+			DRM_ERROR("failed to set mode on crtc %p\n", crtc);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_helper_resume_force_mode);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 98a781375f60..0b9f3164a3b2 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -126,6 +126,26 @@ static struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_mode_attachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_mode_detachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_REPLACEFB, drm_mode_replacefb, DRM_MASTER|DRM_ROOT_ONLY|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
@@ -150,7 +170,7 @@ int drm_lastclose(struct drm_device * dev)
 		dev->driver->lastclose(dev);
 	DRM_DEBUG("driver lastclose completed\n");
 
-	if (dev->irq_enabled)
+	if (dev->irq_enabled && !drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_irq_uninstall(dev);
 
 	mutex_lock(&dev->struct_mutex);
@@ -160,7 +180,8 @@ int drm_lastclose(struct drm_device * dev)
 	del_timer(&dev->timer);
 
 	/* Clear AGP information */
-	if (drm_core_has_AGP(dev) && dev->agp) {
+	if (drm_core_has_AGP(dev) && dev->agp &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
 		struct drm_agp_mem *entry, *tempe;
 
 		/* Remove AGP resources, but leave dev->agp
@@ -179,7 +200,8 @@ int drm_lastclose(struct drm_device * dev)
 		dev->agp->acquired = 0;
 		dev->agp->enabled = 0;
 	}
-	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg) {
+	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
 		drm_sg_cleanup(dev->sg);
 		dev->sg = NULL;
 	}
@@ -206,7 +228,8 @@ int drm_lastclose(struct drm_device * dev)
 	}
 	dev->queue_count = 0;
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA))
+	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_dma_takedown(dev);
 
 	dev->dev_mapping = NULL;
@@ -307,6 +330,9 @@ static void drm_cleanup(struct drm_device * dev)
 	drm_ht_remove(&dev->map_hash);
 	drm_ctxbitmap_cleanup(dev);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		drm_put_minor(&dev->control);
+
 	if (driver->driver_features & DRIVER_GEM)
 		drm_gem_destroy(dev);
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
new file mode 100644
index 000000000000..681753e57dc7
--- /dev/null
+++ b/drivers/gpu/drm/drm_edid.c
@@ -0,0 +1,732 @@
+/*
+ * Copyright (c) 2006 Luc Verhaegen (quirks list)
+ * Copyright (c) 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * DDC probing routines (drm_ddc_read & drm_do_probe_ddc_edid) originally from
+ * FB layer.
+ *   Copyright (C) 2006 Dennis Munsie <dmunsie@cecropia.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#include "drmP.h"
+#include "drm_edid.h"
+
+/*
+ * TODO:
+ *   - support EDID 1.4 (incl. CE blocks)
+ */
+
+/*
+ * EDID blocks out in the wild have a variety of bugs, try to collect
+ * them here (note that userspace may work around broken monitors first,
+ * but fixes should make their way here so that the kernel "just works"
+ * on as many displays as possible).
+ */
+
+/* First detailed mode wrong, use largest 60Hz mode */
+#define EDID_QUIRK_PREFER_LARGE_60		(1 << 0)
+/* Reported 135MHz pixel clock is too high, needs adjustment */
+#define EDID_QUIRK_135_CLOCK_TOO_HIGH		(1 << 1)
+/* Prefer the largest mode at 75 Hz */
+#define EDID_QUIRK_PREFER_LARGE_75		(1 << 2)
+/* Detail timing is in cm not mm */
+#define EDID_QUIRK_DETAILED_IN_CM		(1 << 3)
+/* Detailed timing descriptors have bogus size values, so just take the
+ * maximum size and use that.
+ */
+#define EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE	(1 << 4)
+/* Monitor forgot to set the first detailed is preferred bit. */
+#define EDID_QUIRK_FIRST_DETAILED_PREFERRED	(1 << 5)
+/* use +hsync +vsync for detailed mode */
+#define EDID_QUIRK_DETAILED_SYNC_PP		(1 << 6)
+
+static struct edid_quirk {
+	char *vendor;
+	int product_id;
+	u32 quirks;
+} edid_quirk_list[] = {
+	/* Acer AL1706 */
+	{ "ACR", 44358, EDID_QUIRK_PREFER_LARGE_60 },
+	/* Acer F51 */
+	{ "API", 0x7602, EDID_QUIRK_PREFER_LARGE_60 },
+	/* Unknown Acer */
+	{ "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
+	/* Belinea 10 15 55 */
+	{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
+	{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
+
+	/* Envision Peripherals, Inc. EN-7100e */
+	{ "EPI", 59264, EDID_QUIRK_135_CLOCK_TOO_HIGH },
+
+	/* Funai Electronics PM36B */
+	{ "FCM", 13600, EDID_QUIRK_PREFER_LARGE_75 |
+	  EDID_QUIRK_DETAILED_IN_CM },
+
+	/* LG Philips LCD LP154W01-A5 */
+	{ "LPL", 0, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
+	{ "LPL", 0x2a00, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
+
+	/* Philips 107p5 CRT */
+	{ "PHL", 57364, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
+	/* Proview AY765C */
+	{ "PTS", 765, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
+	/* Samsung SyncMaster 205BW.  Note: irony */
+	{ "SAM", 541, EDID_QUIRK_DETAILED_SYNC_PP },
+	/* Samsung SyncMaster 22[5-6]BW */
+	{ "SAM", 596, EDID_QUIRK_PREFER_LARGE_60 },
+	{ "SAM", 638, EDID_QUIRK_PREFER_LARGE_60 },
+};
+
+
+/* Valid EDID header has these bytes */
+static u8 edid_header[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+/**
+ * edid_is_valid - sanity check EDID data
+ * @edid: EDID data
+ *
+ * Sanity check the EDID block by looking at the header, the version number
+ * and the checksum.  Return 0 if the EDID doesn't check out, or 1 if it's
+ * valid.
+ */
+static bool edid_is_valid(struct edid *edid)
+{
+	int i;
+	u8 csum = 0;
+	u8 *raw_edid = (u8 *)edid;
+
+	if (memcmp(edid->header, edid_header, sizeof(edid_header)))
+		goto bad;
+	if (edid->version != 1) {
+		DRM_ERROR("EDID has major version %d, instead of 1\n", edid->version);
+		goto bad;
+	}
+	if (edid->revision <= 0 || edid->revision > 3) {
+		DRM_ERROR("EDID has minor version %d, which is not between 0-3\n", edid->revision);
+		goto bad;
+	}
+
+	for (i = 0; i < EDID_LENGTH; i++)
+		csum += raw_edid[i];
+	if (csum) {
+		DRM_ERROR("EDID checksum is invalid, remainder is %d\n", csum);
+		goto bad;
+	}
+
+	return 1;
+
+bad:
+	if (raw_edid) {
+		DRM_ERROR("Raw EDID:\n");
+		print_hex_dump_bytes(KERN_ERR, DUMP_PREFIX_NONE, raw_edid, EDID_LENGTH);
+		printk("\n");
+	}
+	return 0;
+}
+
+/**
+ * edid_vendor - match a string against EDID's obfuscated vendor field
+ * @edid: EDID to match
+ * @vendor: vendor string
+ *
+ * Returns true if @vendor is in @edid, false otherwise
+ */
+static bool edid_vendor(struct edid *edid, char *vendor)
+{
+	char edid_vendor[3];
+
+	edid_vendor[0] = ((edid->mfg_id[0] & 0x7c) >> 2) + '@';
+	edid_vendor[1] = (((edid->mfg_id[0] & 0x3) << 3) |
+			  ((edid->mfg_id[1] & 0xe0) >> 5)) + '@';
+	edid_vendor[2] = (edid->mfg_id[2] & 0x1f) + '@';
+
+	return !strncmp(edid_vendor, vendor, 3);
+}
+
+/**
+ * edid_get_quirks - return quirk flags for a given EDID
+ * @edid: EDID to process
+ *
+ * This tells subsequent routines what fixes they need to apply.
+ */
+static u32 edid_get_quirks(struct edid *edid)
+{
+	struct edid_quirk *quirk;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(edid_quirk_list); i++) {
+		quirk = &edid_quirk_list[i];
+
+		if (edid_vendor(edid, quirk->vendor) &&
+		    (EDID_PRODUCT_ID(edid) == quirk->product_id))
+			return quirk->quirks;
+	}
+
+	return 0;
+}
+
+#define MODE_SIZE(m) ((m)->hdisplay * (m)->vdisplay)
+#define MODE_REFRESH_DIFF(m,r) (abs((m)->vrefresh - target_refresh))
+
+
+/**
+ * edid_fixup_preferred - set preferred modes based on quirk list
+ * @connector: has mode list to fix up
+ * @quirks: quirks list
+ *
+ * Walk the mode list for @connector, clearing the preferred status
+ * on existing modes and setting it anew for the right mode ala @quirks.
+ */
+static void edid_fixup_preferred(struct drm_connector *connector,
+				 u32 quirks)
+{
+	struct drm_display_mode *t, *cur_mode, *preferred_mode;
+	int target_refresh;
+
+	if (list_empty(&connector->probed_modes))
+		return;
+
+	if (quirks & EDID_QUIRK_PREFER_LARGE_60)
+		target_refresh = 60;
+	if (quirks & EDID_QUIRK_PREFER_LARGE_75)
+		target_refresh = 75;
+
+	preferred_mode = list_first_entry(&connector->probed_modes,
+					  struct drm_display_mode, head);
+
+	list_for_each_entry_safe(cur_mode, t, &connector->probed_modes, head) {
+		cur_mode->type &= ~DRM_MODE_TYPE_PREFERRED;
+
+		if (cur_mode == preferred_mode)
+			continue;
+
+		/* Largest mode is preferred */
+		if (MODE_SIZE(cur_mode) > MODE_SIZE(preferred_mode))
+			preferred_mode = cur_mode;
+
+		/* At a given size, try to get closest to target refresh */
+		if ((MODE_SIZE(cur_mode) == MODE_SIZE(preferred_mode)) &&
+		    MODE_REFRESH_DIFF(cur_mode, target_refresh) <
+		    MODE_REFRESH_DIFF(preferred_mode, target_refresh)) {
+			preferred_mode = cur_mode;
+		}
+	}
+
+	preferred_mode->type |= DRM_MODE_TYPE_PREFERRED;
+}
+
+/**
+ * drm_mode_std - convert standard mode info (width, height, refresh) into mode
+ * @t: standard timing params
+ *
+ * Take the standard timing params (in this case width, aspect, and refresh)
+ * and convert them into a real mode using CVT.
+ *
+ * Punts for now, but should eventually use the FB layer's CVT based mode
+ * generation code.
+ */
+struct drm_display_mode *drm_mode_std(struct drm_device *dev,
+				      struct std_timing *t)
+{
+	struct drm_display_mode *mode;
+	int hsize = t->hsize * 8 + 248, vsize;
+
+	mode = drm_mode_create(dev);
+	if (!mode)
+		return NULL;
+
+	if (t->aspect_ratio == 0)
+		vsize = (hsize * 10) / 16;
+	else if (t->aspect_ratio == 1)
+		vsize = (hsize * 3) / 4;
+	else if (t->aspect_ratio == 2)
+		vsize = (hsize * 4) / 5;
+	else
+		vsize = (hsize * 9) / 16;
+
+	drm_mode_set_name(mode);
+
+	return mode;
+}
+
+/**
+ * drm_mode_detailed - create a new mode from an EDID detailed timing section
+ * @dev: DRM device (needed to create new mode)
+ * @edid: EDID block
+ * @timing: EDID detailed timing info
+ * @quirks: quirks to apply
+ *
+ * An EDID detailed timing block contains enough info for us to create and
+ * return a new struct drm_display_mode.
+ */
+static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
+						  struct edid *edid,
+						  struct detailed_timing *timing,
+						  u32 quirks)
+{
+	struct drm_display_mode *mode;
+	struct detailed_pixel_timing *pt = &timing->data.pixel_data;
+
+	if (pt->stereo) {
+		printk(KERN_WARNING "stereo mode not supported\n");
+		return NULL;
+	}
+	if (!pt->separate_sync) {
+		printk(KERN_WARNING "integrated sync not supported\n");
+		return NULL;
+	}
+
+	mode = drm_mode_create(dev);
+	if (!mode)
+		return NULL;
+
+	mode->type = DRM_MODE_TYPE_DRIVER;
+
+	if (quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH)
+		timing->pixel_clock = 1088;
+
+	mode->clock = timing->pixel_clock * 10;
+
+	mode->hdisplay = (pt->hactive_hi << 8) | pt->hactive_lo;
+	mode->hsync_start = mode->hdisplay + ((pt->hsync_offset_hi << 8) |
+					      pt->hsync_offset_lo);
+	mode->hsync_end = mode->hsync_start +
+		((pt->hsync_pulse_width_hi << 8) |
+		 pt->hsync_pulse_width_lo);
+	mode->htotal = mode->hdisplay + ((pt->hblank_hi << 8) | pt->hblank_lo);
+
+	mode->vdisplay = (pt->vactive_hi << 8) | pt->vactive_lo;
+	mode->vsync_start = mode->vdisplay + ((pt->vsync_offset_hi << 8) |
+					      pt->vsync_offset_lo);
+	mode->vsync_end = mode->vsync_start +
+		((pt->vsync_pulse_width_hi << 8) |
+		 pt->vsync_pulse_width_lo);
+	mode->vtotal = mode->vdisplay + ((pt->vblank_hi << 8) | pt->vblank_lo);
+
+	drm_mode_set_name(mode);
+
+	if (pt->interlaced)
+		mode->flags |= DRM_MODE_FLAG_INTERLACE;
+
+	if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
+		pt->hsync_positive = 1;
+		pt->vsync_positive = 1;
+	}
+
+	mode->flags |= pt->hsync_positive ? DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
+	mode->flags |= pt->vsync_positive ? DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
+
+	mode->width_mm = pt->width_mm_lo | (pt->width_mm_hi << 8);
+	mode->height_mm = pt->height_mm_lo | (pt->height_mm_hi << 8);
+
+	if (quirks & EDID_QUIRK_DETAILED_IN_CM) {
+		mode->width_mm *= 10;
+		mode->height_mm *= 10;
+	}
+
+	if (quirks & EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE) {
+		mode->width_mm = edid->width_cm * 10;
+		mode->height_mm = edid->height_cm * 10;
+	}
+
+	return mode;
+}
+
+/*
+ * Detailed mode info for the EDID "established modes" data to use.
+ */
+static struct drm_display_mode edid_est_modes[] = {
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
+		   968, 1056, 0, 600, 601, 605, 628, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@60Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 36000, 800, 824,
+		   896, 1024, 0, 600, 601, 603,  625, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@56Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 31500, 640, 656,
+		   720, 840, 0, 480, 481, 484, 500, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@75Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 31500, 640, 664,
+		   704,  832, 0, 480, 489, 491, 520, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@72Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 30240, 640, 704,
+		   768,  864, 0, 480, 483, 486, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@67Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25200, 640, 656,
+		   752, 800, 0, 480, 490, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@60Hz */
+	{ DRM_MODE("720x400", DRM_MODE_TYPE_DRIVER, 35500, 720, 738,
+		   846, 900, 0, 400, 421, 423,  449, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 720x400@88Hz */
+	{ DRM_MODE("720x400", DRM_MODE_TYPE_DRIVER, 28320, 720, 738,
+		   846,  900, 0, 400, 412, 414, 449, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 720x400@70Hz */
+	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 135000, 1280, 1296,
+		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1280x1024@75Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 78800, 1024, 1040,
+		   1136, 1312, 0,  768, 769, 772, 800, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1024x768@75Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 75000, 1024, 1048,
+		   1184, 1328, 0,  768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 1024x768@70Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
+		   1184, 1344, 0,  768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 1024x768@60Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER,44900, 1024, 1032,
+		   1208, 1264, 0, 768, 768, 776, 817, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_INTERLACE) }, /* 1024x768@43Hz */
+	{ DRM_MODE("832x624", DRM_MODE_TYPE_DRIVER, 57284, 832, 864,
+		   928, 1152, 0, 624, 625, 628, 667, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 832x624@75Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 49500, 800, 816,
+		   896, 1056, 0, 600, 601, 604,  625, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@75Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 50000, 800, 856,
+		   976, 1040, 0, 600, 637, 643, 666, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@72Hz */
+	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
+		   1344, 1600, 0,  864, 865, 868, 900, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1152x864@75Hz */
+};
+
+#define EDID_EST_TIMINGS 16
+#define EDID_STD_TIMINGS 8
+#define EDID_DETAILED_TIMINGS 4
+
+/**
+ * add_established_modes - get est. modes from EDID and add them
+ * @edid: EDID block to scan
+ *
+ * Each EDID block contains a bitmap of the supported "established modes" list
+ * (defined above).  Tease them out and add them to the global modes list.
+ */
+static int add_established_modes(struct drm_connector *connector, struct edid *edid)
+{
+	struct drm_device *dev = connector->dev;
+	unsigned long est_bits = edid->established_timings.t1 |
+		(edid->established_timings.t2 << 8) |
+		((edid->established_timings.mfg_rsvd & 0x80) << 9);
+	int i, modes = 0;
+
+	for (i = 0; i <= EDID_EST_TIMINGS; i++)
+		if (est_bits & (1<<i)) {
+			struct drm_display_mode *newmode;
+			newmode = drm_mode_duplicate(dev, &edid_est_modes[i]);
+			if (newmode) {
+				drm_mode_probed_add(connector, newmode);
+				modes++;
+			}
+		}
+
+	return modes;
+}
+
+/**
+ * add_standard_modes - get std. modes from EDID and add them
+ * @edid: EDID block to scan
+ *
+ * Standard modes can be calculated using the CVT standard.  Grab them from
+ * @edid, calculate them, and add them to the list.
+ */
+static int add_standard_modes(struct drm_connector *connector, struct edid *edid)
+{
+	struct drm_device *dev = connector->dev;
+	int i, modes = 0;
+
+	for (i = 0; i < EDID_STD_TIMINGS; i++) {
+		struct std_timing *t = &edid->standard_timings[i];
+		struct drm_display_mode *newmode;
+
+		/* If std timings bytes are 1, 1 it's empty */
+		if (t->hsize == 1 && (t->aspect_ratio | t->vfreq) == 1)
+			continue;
+
+		newmode = drm_mode_std(dev, &edid->standard_timings[i]);
+		if (newmode) {
+			drm_mode_probed_add(connector, newmode);
+			modes++;
+		}
+	}
+
+	return modes;
+}
+
+/**
+ * add_detailed_modes - get detailed mode info from EDID data
+ * @connector: attached connector
+ * @edid: EDID block to scan
+ * @quirks: quirks to apply
+ *
+ * Some of the detailed timing sections may contain mode information.  Grab
+ * it and add it to the list.
+ */
+static int add_detailed_info(struct drm_connector *connector,
+			     struct edid *edid, u32 quirks)
+{
+	struct drm_device *dev = connector->dev;
+	int i, j, modes = 0;
+
+	for (i = 0; i < EDID_DETAILED_TIMINGS; i++) {
+		struct detailed_timing *timing = &edid->detailed_timings[i];
+		struct detailed_non_pixel *data = &timing->data.other_data;
+		struct drm_display_mode *newmode;
+
+		/* EDID up to and including 1.2 may put monitor info here */
+		if (edid->version == 1 && edid->revision < 3)
+			continue;
+
+		/* Detailed mode timing */
+		if (timing->pixel_clock) {
+			newmode = drm_mode_detailed(dev, edid, timing, quirks);
+			if (!newmode)
+				continue;
+
+			/* First detailed mode is preferred */
+			if (i == 0 && edid->preferred_timing)
+				newmode->type |= DRM_MODE_TYPE_PREFERRED;
+			drm_mode_probed_add(connector, newmode);
+
+			modes++;
+			continue;
+		}
+
+		/* Other timing or info */
+		switch (data->type) {
+		case EDID_DETAIL_MONITOR_SERIAL:
+			break;
+		case EDID_DETAIL_MONITOR_STRING:
+			break;
+		case EDID_DETAIL_MONITOR_RANGE:
+			/* Get monitor range data */
+			break;
+		case EDID_DETAIL_MONITOR_NAME:
+			break;
+		case EDID_DETAIL_MONITOR_CPDATA:
+			break;
+		case EDID_DETAIL_STD_MODES:
+			/* Five modes per detailed section */
+			for (j = 0; j < 5; i++) {
+				struct std_timing *std;
+				struct drm_display_mode *newmode;
+
+				std = &data->data.timings[j];
+				newmode = drm_mode_std(dev, std);
+				if (newmode) {
+					drm_mode_probed_add(connector, newmode);
+					modes++;
+				}
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	return modes;
+}
+
+#define DDC_ADDR 0x50
+
+unsigned char *drm_do_probe_ddc_edid(struct i2c_adapter *adapter)
+{
+	unsigned char start = 0x0;
+	unsigned char *buf = kmalloc(EDID_LENGTH, GFP_KERNEL);
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= DDC_ADDR,
+			.flags	= 0,
+			.len	= 1,
+			.buf	= &start,
+		}, {
+			.addr	= DDC_ADDR,
+			.flags	= I2C_M_RD,
+			.len	= EDID_LENGTH,
+			.buf	= buf,
+		}
+	};
+
+	if (!buf) {
+		dev_warn(&adapter->dev, "unable to allocate memory for EDID "
+			 "block.\n");
+		return NULL;
+	}
+
+	if (i2c_transfer(adapter, msgs, 2) == 2)
+		return buf;
+
+	dev_info(&adapter->dev, "unable to read EDID block.\n");
+	kfree(buf);
+	return NULL;
+}
+EXPORT_SYMBOL(drm_do_probe_ddc_edid);
+
+static unsigned char *drm_ddc_read(struct i2c_adapter *adapter)
+{
+	struct i2c_algo_bit_data *algo_data = adapter->algo_data;
+	unsigned char *edid = NULL;
+	int i, j;
+
+	algo_data->setscl(algo_data->data, 1);
+
+	for (i = 0; i < 1; i++) {
+		/* For some old monitors we need the
+		 * following process to initialize/stop DDC
+		 */
+		algo_data->setsda(algo_data->data, 1);
+		msleep(13);
+
+		algo_data->setscl(algo_data->data, 1);
+		for (j = 0; j < 5; j++) {
+			msleep(10);
+			if (algo_data->getscl(algo_data->data))
+				break;
+		}
+		if (j == 5)
+			continue;
+
+		algo_data->setsda(algo_data->data, 0);
+		msleep(15);
+		algo_data->setscl(algo_data->data, 0);
+		msleep(15);
+		algo_data->setsda(algo_data->data, 1);
+		msleep(15);
+
+		/* Do the real work */
+		edid = drm_do_probe_ddc_edid(adapter);
+		algo_data->setsda(algo_data->data, 0);
+		algo_data->setscl(algo_data->data, 0);
+		msleep(15);
+
+		algo_data->setscl(algo_data->data, 1);
+		for (j = 0; j < 10; j++) {
+			msleep(10);
+			if (algo_data->getscl(algo_data->data))
+				break;
+		}
+
+		algo_data->setsda(algo_data->data, 1);
+		msleep(15);
+		algo_data->setscl(algo_data->data, 0);
+		algo_data->setsda(algo_data->data, 0);
+		if (edid)
+			break;
+	}
+	/* Release the DDC lines when done or the Apple Cinema HD display
+	 * will switch off
+	 */
+	algo_data->setsda(algo_data->data, 1);
+	algo_data->setscl(algo_data->data, 1);
+
+	return edid;
+}
+
+/**
+ * drm_get_edid - get EDID data, if available
+ * @connector: connector we're probing
+ * @adapter: i2c adapter to use for DDC
+ *
+ * Poke the given connector's i2c channel to grab EDID data if possible.
+ *
+ * Return edid data or NULL if we couldn't find any.
+ */
+struct edid *drm_get_edid(struct drm_connector *connector,
+			  struct i2c_adapter *adapter)
+{
+	struct edid *edid;
+
+	edid = (struct edid *)drm_ddc_read(adapter);
+	if (!edid) {
+		dev_warn(&connector->dev->pdev->dev, "%s: no EDID data\n",
+			 drm_get_connector_name(connector));
+		return NULL;
+	}
+	if (!edid_is_valid(edid)) {
+		dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n",
+			 drm_get_connector_name(connector));
+		kfree(edid);
+		return NULL;
+	}
+
+	connector->display_info.raw_edid = (char *)edid;
+
+	return edid;
+}
+EXPORT_SYMBOL(drm_get_edid);
+
+/**
+ * drm_add_edid_modes - add modes from EDID data, if available
+ * @connector: connector we're probing
+ * @edid: edid data
+ *
+ * Add the specified modes to the connector's mode list.
+ *
+ * Return number of modes added or 0 if we couldn't find any.
+ */
+int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
+{
+	int num_modes = 0;
+	u32 quirks;
+
+	if (edid == NULL) {
+		return 0;
+	}
+	if (!edid_is_valid(edid)) {
+		dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n",
+			 drm_get_connector_name(connector));
+		return 0;
+	}
+
+	quirks = edid_get_quirks(edid);
+
+	num_modes += add_established_modes(connector, edid);
+	num_modes += add_standard_modes(connector, edid);
+	num_modes += add_detailed_info(connector, edid, quirks);
+
+	if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
+		edid_fixup_preferred(connector, quirks);
+
+	connector->display_info.serration_vsync = edid->serration_vsync;
+	connector->display_info.sync_on_green = edid->sync_on_green;
+	connector->display_info.composite_sync = edid->composite_sync;
+	connector->display_info.separate_syncs = edid->separate_syncs;
+	connector->display_info.blank_to_black = edid->blank_to_black;
+	connector->display_info.video_level = edid->video_level;
+	connector->display_info.digital = edid->digital;
+	connector->display_info.width_mm = edid->width_cm * 10;
+	connector->display_info.height_mm = edid->height_cm * 10;
+	connector->display_info.gamma = edid->gamma;
+	connector->display_info.gtf_supported = edid->default_gtf;
+	connector->display_info.standard_color = edid->standard_color;
+	connector->display_info.display_type = edid->display_type;
+	connector->display_info.active_off_supported = edid->pm_active_off;
+	connector->display_info.suspend_supported = edid->pm_suspend;
+	connector->display_info.standby_supported = edid->pm_standby;
+	connector->display_info.gamma = edid->gamma;
+
+	return num_modes;
+}
+EXPORT_SYMBOL(drm_add_edid_modes);
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3a6c439652a5..3733e36d135e 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -35,7 +35,6 @@
  */
 
 #include "drmP.h"
-#include "drm_sarea.h"
 #include <linux/poll.h>
 #include <linux/smp_lock.h>
 
@@ -55,10 +54,12 @@ static int drm_setup(struct drm_device * dev)
 
 	atomic_set(&dev->ioctl_count, 0);
 	atomic_set(&dev->vma_count, 0);
-	dev->buf_use = 0;
-	atomic_set(&dev->buf_alloc, 0);
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA)) {
+	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
+		dev->buf_use = 0;
+		atomic_set(&dev->buf_alloc, 0);
+
 		i = drm_dma_setup(dev);
 		if (i < 0)
 			return i;
@@ -138,14 +139,14 @@ int drm_open(struct inode *inode, struct file *filp)
 		}
 		spin_unlock(&dev->count_lock);
 	}
-
 out:
 	mutex_lock(&dev->struct_mutex);
-	if (dev->dev_mapping == NULL)
-		dev->dev_mapping = inode->i_mapping;
-	else if (dev->dev_mapping != inode->i_mapping)
-		WARN(1, "dev->dev_mapping not inode mapping (%p expected %p)\n",
-		     dev->dev_mapping, inode->i_mapping);
+	if (minor->type == DRM_MINOR_LEGACY) {
+		BUG_ON((dev->dev_mapping != NULL) &&
+			(dev->dev_mapping != inode->i_mapping));
+		if (dev->dev_mapping == NULL)
+			dev->dev_mapping = inode->i_mapping;
+	}
 	mutex_unlock(&dev->struct_mutex);
 
 	return retcode;
@@ -251,6 +252,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 	priv->lock_count = 0;
 
 	INIT_LIST_HEAD(&priv->lhead);
+	INIT_LIST_HEAD(&priv->fbs);
 
 	if (dev->driver->driver_features & DRIVER_GEM)
 		drm_gem_open(dev, priv);
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 1e787f894b3c..1608f8dbfda0 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -305,6 +305,8 @@ int drm_control(struct drm_device *dev, void *data,
 	case DRM_INST_HANDLER:
 		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 			return 0;
+		if (drm_core_check_feature(dev, DRIVER_MODESET))
+			return 0;
 		if (dev->if_version < DRM_IF_VERSION(1, 2) &&
 		    ctl->irq != dev->pdev->irq)
 			return -EINVAL;
@@ -312,6 +314,8 @@ int drm_control(struct drm_device *dev, void *data,
 	case DRM_UNINST_HANDLER:
 		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 			return 0;
+		if (drm_core_check_feature(dev, DRIVER_MODESET))
+			return 0;
 		return drm_irq_uninstall(dev);
 	default:
 		return -EINVAL;
@@ -426,6 +430,45 @@ void drm_vblank_put(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
+/**
+ * drm_vblank_pre_modeset - account for vblanks across mode sets
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ * @post: post or pre mode set?
+ *
+ * Account for vblank events across mode setting events, which will likely
+ * reset the hardware frame counter.
+ */
+void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
+{
+	/*
+	 * To avoid all the problems that might happen if interrupts
+	 * were enabled/disabled around or between these calls, we just
+	 * have the kernel take a reference on the CRTC (just once though
+	 * to avoid corrupting the count if multiple, mismatch calls occur),
+	 * so that interrupts remain enabled in the interim.
+	 */
+	if (!dev->vblank_inmodeset[crtc]) {
+		dev->vblank_inmodeset[crtc] = 1;
+		drm_vblank_get(dev, crtc);
+	}
+}
+EXPORT_SYMBOL(drm_vblank_pre_modeset);
+
+void drm_vblank_post_modeset(struct drm_device *dev, int crtc)
+{
+	unsigned long irqflags;
+
+	if (dev->vblank_inmodeset[crtc]) {
+		spin_lock_irqsave(&dev->vbl_lock, irqflags);
+		dev->vblank_disable_allowed = 1;
+		dev->vblank_inmodeset[crtc] = 0;
+		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+		drm_vblank_put(dev, crtc);
+	}
+}
+EXPORT_SYMBOL(drm_vblank_post_modeset);
+
 /**
  * drm_modeset_ctl - handle vblank event counter changes across mode switch
  * @DRM_IOCTL_ARGS: standard ioctl arguments
@@ -441,7 +484,6 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
 {
 	struct drm_modeset_ctl *modeset = data;
-	unsigned long irqflags;
 	int crtc, ret = 0;
 
 	/* If drm_vblank_init() hasn't been called yet, just no-op */
@@ -454,28 +496,12 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/*
-	 * To avoid all the problems that might happen if interrupts
-	 * were enabled/disabled around or between these calls, we just
-	 * have the kernel take a reference on the CRTC (just once though
-	 * to avoid corrupting the count if multiple, mismatch calls occur),
-	 * so that interrupts remain enabled in the interim.
-	 */
 	switch (modeset->cmd) {
 	case _DRM_PRE_MODESET:
-		if (!dev->vblank_inmodeset[crtc]) {
-			dev->vblank_inmodeset[crtc] = 1;
-			drm_vblank_get(dev, crtc);
-		}
+		drm_vblank_pre_modeset(dev, crtc);
 		break;
 	case _DRM_POST_MODESET:
-		if (dev->vblank_inmodeset[crtc]) {
-			spin_lock_irqsave(&dev->vbl_lock, irqflags);
-			dev->vblank_disable_allowed = 1;
-			dev->vblank_inmodeset[crtc] = 0;
-			spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
-			drm_vblank_put(dev, crtc);
-		}
+		drm_vblank_post_modeset(dev, crtc);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 217ad7dc7076..367c590ffbba 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -296,3 +296,4 @@ void drm_mm_takedown(struct drm_mm * mm)
 
 	drm_free(entry, sizeof(*entry), DRM_MEM_MM);
 }
+EXPORT_SYMBOL(drm_mm_takedown);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
new file mode 100644
index 000000000000..c9b80fdd4630
--- /dev/null
+++ b/drivers/gpu/drm/drm_modes.c
@@ -0,0 +1,576 @@
+/*
+ * The list_sort function is (presumably) licensed under the GPL (see the
+ * top level "COPYING" file for details).
+ *
+ * The remainder of this file is:
+ *
+ * Copyright © 1997-2003 by The XFree86 Project, Inc.
+ * Copyright © 2007 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of the copyright holder(s)
+ * and author(s) shall not be used in advertising or otherwise to promote
+ * the sale, use or other dealings in this Software without prior written
+ * authorization from the copyright holder(s) and author(s).
+ */
+
+#include <linux/list.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+
+/**
+ * drm_mode_debug_printmodeline - debug print a mode
+ * @dev: DRM device
+ * @mode: mode to print
+ *
+ * LOCKING:
+ * None.
+ *
+ * Describe @mode using DRM_DEBUG.
+ */
+void drm_mode_debug_printmodeline(struct drm_display_mode *mode)
+{
+	DRM_DEBUG("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n",
+		  mode->base.id, mode->name, mode->vrefresh, mode->clock,
+		  mode->hdisplay, mode->hsync_start,
+		  mode->hsync_end, mode->htotal,
+		  mode->vdisplay, mode->vsync_start,
+		  mode->vsync_end, mode->vtotal, mode->type, mode->flags);
+}
+EXPORT_SYMBOL(drm_mode_debug_printmodeline);
+
+/**
+ * drm_mode_set_name - set the name on a mode
+ * @mode: name will be set in this mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Set the name of @mode to a standard format.
+ */
+void drm_mode_set_name(struct drm_display_mode *mode)
+{
+	snprintf(mode->name, DRM_DISPLAY_MODE_LEN, "%dx%d", mode->hdisplay,
+		 mode->vdisplay);
+}
+EXPORT_SYMBOL(drm_mode_set_name);
+
+/**
+ * drm_mode_list_concat - move modes from one list to another
+ * @head: source list
+ * @new: dst list
+ *
+ * LOCKING:
+ * Caller must ensure both lists are locked.
+ *
+ * Move all the modes from @head to @new.
+ */
+void drm_mode_list_concat(struct list_head *head, struct list_head *new)
+{
+
+	struct list_head *entry, *tmp;
+
+	list_for_each_safe(entry, tmp, head) {
+		list_move_tail(entry, new);
+	}
+}
+EXPORT_SYMBOL(drm_mode_list_concat);
+
+/**
+ * drm_mode_width - get the width of a mode
+ * @mode: mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Return @mode's width (hdisplay) value.
+ *
+ * FIXME: is this needed?
+ *
+ * RETURNS:
+ * @mode->hdisplay
+ */
+int drm_mode_width(struct drm_display_mode *mode)
+{
+	return mode->hdisplay;
+
+}
+EXPORT_SYMBOL(drm_mode_width);
+
+/**
+ * drm_mode_height - get the height of a mode
+ * @mode: mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Return @mode's height (vdisplay) value.
+ *
+ * FIXME: is this needed?
+ *
+ * RETURNS:
+ * @mode->vdisplay
+ */
+int drm_mode_height(struct drm_display_mode *mode)
+{
+	return mode->vdisplay;
+}
+EXPORT_SYMBOL(drm_mode_height);
+
+/**
+ * drm_mode_vrefresh - get the vrefresh of a mode
+ * @mode: mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Return @mode's vrefresh rate or calculate it if necessary.
+ *
+ * FIXME: why is this needed?  shouldn't vrefresh be set already?
+ *
+ * RETURNS:
+ * Vertical refresh rate of @mode x 1000. For precision reasons.
+ */
+int drm_mode_vrefresh(struct drm_display_mode *mode)
+{
+	int refresh = 0;
+	unsigned int calc_val;
+
+	if (mode->vrefresh > 0)
+		refresh = mode->vrefresh;
+	else if (mode->htotal > 0 && mode->vtotal > 0) {
+		/* work out vrefresh the value will be x1000 */
+		calc_val = (mode->clock * 1000);
+
+		calc_val /= mode->htotal;
+		calc_val *= 1000;
+		calc_val /= mode->vtotal;
+
+		refresh = calc_val;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			refresh *= 2;
+		if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+			refresh /= 2;
+		if (mode->vscan > 1)
+			refresh /= mode->vscan;
+	}
+	return refresh;
+}
+EXPORT_SYMBOL(drm_mode_vrefresh);
+
+/**
+ * drm_mode_set_crtcinfo - set CRTC modesetting parameters
+ * @p: mode
+ * @adjust_flags: unused? (FIXME)
+ *
+ * LOCKING:
+ * None.
+ *
+ * Setup the CRTC modesetting parameters for @p, adjusting if necessary.
+ */
+void drm_mode_set_crtcinfo(struct drm_display_mode *p, int adjust_flags)
+{
+	if ((p == NULL) || ((p->type & DRM_MODE_TYPE_CRTC_C) == DRM_MODE_TYPE_BUILTIN))
+		return;
+
+	p->crtc_hdisplay = p->hdisplay;
+	p->crtc_hsync_start = p->hsync_start;
+	p->crtc_hsync_end = p->hsync_end;
+	p->crtc_htotal = p->htotal;
+	p->crtc_hskew = p->hskew;
+	p->crtc_vdisplay = p->vdisplay;
+	p->crtc_vsync_start = p->vsync_start;
+	p->crtc_vsync_end = p->vsync_end;
+	p->crtc_vtotal = p->vtotal;
+
+	if (p->flags & DRM_MODE_FLAG_INTERLACE) {
+		if (adjust_flags & CRTC_INTERLACE_HALVE_V) {
+			p->crtc_vdisplay /= 2;
+			p->crtc_vsync_start /= 2;
+			p->crtc_vsync_end /= 2;
+			p->crtc_vtotal /= 2;
+		}
+
+		p->crtc_vtotal |= 1;
+	}
+
+	if (p->flags & DRM_MODE_FLAG_DBLSCAN) {
+		p->crtc_vdisplay *= 2;
+		p->crtc_vsync_start *= 2;
+		p->crtc_vsync_end *= 2;
+		p->crtc_vtotal *= 2;
+	}
+
+	if (p->vscan > 1) {
+		p->crtc_vdisplay *= p->vscan;
+		p->crtc_vsync_start *= p->vscan;
+		p->crtc_vsync_end *= p->vscan;
+		p->crtc_vtotal *= p->vscan;
+	}
+
+	p->crtc_vblank_start = min(p->crtc_vsync_start, p->crtc_vdisplay);
+	p->crtc_vblank_end = max(p->crtc_vsync_end, p->crtc_vtotal);
+	p->crtc_hblank_start = min(p->crtc_hsync_start, p->crtc_hdisplay);
+	p->crtc_hblank_end = max(p->crtc_hsync_end, p->crtc_htotal);
+
+	p->crtc_hadjusted = false;
+	p->crtc_vadjusted = false;
+}
+EXPORT_SYMBOL(drm_mode_set_crtcinfo);
+
+
+/**
+ * drm_mode_duplicate - allocate and duplicate an existing mode
+ * @m: mode to duplicate
+ *
+ * LOCKING:
+ * None.
+ *
+ * Just allocate a new mode, copy the existing mode into it, and return
+ * a pointer to it.  Used to create new instances of established modes.
+ */
+struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
+					    struct drm_display_mode *mode)
+{
+	struct drm_display_mode *nmode;
+	int new_id;
+
+	nmode = drm_mode_create(dev);
+	if (!nmode)
+		return NULL;
+
+	new_id = nmode->base.id;
+	*nmode = *mode;
+	nmode->base.id = new_id;
+	INIT_LIST_HEAD(&nmode->head);
+	return nmode;
+}
+EXPORT_SYMBOL(drm_mode_duplicate);
+
+/**
+ * drm_mode_equal - test modes for equality
+ * @mode1: first mode
+ * @mode2: second mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Check to see if @mode1 and @mode2 are equivalent.
+ *
+ * RETURNS:
+ * True if the modes are equal, false otherwise.
+ */
+bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2)
+{
+	/* do clock check convert to PICOS so fb modes get matched
+	 * the same */
+	if (mode1->clock && mode2->clock) {
+		if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock))
+			return false;
+	} else if (mode1->clock != mode2->clock)
+		return false;
+
+	if (mode1->hdisplay == mode2->hdisplay &&
+	    mode1->hsync_start == mode2->hsync_start &&
+	    mode1->hsync_end == mode2->hsync_end &&
+	    mode1->htotal == mode2->htotal &&
+	    mode1->hskew == mode2->hskew &&
+	    mode1->vdisplay == mode2->vdisplay &&
+	    mode1->vsync_start == mode2->vsync_start &&
+	    mode1->vsync_end == mode2->vsync_end &&
+	    mode1->vtotal == mode2->vtotal &&
+	    mode1->vscan == mode2->vscan &&
+	    mode1->flags == mode2->flags)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(drm_mode_equal);
+
+/**
+ * drm_mode_validate_size - make sure modes adhere to size constraints
+ * @dev: DRM device
+ * @mode_list: list of modes to check
+ * @maxX: maximum width
+ * @maxY: maximum height
+ * @maxPitch: max pitch
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * The DRM device (@dev) has size and pitch limits.  Here we validate the
+ * modes we probed for @dev against those limits and set their status as
+ * necessary.
+ */
+void drm_mode_validate_size(struct drm_device *dev,
+			    struct list_head *mode_list,
+			    int maxX, int maxY, int maxPitch)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, mode_list, head) {
+		if (maxPitch > 0 && mode->hdisplay > maxPitch)
+			mode->status = MODE_BAD_WIDTH;
+
+		if (maxX > 0 && mode->hdisplay > maxX)
+			mode->status = MODE_VIRTUAL_X;
+
+		if (maxY > 0 && mode->vdisplay > maxY)
+			mode->status = MODE_VIRTUAL_Y;
+	}
+}
+EXPORT_SYMBOL(drm_mode_validate_size);
+
+/**
+ * drm_mode_validate_clocks - validate modes against clock limits
+ * @dev: DRM device
+ * @mode_list: list of modes to check
+ * @min: minimum clock rate array
+ * @max: maximum clock rate array
+ * @n_ranges: number of clock ranges (size of arrays)
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * Some code may need to check a mode list against the clock limits of the
+ * device in question.  This function walks the mode list, testing to make
+ * sure each mode falls within a given range (defined by @min and @max
+ * arrays) and sets @mode->status as needed.
+ */
+void drm_mode_validate_clocks(struct drm_device *dev,
+			      struct list_head *mode_list,
+			      int *min, int *max, int n_ranges)
+{
+	struct drm_display_mode *mode;
+	int i;
+
+	list_for_each_entry(mode, mode_list, head) {
+		bool good = false;
+		for (i = 0; i < n_ranges; i++) {
+			if (mode->clock >= min[i] && mode->clock <= max[i]) {
+				good = true;
+				break;
+			}
+		}
+		if (!good)
+			mode->status = MODE_CLOCK_RANGE;
+	}
+}
+EXPORT_SYMBOL(drm_mode_validate_clocks);
+
+/**
+ * drm_mode_prune_invalid - remove invalid modes from mode list
+ * @dev: DRM device
+ * @mode_list: list of modes to check
+ * @verbose: be verbose about it
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * Once mode list generation is complete, a caller can use this routine to
+ * remove invalid modes from a mode list.  If any of the modes have a
+ * status other than %MODE_OK, they are removed from @mode_list and freed.
+ */
+void drm_mode_prune_invalid(struct drm_device *dev,
+			    struct list_head *mode_list, bool verbose)
+{
+	struct drm_display_mode *mode, *t;
+
+	list_for_each_entry_safe(mode, t, mode_list, head) {
+		if (mode->status != MODE_OK) {
+			list_del(&mode->head);
+			if (verbose) {
+				drm_mode_debug_printmodeline(mode);
+				DRM_DEBUG("Not using %s mode %d\n", mode->name, mode->status);
+			}
+			drm_mode_destroy(dev, mode);
+		}
+	}
+}
+EXPORT_SYMBOL(drm_mode_prune_invalid);
+
+/**
+ * drm_mode_compare - compare modes for favorability
+ * @lh_a: list_head for first mode
+ * @lh_b: list_head for second mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Compare two modes, given by @lh_a and @lh_b, returning a value indicating
+ * which is better.
+ *
+ * RETURNS:
+ * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or
+ * positive if @lh_b is better than @lh_a.
+ */
+static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
+{
+	struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head);
+	struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head);
+	int diff;
+
+	diff = ((b->type & DRM_MODE_TYPE_PREFERRED) != 0) -
+		((a->type & DRM_MODE_TYPE_PREFERRED) != 0);
+	if (diff)
+		return diff;
+	diff = b->hdisplay * b->vdisplay - a->hdisplay * a->vdisplay;
+	if (diff)
+		return diff;
+	diff = b->clock - a->clock;
+	return diff;
+}
+
+/* FIXME: what we don't have a list sort function? */
+/* list sort from Mark J Roberts (mjr@znex.org) */
+void list_sort(struct list_head *head,
+	       int (*cmp)(struct list_head *a, struct list_head *b))
+{
+	struct list_head *p, *q, *e, *list, *tail, *oldhead;
+	int insize, nmerges, psize, qsize, i;
+
+	list = head->next;
+	list_del(head);
+	insize = 1;
+	for (;;) {
+		p = oldhead = list;
+		list = tail = NULL;
+		nmerges = 0;
+
+		while (p) {
+			nmerges++;
+			q = p;
+			psize = 0;
+			for (i = 0; i < insize; i++) {
+				psize++;
+				q = q->next == oldhead ? NULL : q->next;
+				if (!q)
+					break;
+			}
+
+			qsize = insize;
+			while (psize > 0 || (qsize > 0 && q)) {
+				if (!psize) {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				} else if (!qsize || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else if (cmp(p, q) <= 0) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				}
+				if (tail)
+					tail->next = e;
+				else
+					list = e;
+				e->prev = tail;
+				tail = e;
+			}
+			p = q;
+		}
+
+		tail->next = list;
+		list->prev = tail;
+
+		if (nmerges <= 1)
+			break;
+
+		insize *= 2;
+	}
+
+	head->next = list;
+	head->prev = list->prev;
+	list->prev->next = head;
+	list->prev = head;
+}
+
+/**
+ * drm_mode_sort - sort mode list
+ * @mode_list: list to sort
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * Sort @mode_list by favorability, putting good modes first.
+ */
+void drm_mode_sort(struct list_head *mode_list)
+{
+	list_sort(mode_list, drm_mode_compare);
+}
+EXPORT_SYMBOL(drm_mode_sort);
+
+/**
+ * drm_mode_connector_list_update - update the mode list for the connector
+ * @connector: the connector to update
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * This moves the modes from the @connector probed_modes list
+ * to the actual mode list. It compares the probed mode against the current
+ * list and only adds different modes. All modes unverified after this point
+ * will be removed by the prune invalid modes.
+ */
+void drm_mode_connector_list_update(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+	struct drm_display_mode *pmode, *pt;
+	int found_it;
+
+	list_for_each_entry_safe(pmode, pt, &connector->probed_modes,
+				 head) {
+		found_it = 0;
+		/* go through current modes checking for the new probed mode */
+		list_for_each_entry(mode, &connector->modes, head) {
+			if (drm_mode_equal(pmode, mode)) {
+				found_it = 1;
+				/* if equal delete the probed mode */
+				mode->status = pmode->status;
+				list_del(&pmode->head);
+				drm_mode_destroy(connector->dev, pmode);
+				break;
+			}
+		}
+
+		if (!found_it) {
+			list_move_tail(&pmode->head, &connector->modes);
+		}
+	}
+}
+EXPORT_SYMBOL(drm_mode_connector_list_update);
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index ea7f9e5d47fa..5ca132afa4f2 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -57,6 +57,14 @@ static int drm_minor_get_id(struct drm_device *dev, int type)
 	int ret;
 	int base = 0, limit = 63;
 
+	if (type == DRM_MINOR_CONTROL) {
+                base += 64;
+                limit = base + 127;
+        } else if (type == DRM_MINOR_RENDER) {
+                base += 128;
+                limit = base + 255;
+        }
+
 again:
 	if (idr_pre_get(&drm_minors_idr, GFP_KERNEL) == 0) {
 		DRM_ERROR("Out of memory expanding drawable idr\n");
@@ -362,12 +370,28 @@ int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
 		printk(KERN_ERR "DRM: Fill_in_dev failed.\n");
 		goto err_g2;
 	}
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL);
+		if (ret)
+			goto err_g2;
+	}
+
 	if ((ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY)))
-		goto err_g2;
+		goto err_g3;
 
-	if (dev->driver->load)
-		if ((ret = dev->driver->load(dev, ent->driver_data)))
+	if (dev->driver->load) {
+		ret = dev->driver->load(dev, ent->driver_data);
+		if (ret)
 			goto err_g3;
+	}
+
+        /* setup the grouping for the legacy output */
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_mode_group_init_legacy_group(dev, &dev->primary->mode_group);
+		if (ret)
+			goto err_g3;
+	}
 
 	list_add_tail(&dev->driver_item, &driver->device_list);
 
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 1611b9bcbe7f..65d72d094c81 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -20,6 +20,7 @@
 #include "drmP.h"
 
 #define to_drm_minor(d) container_of(d, struct drm_minor, kdev)
+#define to_drm_connector(d) container_of(d, struct drm_connector, kdev)
 
 /**
  * drm_sysfs_suspend - DRM class suspend hook
@@ -34,7 +35,7 @@ static int drm_sysfs_suspend(struct device *dev, pm_message_t state)
 	struct drm_minor *drm_minor = to_drm_minor(dev);
 	struct drm_device *drm_dev = drm_minor->dev;
 
-	if (drm_dev->driver->suspend)
+	if (drm_minor->type == DRM_MINOR_LEGACY && drm_dev->driver->suspend)
 		return drm_dev->driver->suspend(drm_dev, state);
 
 	return 0;
@@ -52,7 +53,7 @@ static int drm_sysfs_resume(struct device *dev)
 	struct drm_minor *drm_minor = to_drm_minor(dev);
 	struct drm_device *drm_dev = drm_minor->dev;
 
-	if (drm_dev->driver->resume)
+	if (drm_minor->type == DRM_MINOR_LEGACY && drm_dev->driver->resume)
 		return drm_dev->driver->resume(drm_dev);
 
 	return 0;
@@ -144,6 +145,323 @@ static void drm_sysfs_device_release(struct device *dev)
 	return;
 }
 
+/*
+ * Connector properties
+ */
+static ssize_t status_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	enum drm_connector_status status;
+
+	status = connector->funcs->detect(connector);
+	return snprintf(buf, PAGE_SIZE, "%s",
+			drm_get_connector_status_name(status));
+}
+
+static ssize_t dpms_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_device *dev = connector->dev;
+	uint64_t dpms_status;
+	int ret;
+
+	ret = drm_connector_property_get_value(connector,
+					    dev->mode_config.dpms_property,
+					    &dpms_status);
+	if (ret)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s",
+			drm_get_dpms_name((int)dpms_status));
+}
+
+static ssize_t enabled_show(struct device *device,
+			    struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+
+	return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" :
+			"disabled");
+}
+
+static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *attr,
+			 char *buf, loff_t off, size_t count)
+{
+	struct device *connector_dev = container_of(kobj, struct device, kobj);
+	struct drm_connector *connector = to_drm_connector(connector_dev);
+	unsigned char *edid;
+	size_t size;
+
+	if (!connector->edid_blob_ptr)
+		return 0;
+
+	edid = connector->edid_blob_ptr->data;
+	size = connector->edid_blob_ptr->length;
+	if (!edid)
+		return 0;
+
+	if (off >= size)
+		return 0;
+
+	if (off + count > size)
+		count = size - off;
+	memcpy(buf, edid + off, count);
+
+	return count;
+}
+
+static ssize_t modes_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_display_mode *mode;
+	int written = 0;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		written += snprintf(buf + written, PAGE_SIZE - written, "%s\n",
+				    mode->name);
+	}
+
+	return written;
+}
+
+static ssize_t subconnector_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_device *dev = connector->dev;
+	struct drm_property *prop = NULL;
+	uint64_t subconnector;
+	int is_tv = 0;
+	int ret;
+
+	switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_DVII:
+			prop = dev->mode_config.dvi_i_subconnector_property;
+			break;
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Component:
+			prop = dev->mode_config.tv_subconnector_property;
+			is_tv = 1;
+			break;
+		default:
+			DRM_ERROR("Wrong connector type for this property\n");
+			return 0;
+	}
+
+	if (!prop) {
+		DRM_ERROR("Unable to find subconnector property\n");
+		return 0;
+	}
+
+	ret = drm_connector_property_get_value(connector, prop, &subconnector);
+	if (ret)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s", is_tv ?
+			drm_get_tv_subconnector_name((int)subconnector) :
+			drm_get_dvi_i_subconnector_name((int)subconnector));
+}
+
+static ssize_t select_subconnector_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_device *dev = connector->dev;
+	struct drm_property *prop = NULL;
+	uint64_t subconnector;
+	int is_tv = 0;
+	int ret;
+
+	switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_DVII:
+			prop = dev->mode_config.dvi_i_select_subconnector_property;
+			break;
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Component:
+			prop = dev->mode_config.tv_select_subconnector_property;
+			is_tv = 1;
+			break;
+		default:
+			DRM_ERROR("Wrong connector type for this property\n");
+			return 0;
+	}
+
+	if (!prop) {
+		DRM_ERROR("Unable to find select subconnector property\n");
+		return 0;
+	}
+
+	ret = drm_connector_property_get_value(connector, prop, &subconnector);
+	if (ret)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s", is_tv ?
+			drm_get_tv_select_name((int)subconnector) :
+			drm_get_dvi_i_select_name((int)subconnector));
+}
+
+static struct device_attribute connector_attrs[] = {
+	__ATTR_RO(status),
+	__ATTR_RO(enabled),
+	__ATTR_RO(dpms),
+	__ATTR_RO(modes),
+};
+
+/* These attributes are for both DVI-I connectors and all types of tv-out. */
+static struct device_attribute connector_attrs_opt1[] = {
+	__ATTR_RO(subconnector),
+	__ATTR_RO(select_subconnector),
+};
+
+static struct bin_attribute edid_attr = {
+	.attr.name = "edid",
+	.size = 128,
+	.read = edid_show,
+};
+
+/**
+ * drm_sysfs_connector_add - add an connector to sysfs
+ * @connector: connector to add
+ *
+ * Create an connector device in sysfs, along with its associated connector
+ * properties (so far, connection status, dpms, mode list & edid) and
+ * generate a hotplug event so userspace knows there's a new connector
+ * available.
+ *
+ * Note:
+ * This routine should only be called *once* for each DRM minor registered.
+ * A second call for an already registered device will trigger the BUG_ON
+ * below.
+ */
+int drm_sysfs_connector_add(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	int ret = 0, i, j;
+
+	/* We shouldn't get called more than once for the same connector */
+	BUG_ON(device_is_registered(&connector->kdev));
+
+	connector->kdev.parent = &dev->primary->kdev;
+	connector->kdev.class = drm_class;
+	connector->kdev.release = drm_sysfs_device_release;
+
+	DRM_DEBUG("adding \"%s\" to sysfs\n",
+		  drm_get_connector_name(connector));
+
+	snprintf(connector->kdev.bus_id, BUS_ID_SIZE, "card%d-%s",
+		 dev->primary->index, drm_get_connector_name(connector));
+	ret = device_register(&connector->kdev);
+
+	if (ret) {
+		DRM_ERROR("failed to register connector device: %d\n", ret);
+		goto out;
+	}
+
+	/* Standard attributes */
+
+	for (i = 0; i < ARRAY_SIZE(connector_attrs); i++) {
+		ret = device_create_file(&connector->kdev, &connector_attrs[i]);
+		if (ret)
+			goto err_out_files;
+	}
+
+	/* Optional attributes */
+	/*
+	 * In the long run it maybe a good idea to make one set of
+	 * optionals per connector type.
+	 */
+	switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_DVII:
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Component:
+			for (i = 0; i < ARRAY_SIZE(connector_attrs_opt1); i++) {
+				ret = device_create_file(&connector->kdev, &connector_attrs_opt1[i]);
+				if (ret)
+					goto err_out_files;
+			}
+			break;
+		default:
+			break;
+	}
+
+	ret = sysfs_create_bin_file(&connector->kdev.kobj, &edid_attr);
+	if (ret)
+		goto err_out_files;
+
+	/* Let userspace know we have a new connector */
+	drm_sysfs_hotplug_event(dev);
+
+	return 0;
+
+err_out_files:
+	if (i > 0)
+		for (j = 0; j < i; j++)
+			device_remove_file(&connector->kdev,
+					   &connector_attrs[i]);
+	device_unregister(&connector->kdev);
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(drm_sysfs_connector_add);
+
+/**
+ * drm_sysfs_connector_remove - remove an connector device from sysfs
+ * @connector: connector to remove
+ *
+ * Remove @connector and its associated attributes from sysfs.  Note that
+ * the device model core will take care of sending the "remove" uevent
+ * at this time, so we don't need to do it.
+ *
+ * Note:
+ * This routine should only be called if the connector was previously
+ * successfully registered.  If @connector hasn't been registered yet,
+ * you'll likely see a panic somewhere deep in sysfs code when called.
+ */
+void drm_sysfs_connector_remove(struct drm_connector *connector)
+{
+	int i;
+
+	DRM_DEBUG("removing \"%s\" from sysfs\n",
+		  drm_get_connector_name(connector));
+
+	for (i = 0; i < ARRAY_SIZE(connector_attrs); i++)
+		device_remove_file(&connector->kdev, &connector_attrs[i]);
+	sysfs_remove_bin_file(&connector->kdev.kobj, &edid_attr);
+	device_unregister(&connector->kdev);
+}
+EXPORT_SYMBOL(drm_sysfs_connector_remove);
+
+/**
+ * drm_sysfs_hotplug_event - generate a DRM uevent
+ * @dev: DRM device
+ *
+ * Send a uevent for the DRM device specified by @dev.  Currently we only
+ * set HOTPLUG=1 in the uevent environment, but this could be expanded to
+ * deal with other types of events.
+ */
+void drm_sysfs_hotplug_event(struct drm_device *dev)
+{
+	char *event_string = "HOTPLUG=1";
+	char *envp[] = { event_string, NULL };
+
+	DRM_DEBUG("generating hotplug event\n");
+
+	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * drm_sysfs_device_add - adds a class device to sysfs for a character driver
  * @dev: DRM device to be added
@@ -163,7 +481,12 @@ int drm_sysfs_device_add(struct drm_minor *minor)
 	minor->kdev.class = drm_class;
 	minor->kdev.release = drm_sysfs_device_release;
 	minor->kdev.devt = minor->device;
-	minor_str = "card%d";
+	if (minor->type == DRM_MINOR_CONTROL)
+		minor_str = "controlD%d";
+        else if (minor->type == DRM_MINOR_RENDER)
+                minor_str = "renderD%d";
+        else
+                minor_str = "card%d";
 
 	snprintf(minor->kdev.bus_id, BUS_ID_SIZE, minor_str, minor->index);
 
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 448d209a0bf2..e6210725b9ab 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -112,6 +112,23 @@ static int 		vga_video_font_height;
 static int 		vga_scan_lines		__read_mostly;
 static unsigned int 	vga_rolled_over;
 
+int vgacon_text_mode_force = 0;
+
+bool vgacon_text_force(void)
+{
+	return vgacon_text_mode_force ? true : false;
+}
+EXPORT_SYMBOL(vgacon_text_force);
+
+static int __init text_mode(char *str)
+{
+	vgacon_text_mode_force = 1;
+	return 1;
+}
+
+/* force text mode - used by kernel modesetting */
+__setup("nomodeset", text_mode);
+
 static int __init no_scroll(char *str)
 {
 	/*
diff --git a/include/drm/Kbuild b/include/drm/Kbuild
index 82b6983b7fbb..b940fdfa3b25 100644
--- a/include/drm/Kbuild
+++ b/include/drm/Kbuild
@@ -1,4 +1,4 @@
-unifdef-y += drm.h drm_sarea.h
+unifdef-y += drm.h drm_sarea.h drm_mode.h
 unifdef-y += i810_drm.h
 unifdef-y += i830_drm.h
 unifdef-y += i915_drm.h
diff --git a/include/drm/drm.h b/include/drm/drm.h
index 3a66252456ba..76ce6fe300b7 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -599,6 +599,8 @@ struct drm_gem_open {
 	uint64_t size;
 };
 
+#include "drm_mode.h"
+
 #define DRM_IOCTL_BASE			'd'
 #define DRM_IO(nr)			_IO(DRM_IOCTL_BASE,nr)
 #define DRM_IOR(nr,type)		_IOR(DRM_IOCTL_BASE,nr,type)
@@ -668,6 +670,25 @@ struct drm_gem_open {
 
 #define DRM_IOCTL_UPDATE_DRAW		DRM_IOW(0x3f, struct drm_update_draw)
 
+#define DRM_IOCTL_MODE_GETRESOURCES	DRM_IOWR(0xA0, struct drm_mode_card_res)
+#define DRM_IOCTL_MODE_GETCRTC		DRM_IOWR(0xA1, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_SETCRTC		DRM_IOWR(0xA2, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_CURSOR		DRM_IOWR(0xA3, struct drm_mode_cursor)
+#define DRM_IOCTL_MODE_GETGAMMA		DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_SETGAMMA		DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_GETENCODER	DRM_IOWR(0xA6, struct drm_mode_get_encoder)
+#define DRM_IOCTL_MODE_GETCONNECTOR	DRM_IOWR(0xA7, struct drm_mode_get_connector)
+#define DRM_IOCTL_MODE_ATTACHMODE	DRM_IOWR(0xA8, struct drm_mode_mode_cmd)
+#define DRM_IOCTL_MODE_DETACHMODE	DRM_IOWR(0xA9, struct drm_mode_mode_cmd)
+
+#define DRM_IOCTL_MODE_GETPROPERTY	DRM_IOWR(0xAA, struct drm_mode_get_property)
+#define DRM_IOCTL_MODE_SETPROPERTY	DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
+#define DRM_IOCTL_MODE_GETPROPBLOB	DRM_IOWR(0xAC, struct drm_mode_get_blob)
+#define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
+#define DRM_IOCTL_MODE_REPLACEFB	DRM_IOWR(0xB0, struct drm_mode_fb_cmd)
+
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x99.
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index ae42a6a5c24e..7802c80f2b23 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -105,6 +105,7 @@ struct drm_device;
 #define DRIVER_FB_DMA      0x400
 #define DRIVER_IRQ_VBL2    0x800
 #define DRIVER_GEM         0x1000
+#define DRIVER_MODESET     0x2000
 
 /***********************************************************************/
 /** \name Begin the DRM... */
@@ -276,6 +277,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd,
 #define DRM_AUTH	0x1
 #define	DRM_MASTER	0x2
 #define DRM_ROOT_ONLY	0x4
+#define DRM_CONTROL_ALLOW 0x8
 
 struct drm_ioctl_desc {
 	unsigned int cmd;
@@ -398,6 +400,7 @@ struct drm_file {
 	int is_master; /* this file private is a master for a minor */
 	struct drm_master *master; /* master this node is currently associated with
 				      N.B. not always minor->master */
+	struct list_head fbs;
 };
 
 /** Wait queue */
@@ -629,6 +632,8 @@ struct drm_gem_object {
 	void *driver_private;
 };
 
+#include "drm_crtc.h"
+
 /* per-master structure */
 struct drm_master {
 
@@ -792,6 +797,8 @@ struct drm_driver {
 
 #define DRM_MINOR_UNASSIGNED 0
 #define DRM_MINOR_LEGACY 1
+#define DRM_MINOR_CONTROL 2
+#define DRM_MINOR_RENDER 3
 
 /**
  * DRM minor structure. This structure represents a drm minor number.
@@ -805,6 +812,7 @@ struct drm_minor {
 	struct proc_dir_entry *dev_root;  /**< proc directory entry */
 	struct drm_master *master; /* currently active master for this node */
 	struct list_head master_list;
+	struct drm_mode_group mode_group;
 };
 
 /**
@@ -855,6 +863,7 @@ struct drm_device {
 	struct idr ctx_idr;
 
 	struct list_head vmalist;	/**< List of vmas (for debugging) */
+
 	/*@} */
 
 	/** \name DMA queues (contexts) */
@@ -933,6 +942,7 @@ struct drm_device {
 	struct drm_driver *driver;
 	drm_local_map_t *agp_buffer_map;
 	unsigned int agp_buffer_token;
+	struct drm_minor *control;		/**< Control node for card */
 	struct drm_minor *primary;		/**< render type primary screen head */
 
 	/** \name Drawable information */
@@ -941,6 +951,8 @@ struct drm_device {
 	struct idr drw_idr;
 	/*@} */
 
+        struct drm_mode_config mode_config;	/**< Current mode config */
+
 	/** \name GEM information */
 	/*@{ */
 	spinlock_t object_name_lock;
@@ -1201,6 +1213,8 @@ extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
 extern void drm_vblank_cleanup(struct drm_device *dev);
 /* Modesetting support */
+extern void drm_vblank_pre_modeset(struct drm_device *dev, int crtc);
+extern void drm_vblank_post_modeset(struct drm_device *dev, int crtc);
 extern int drm_modeset_ctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 
@@ -1286,7 +1300,11 @@ struct drm_sysfs_class;
 extern struct class *drm_sysfs_create(struct module *owner, char *name);
 extern void drm_sysfs_destroy(void);
 extern int drm_sysfs_device_add(struct drm_minor *minor);
+extern void drm_sysfs_hotplug_event(struct drm_device *dev);
 extern void drm_sysfs_device_remove(struct drm_minor *minor);
+extern char *drm_get_connector_status_name(enum drm_connector_status status);
+extern int drm_sysfs_connector_add(struct drm_connector *connector);
+extern void drm_sysfs_connector_remove(struct drm_connector *connector);
 
 /*
  * Basic memory manager support (drm_mm.c)
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
new file mode 100644
index 000000000000..08a884bea446
--- /dev/null
+++ b/include/drm/drm_crtc.h
@@ -0,0 +1,737 @@
+/*
+ * Copyright © 2006 Keith Packard
+ * Copyright © 2007-2008 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __DRM_CRTC_H__
+#define __DRM_CRTC_H__
+
+#include <linux/i2c.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/idr.h>
+
+#include <linux/fb.h>
+
+struct drm_device;
+struct drm_mode_set;
+struct drm_framebuffer;
+
+
+#define DRM_MODE_OBJECT_CRTC 0xcccccccc
+#define DRM_MODE_OBJECT_CONNECTOR 0xc0c0c0c0
+#define DRM_MODE_OBJECT_ENCODER 0xe0e0e0e0
+#define DRM_MODE_OBJECT_MODE 0xdededede
+#define DRM_MODE_OBJECT_PROPERTY 0xb0b0b0b0
+#define DRM_MODE_OBJECT_FB 0xfbfbfbfb
+#define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb
+
+struct drm_mode_object {
+	uint32_t id;
+	uint32_t type;
+};
+
+/*
+ * Note on terminology:  here, for brevity and convenience, we refer to connector
+ * control chips as 'CRTCs'.  They can control any type of connector, VGA, LVDS,
+ * DVI, etc.  And 'screen' refers to the whole of the visible display, which
+ * may span multiple monitors (and therefore multiple CRTC and connector
+ * structures).
+ */
+
+enum drm_mode_status {
+    MODE_OK	= 0,	/* Mode OK */
+    MODE_HSYNC,		/* hsync out of range */
+    MODE_VSYNC,		/* vsync out of range */
+    MODE_H_ILLEGAL,	/* mode has illegal horizontal timings */
+    MODE_V_ILLEGAL,	/* mode has illegal horizontal timings */
+    MODE_BAD_WIDTH,	/* requires an unsupported linepitch */
+    MODE_NOMODE,	/* no mode with a maching name */
+    MODE_NO_INTERLACE,	/* interlaced mode not supported */
+    MODE_NO_DBLESCAN,	/* doublescan mode not supported */
+    MODE_NO_VSCAN,	/* multiscan mode not supported */
+    MODE_MEM,		/* insufficient video memory */
+    MODE_VIRTUAL_X,	/* mode width too large for specified virtual size */
+    MODE_VIRTUAL_Y,	/* mode height too large for specified virtual size */
+    MODE_MEM_VIRT,	/* insufficient video memory given virtual size */
+    MODE_NOCLOCK,	/* no fixed clock available */
+    MODE_CLOCK_HIGH,	/* clock required is too high */
+    MODE_CLOCK_LOW,	/* clock required is too low */
+    MODE_CLOCK_RANGE,	/* clock/mode isn't in a ClockRange */
+    MODE_BAD_HVALUE,	/* horizontal timing was out of range */
+    MODE_BAD_VVALUE,	/* vertical timing was out of range */
+    MODE_BAD_VSCAN,	/* VScan value out of range */
+    MODE_HSYNC_NARROW,	/* horizontal sync too narrow */
+    MODE_HSYNC_WIDE,	/* horizontal sync too wide */
+    MODE_HBLANK_NARROW,	/* horizontal blanking too narrow */
+    MODE_HBLANK_WIDE,	/* horizontal blanking too wide */
+    MODE_VSYNC_NARROW,	/* vertical sync too narrow */
+    MODE_VSYNC_WIDE,	/* vertical sync too wide */
+    MODE_VBLANK_NARROW,	/* vertical blanking too narrow */
+    MODE_VBLANK_WIDE,	/* vertical blanking too wide */
+    MODE_PANEL,         /* exceeds panel dimensions */
+    MODE_INTERLACE_WIDTH, /* width too large for interlaced mode */
+    MODE_ONE_WIDTH,     /* only one width is supported */
+    MODE_ONE_HEIGHT,    /* only one height is supported */
+    MODE_ONE_SIZE,      /* only one resolution is supported */
+    MODE_NO_REDUCED,    /* monitor doesn't accept reduced blanking */
+    MODE_UNVERIFIED = -3, /* mode needs to reverified */
+    MODE_BAD = -2,	/* unspecified reason */
+    MODE_ERROR	= -1	/* error condition */
+};
+
+#define DRM_MODE_TYPE_CLOCK_CRTC_C (DRM_MODE_TYPE_CLOCK_C | \
+				    DRM_MODE_TYPE_CRTC_C)
+
+#define DRM_MODE(nm, t, c, hd, hss, hse, ht, hsk, vd, vss, vse, vt, vs, f) \
+	.name = nm, .status = 0, .type = (t), .clock = (c), \
+	.hdisplay = (hd), .hsync_start = (hss), .hsync_end = (hse), \
+	.htotal = (ht), .hskew = (hsk), .vdisplay = (vd), \
+	.vsync_start = (vss), .vsync_end = (vse), .vtotal = (vt), \
+	.vscan = (vs), .flags = (f), .vrefresh = 0
+
+#define CRTC_INTERLACE_HALVE_V 0x1 /* halve V values for interlacing */
+
+struct drm_display_mode {
+	/* Header */
+	struct list_head head;
+	struct drm_mode_object base;
+
+	char name[DRM_DISPLAY_MODE_LEN];
+
+	int connector_count;
+	enum drm_mode_status status;
+	int type;
+
+	/* Proposed mode values */
+	int clock;
+	int hdisplay;
+	int hsync_start;
+	int hsync_end;
+	int htotal;
+	int hskew;
+	int vdisplay;
+	int vsync_start;
+	int vsync_end;
+	int vtotal;
+	int vscan;
+	unsigned int flags;
+
+	/* Addressable image size (may be 0 for projectors, etc.) */
+	int width_mm;
+	int height_mm;
+
+	/* Actual mode we give to hw */
+	int clock_index;
+	int synth_clock;
+	int crtc_hdisplay;
+	int crtc_hblank_start;
+	int crtc_hblank_end;
+	int crtc_hsync_start;
+	int crtc_hsync_end;
+	int crtc_htotal;
+	int crtc_hskew;
+	int crtc_vdisplay;
+	int crtc_vblank_start;
+	int crtc_vblank_end;
+	int crtc_vsync_start;
+	int crtc_vsync_end;
+	int crtc_vtotal;
+	int crtc_hadjusted;
+	int crtc_vadjusted;
+
+	/* Driver private mode info */
+	int private_size;
+	int *private;
+	int private_flags;
+
+	int vrefresh;
+	float hsync;
+};
+
+enum drm_connector_status {
+	connector_status_connected = 1,
+	connector_status_disconnected = 2,
+	connector_status_unknown = 3,
+};
+
+enum subpixel_order {
+	SubPixelUnknown = 0,
+	SubPixelHorizontalRGB,
+	SubPixelHorizontalBGR,
+	SubPixelVerticalRGB,
+	SubPixelVerticalBGR,
+	SubPixelNone,
+};
+
+
+/*
+ * Describes a given display (e.g. CRT or flat panel) and its limitations.
+ */
+struct drm_display_info {
+	char name[DRM_DISPLAY_INFO_LEN];
+	/* Input info */
+	bool serration_vsync;
+	bool sync_on_green;
+	bool composite_sync;
+	bool separate_syncs;
+	bool blank_to_black;
+	unsigned char video_level;
+	bool digital;
+	/* Physical size */
+        unsigned int width_mm;
+	unsigned int height_mm;
+
+	/* Display parameters */
+	unsigned char gamma; /* FIXME: storage format */
+	bool gtf_supported;
+	bool standard_color;
+	enum {
+		monochrome = 0,
+		rgb,
+		other,
+		unknown,
+	} display_type;
+	bool active_off_supported;
+	bool suspend_supported;
+	bool standby_supported;
+
+	/* Color info FIXME: storage format */
+	unsigned short redx, redy;
+	unsigned short greenx, greeny;
+	unsigned short bluex, bluey;
+	unsigned short whitex, whitey;
+
+	/* Clock limits FIXME: storage format */
+	unsigned int min_vfreq, max_vfreq;
+	unsigned int min_hfreq, max_hfreq;
+	unsigned int pixel_clock;
+
+	/* White point indices FIXME: storage format */
+	unsigned int wpx1, wpy1;
+	unsigned int wpgamma1;
+	unsigned int wpx2, wpy2;
+	unsigned int wpgamma2;
+
+	enum subpixel_order subpixel_order;
+
+	char *raw_edid; /* if any */
+};
+
+struct drm_framebuffer_funcs {
+	void (*destroy)(struct drm_framebuffer *framebuffer);
+	int (*create_handle)(struct drm_framebuffer *fb,
+			     struct drm_file *file_priv,
+			     unsigned int *handle);
+};
+
+struct drm_framebuffer {
+	struct drm_device *dev;
+	struct list_head head;
+	struct drm_mode_object base;
+	const struct drm_framebuffer_funcs *funcs;
+	unsigned int pitch;
+	unsigned int width;
+	unsigned int height;
+	/* depth can be 15 or 16 */
+	unsigned int depth;
+	int bits_per_pixel;
+	int flags;
+	void *fbdev;
+	u32 pseudo_palette[17];
+	struct list_head filp_head;
+};
+
+struct drm_property_blob {
+	struct drm_mode_object base;
+	struct list_head head;
+	unsigned int length;
+	void *data;
+};
+
+struct drm_property_enum {
+	uint64_t value;
+	struct list_head head;
+	char name[DRM_PROP_NAME_LEN];
+};
+
+struct drm_property {
+	struct list_head head;
+	struct drm_mode_object base;
+	uint32_t flags;
+	char name[DRM_PROP_NAME_LEN];
+	uint32_t num_values;
+	uint64_t *values;
+
+	struct list_head enum_blob_list;
+};
+
+struct drm_crtc;
+struct drm_connector;
+struct drm_encoder;
+
+/**
+ * drm_crtc_funcs - control CRTCs for a given device
+ * @dpms: control display power levels
+ * @save: save CRTC state
+ * @resore: restore CRTC state
+ * @lock: lock the CRTC
+ * @unlock: unlock the CRTC
+ * @shadow_allocate: allocate shadow pixmap
+ * @shadow_create: create shadow pixmap for rotation support
+ * @shadow_destroy: free shadow pixmap
+ * @mode_fixup: fixup proposed mode
+ * @mode_set: set the desired mode on the CRTC
+ * @gamma_set: specify color ramp for CRTC
+ * @destroy: deinit and free object.
+ *
+ * The drm_crtc_funcs structure is the central CRTC management structure
+ * in the DRM.  Each CRTC controls one or more connectors (note that the name
+ * CRTC is simply historical, a CRTC may control LVDS, VGA, DVI, TV out, etc.
+ * connectors, not just CRTs).
+ *
+ * Each driver is responsible for filling out this structure at startup time,
+ * in addition to providing other modesetting features, like i2c and DDC
+ * bus accessors.
+ */
+struct drm_crtc_funcs {
+	/* Save CRTC state */
+	void (*save)(struct drm_crtc *crtc); /* suspend? */
+	/* Restore CRTC state */
+	void (*restore)(struct drm_crtc *crtc); /* resume? */
+
+	/* cursor controls */
+	int (*cursor_set)(struct drm_crtc *crtc, struct drm_file *file_priv,
+			  uint32_t handle, uint32_t width, uint32_t height);
+	int (*cursor_move)(struct drm_crtc *crtc, int x, int y);
+
+	/* Set gamma on the CRTC */
+	void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+			  uint32_t size);
+	/* Object destroy routine */
+	void (*destroy)(struct drm_crtc *crtc);
+
+	int (*set_config)(struct drm_mode_set *set);
+};
+
+/**
+ * drm_crtc - central CRTC control structure
+ * @enabled: is this CRTC enabled?
+ * @x: x position on screen
+ * @y: y position on screen
+ * @desired_mode: new desired mode
+ * @desired_x: desired x for desired_mode
+ * @desired_y: desired y for desired_mode
+ * @funcs: CRTC control functions
+ *
+ * Each CRTC may have one or more connectors associated with it.  This structure
+ * allows the CRTC to be controlled.
+ */
+struct drm_crtc {
+	struct drm_device *dev;
+	struct list_head head;
+
+	struct drm_mode_object base;
+
+	/* framebuffer the connector is currently bound to */
+	struct drm_framebuffer *fb;
+
+	bool enabled;
+
+	struct drm_display_mode mode;
+
+	int x, y;
+	struct drm_display_mode *desired_mode;
+	int desired_x, desired_y;
+	const struct drm_crtc_funcs *funcs;
+
+	/* CRTC gamma size for reporting to userspace */
+	uint32_t gamma_size;
+	uint16_t *gamma_store;
+
+	/* if you are using the helper */
+	void *helper_private;
+};
+
+
+/**
+ * drm_connector_funcs - control connectors on a given device
+ * @dpms: set power state (see drm_crtc_funcs above)
+ * @save: save connector state
+ * @restore: restore connector state
+ * @mode_valid: is this mode valid on the given connector?
+ * @mode_fixup: try to fixup proposed mode for this connector
+ * @mode_set: set this mode
+ * @detect: is this connector active?
+ * @get_modes: get mode list for this connector
+ * @set_property: property for this connector may need update
+ * @destroy: make object go away
+ *
+ * Each CRTC may have one or more connectors attached to it.  The functions
+ * below allow the core DRM code to control connectors, enumerate available modes,
+ * etc.
+ */
+struct drm_connector_funcs {
+	void (*dpms)(struct drm_connector *connector, int mode);
+	void (*save)(struct drm_connector *connector);
+	void (*restore)(struct drm_connector *connector);
+	enum drm_connector_status (*detect)(struct drm_connector *connector);
+	void (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
+	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
+			     uint64_t val);
+	void (*destroy)(struct drm_connector *connector);
+};
+
+struct drm_encoder_funcs {
+	void (*destroy)(struct drm_encoder *encoder);
+};
+
+#define DRM_CONNECTOR_MAX_UMODES 16
+#define DRM_CONNECTOR_MAX_PROPERTY 16
+#define DRM_CONNECTOR_LEN 32
+#define DRM_CONNECTOR_MAX_ENCODER 2
+
+/**
+ * drm_encoder - central DRM encoder structure
+ */
+struct drm_encoder {
+	struct drm_device *dev;
+	struct list_head head;
+
+	struct drm_mode_object base;
+	int encoder_type;
+	uint32_t possible_crtcs;
+	uint32_t possible_clones;
+
+	struct drm_crtc *crtc;
+	const struct drm_encoder_funcs *funcs;
+	void *helper_private;
+};
+
+/**
+ * drm_connector - central DRM connector control structure
+ * @crtc: CRTC this connector is currently connected to, NULL if none
+ * @interlace_allowed: can this connector handle interlaced modes?
+ * @doublescan_allowed: can this connector handle doublescan?
+ * @available_modes: modes available on this connector (from get_modes() + user)
+ * @initial_x: initial x position for this connector
+ * @initial_y: initial y position for this connector
+ * @status: connector connected?
+ * @funcs: connector control functions
+ *
+ * Each connector may be connected to one or more CRTCs, or may be clonable by
+ * another connector if they can share a CRTC.  Each connector also has a specific
+ * position in the broader display (referred to as a 'screen' though it could
+ * span multiple monitors).
+ */
+struct drm_connector {
+	struct drm_device *dev;
+	struct device kdev;
+	struct device_attribute *attr;
+	struct list_head head;
+
+	struct drm_mode_object base;
+
+	int connector_type;
+	int connector_type_id;
+	bool interlace_allowed;
+	bool doublescan_allowed;
+	struct list_head modes; /* list of modes on this connector */
+
+	int initial_x, initial_y;
+	enum drm_connector_status status;
+
+	/* these are modes added by probing with DDC or the BIOS */
+	struct list_head probed_modes;
+
+	struct drm_display_info display_info;
+	const struct drm_connector_funcs *funcs;
+
+	struct list_head user_modes;
+	struct drm_property_blob *edid_blob_ptr;
+	u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY];
+	uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY];
+
+	void *helper_private;
+
+	uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER];
+	uint32_t force_encoder_id;
+	struct drm_encoder *encoder; /* currently active encoder */
+};
+
+/**
+ * struct drm_mode_set
+ *
+ * Represents a single crtc the connectors that it drives with what mode
+ * and from which framebuffer it scans out from.
+ *
+ * This is used to set modes.
+ */
+struct drm_mode_set {
+	struct list_head head;
+
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct drm_display_mode *mode;
+
+	uint32_t x;
+	uint32_t y;
+
+	struct drm_connector **connectors;
+	size_t num_connectors;
+};
+
+/**
+ * struct drm_mode_config_funcs - configure CRTCs for a given screen layout
+ * @resize: adjust CRTCs as necessary for the proposed layout
+ *
+ * Currently only a resize hook is available.  DRM will call back into the
+ * driver with a new screen width and height.  If the driver can't support
+ * the proposed size, it can return false.  Otherwise it should adjust
+ * the CRTC<->connector mappings as needed and update its view of the screen.
+ */
+struct drm_mode_config_funcs {
+	int (*resize_fb)(struct drm_device *dev, struct drm_file *file_priv, struct drm_framebuffer *fb, struct drm_mode_fb_cmd *mode_cmd);
+	struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, struct drm_mode_fb_cmd *mode_cmd);
+	int (*fb_changed)(struct drm_device *dev);
+};
+
+struct drm_mode_group {
+	uint32_t num_crtcs;
+	uint32_t num_encoders;
+	uint32_t num_connectors;
+
+	/* list of object IDs for this group */
+	uint32_t *id_list;
+};
+
+/**
+ * drm_mode_config - Mode configuration control structure
+ *
+ */
+struct drm_mode_config {
+	struct mutex mutex; /* protects configuration and IDR */
+	struct idr crtc_idr; /* use this idr for all IDs, fb, crtc, connector, modes - just makes life easier */
+	/* this is limited to one for now */
+	int num_fb;
+	struct list_head fb_list;
+	int num_connector;
+	struct list_head connector_list;
+	int num_encoder;
+	struct list_head encoder_list;
+
+	int num_crtc;
+	struct list_head crtc_list;
+
+	struct list_head property_list;
+
+	/* in-kernel framebuffers - hung of filp_head in drm_framebuffer */
+	struct list_head fb_kernel_list;
+
+	int min_width, min_height;
+	int max_width, max_height;
+	struct drm_mode_config_funcs *funcs;
+	unsigned long fb_base;
+
+	/* pointers to standard properties */
+	struct list_head property_blob_list;
+	struct drm_property *edid_property;
+	struct drm_property *dpms_property;
+
+	/* DVI-I properties */
+	struct drm_property *dvi_i_subconnector_property;
+	struct drm_property *dvi_i_select_subconnector_property;
+
+	/* TV properties */
+	struct drm_property *tv_subconnector_property;
+	struct drm_property *tv_select_subconnector_property;
+	struct drm_property *tv_mode_property;
+	struct drm_property *tv_left_margin_property;
+	struct drm_property *tv_right_margin_property;
+	struct drm_property *tv_top_margin_property;
+	struct drm_property *tv_bottom_margin_property;
+
+	/* Optional properties */
+	struct drm_property *scaling_mode_property;
+	struct drm_property *dithering_mode_property;
+
+	/* hotplug */
+	uint32_t hotplug_counter;
+};
+
+#define obj_to_crtc(x) container_of(x, struct drm_crtc, base)
+#define obj_to_connector(x) container_of(x, struct drm_connector, base)
+#define obj_to_encoder(x) container_of(x, struct drm_encoder, base)
+#define obj_to_mode(x) container_of(x, struct drm_display_mode, base)
+#define obj_to_fb(x) container_of(x, struct drm_framebuffer, base)
+#define obj_to_property(x) container_of(x, struct drm_property, base)
+#define obj_to_blob(x) container_of(x, struct drm_property_blob, base)
+
+
+extern void drm_crtc_init(struct drm_device *dev,
+			  struct drm_crtc *crtc,
+			  const struct drm_crtc_funcs *funcs);
+extern void drm_crtc_cleanup(struct drm_crtc *crtc);
+
+extern void drm_connector_init(struct drm_device *dev,
+			    struct drm_connector *connector,
+			    const struct drm_connector_funcs *funcs,
+			    int connector_type);
+
+extern void drm_connector_cleanup(struct drm_connector *connector);
+
+extern void drm_encoder_init(struct drm_device *dev,
+			     struct drm_encoder *encoder,
+			     const struct drm_encoder_funcs *funcs,
+			     int encoder_type);
+
+extern void drm_encoder_cleanup(struct drm_encoder *encoder);
+
+extern char *drm_get_connector_name(struct drm_connector *connector);
+extern char *drm_get_dpms_name(int val);
+extern char *drm_get_dvi_i_subconnector_name(int val);
+extern char *drm_get_dvi_i_select_name(int val);
+extern char *drm_get_tv_subconnector_name(int val);
+extern char *drm_get_tv_select_name(int val);
+extern void drm_fb_release(struct file *filp);
+extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group);
+extern struct edid *drm_get_edid(struct drm_connector *connector,
+				 struct i2c_adapter *adapter);
+extern unsigned char *drm_do_probe_ddc_edid(struct i2c_adapter *adapter);
+extern int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
+extern void drm_mode_probed_add(struct drm_connector *connector, struct drm_display_mode *mode);
+extern void drm_mode_remove(struct drm_connector *connector, struct drm_display_mode *mode);
+extern struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
+						   struct drm_display_mode *mode);
+extern void drm_mode_debug_printmodeline(struct drm_display_mode *mode);
+extern void drm_mode_config_init(struct drm_device *dev);
+extern void drm_mode_config_cleanup(struct drm_device *dev);
+extern void drm_mode_set_name(struct drm_display_mode *mode);
+extern bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2);
+extern int drm_mode_width(struct drm_display_mode *mode);
+extern int drm_mode_height(struct drm_display_mode *mode);
+
+/* for us by fb module */
+extern int drm_mode_attachmode_crtc(struct drm_device *dev,
+				    struct drm_crtc *crtc,
+				    struct drm_display_mode *mode);
+extern int drm_mode_detachmode_crtc(struct drm_device *dev, struct drm_display_mode *mode);
+
+extern struct drm_display_mode *drm_mode_create(struct drm_device *dev);
+extern void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode);
+extern void drm_mode_list_concat(struct list_head *head,
+				 struct list_head *new);
+extern void drm_mode_validate_size(struct drm_device *dev,
+				   struct list_head *mode_list,
+				   int maxX, int maxY, int maxPitch);
+extern void drm_mode_prune_invalid(struct drm_device *dev,
+				   struct list_head *mode_list, bool verbose);
+extern void drm_mode_sort(struct list_head *mode_list);
+extern int drm_mode_vrefresh(struct drm_display_mode *mode);
+extern void drm_mode_set_crtcinfo(struct drm_display_mode *p,
+				  int adjust_flags);
+extern void drm_mode_connector_list_update(struct drm_connector *connector);
+extern int drm_mode_connector_update_edid_property(struct drm_connector *connector,
+						struct edid *edid);
+extern int drm_connector_property_set_value(struct drm_connector *connector,
+					 struct drm_property *property,
+					 uint64_t value);
+extern int drm_connector_property_get_value(struct drm_connector *connector,
+					 struct drm_property *property,
+					 uint64_t *value);
+extern struct drm_display_mode *drm_crtc_mode_create(struct drm_device *dev);
+extern void drm_framebuffer_set_object(struct drm_device *dev,
+				       unsigned long handle);
+extern int drm_framebuffer_init(struct drm_device *dev,
+				struct drm_framebuffer *fb,
+				const struct drm_framebuffer_funcs *funcs);
+extern void drm_framebuffer_cleanup(struct drm_framebuffer *fb);
+extern int drmfb_probe(struct drm_device *dev, struct drm_crtc *crtc);
+extern int drmfb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
+extern void drm_crtc_probe_connector_modes(struct drm_device *dev, int maxX, int maxY);
+extern bool drm_crtc_in_use(struct drm_crtc *crtc);
+
+extern int drm_connector_attach_property(struct drm_connector *connector,
+				      struct drm_property *property, uint64_t init_val);
+extern struct drm_property *drm_property_create(struct drm_device *dev, int flags,
+						const char *name, int num_values);
+extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property);
+extern int drm_property_add_enum(struct drm_property *property, int index,
+				 uint64_t value, const char *name);
+extern int drm_mode_create_dvi_i_properties(struct drm_device *dev);
+extern int drm_mode_create_tv_properties(struct drm_device *dev, int num_formats,
+				     char *formats[]);
+extern int drm_mode_create_scaling_mode_property(struct drm_device *dev);
+extern int drm_mode_create_dithering_property(struct drm_device *dev);
+extern char *drm_get_encoder_name(struct drm_encoder *encoder);
+
+extern int drm_mode_connector_attach_encoder(struct drm_connector *connector,
+					     struct drm_encoder *encoder);
+extern void drm_mode_connector_detach_encoder(struct drm_connector *connector,
+					   struct drm_encoder *encoder);
+extern bool drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc,
+					 int gamma_size);
+extern void *drm_mode_object_find(struct drm_device *dev, uint32_t id, uint32_t type);
+/* IOCTLs */
+extern int drm_mode_getresources(struct drm_device *dev,
+				 void *data, struct drm_file *file_priv);
+
+extern int drm_mode_getcrtc(struct drm_device *dev,
+			    void *data, struct drm_file *file_priv);
+extern int drm_mode_getconnector(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv);
+extern int drm_mode_setcrtc(struct drm_device *dev,
+			    void *data, struct drm_file *file_priv);
+extern int drm_mode_cursor_ioctl(struct drm_device *dev,
+				void *data, struct drm_file *file_priv);
+extern int drm_mode_addfb(struct drm_device *dev,
+			  void *data, struct drm_file *file_priv);
+extern int drm_mode_rmfb(struct drm_device *dev,
+			 void *data, struct drm_file *file_priv);
+extern int drm_mode_getfb(struct drm_device *dev,
+			  void *data, struct drm_file *file_priv);
+extern int drm_mode_addmode_ioctl(struct drm_device *dev,
+				  void *data, struct drm_file *file_priv);
+extern int drm_mode_rmmode_ioctl(struct drm_device *dev,
+				 void *data, struct drm_file *file_priv);
+extern int drm_mode_attachmode_ioctl(struct drm_device *dev,
+				     void *data, struct drm_file *file_priv);
+extern int drm_mode_detachmode_ioctl(struct drm_device *dev,
+				     void *data, struct drm_file *file_priv);
+
+extern int drm_mode_getproperty_ioctl(struct drm_device *dev,
+				      void *data, struct drm_file *file_priv);
+extern int drm_mode_getblob_ioctl(struct drm_device *dev,
+				  void *data, struct drm_file *file_priv);
+extern int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
+					      void *data, struct drm_file *file_priv);
+extern int drm_mode_hotplug_ioctl(struct drm_device *dev,
+				  void *data, struct drm_file *file_priv);
+extern int drm_mode_replacefb(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv);
+extern int drm_mode_getencoder(struct drm_device *dev,
+			       void *data, struct drm_file *file_priv);
+extern int drm_mode_gamma_get_ioctl(struct drm_device *dev,
+				    void *data, struct drm_file *file_priv);
+extern int drm_mode_gamma_set_ioctl(struct drm_device *dev,
+				    void *data, struct drm_file *file_priv);
+#endif /* __DRM_CRTC_H__ */
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
new file mode 100644
index 000000000000..a341828d1d15
--- /dev/null
+++ b/include/drm/drm_crtc_helper.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2006 Keith Packard
+ * Copyright © 2007-2008 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The DRM mode setting helper functions are common code for drivers to use if
+ * they wish.  Drivers are not forced to use this code in their
+ * implementations but it would be useful if they code they do use at least
+ * provides a consistent interface and operation to userspace
+ */
+
+#ifndef __DRM_CRTC_HELPER_H__
+#define __DRM_CRTC_HELPER_H__
+
+#include <linux/i2c.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/idr.h>
+
+#include <linux/fb.h>
+
+struct drm_crtc_helper_funcs {
+	/*
+	 * Control power levels on the CRTC.  If the mode passed in is
+	 * unsupported, the provider must use the next lowest power level.
+	 */
+	void (*dpms)(struct drm_crtc *crtc, int mode);
+	void (*prepare)(struct drm_crtc *crtc);
+	void (*commit)(struct drm_crtc *crtc);
+
+	/* Provider can fixup or change mode timings before modeset occurs */
+	bool (*mode_fixup)(struct drm_crtc *crtc,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+	/* Actually set the mode */
+	void (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode, int x, int y);
+
+	/* Move the crtc on the current fb to the given position *optional* */
+	void (*mode_set_base)(struct drm_crtc *crtc, int x, int y);
+};
+
+struct drm_encoder_helper_funcs {
+	void (*dpms)(struct drm_encoder *encoder, int mode);
+	void (*save)(struct drm_encoder *encoder);
+	void (*restore)(struct drm_encoder *encoder);
+
+	bool (*mode_fixup)(struct drm_encoder *encoder,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+	void (*prepare)(struct drm_encoder *encoder);
+	void (*commit)(struct drm_encoder *encoder);
+	void (*mode_set)(struct drm_encoder *encoder,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode);
+	/* detect for DAC style encoders */
+	enum drm_connector_status (*detect)(struct drm_encoder *encoder,
+					    struct drm_connector *connector);
+};
+
+struct drm_connector_helper_funcs {
+	int (*get_modes)(struct drm_connector *connector);
+	int (*mode_valid)(struct drm_connector *connector,
+			  struct drm_display_mode *mode);
+	struct drm_encoder *(*best_encoder)(struct drm_connector *connector);
+};
+
+extern void drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY);
+extern void drm_helper_disable_unused_functions(struct drm_device *dev);
+extern int drm_helper_hotplug_stage_two(struct drm_device *dev);
+extern bool drm_helper_initial_config(struct drm_device *dev, bool can_grow);
+extern int drm_crtc_helper_set_config(struct drm_mode_set *set);
+extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
+				     struct drm_display_mode *mode,
+				     int x, int y);
+extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
+
+extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
+					  struct drm_mode_fb_cmd *mode_cmd);
+
+static inline void drm_crtc_helper_add(struct drm_crtc *crtc,
+				       const struct drm_crtc_helper_funcs *funcs)
+{
+	crtc->helper_private = (void *)funcs;
+}
+
+static inline void drm_encoder_helper_add(struct drm_encoder *encoder,
+					  const struct drm_encoder_helper_funcs *funcs)
+{
+	encoder->helper_private = (void *)funcs;
+}
+
+static inline void drm_connector_helper_add(struct drm_connector *connector,
+					    const struct drm_connector_helper_funcs *funcs)
+{
+	connector->helper_private = (void *)funcs;
+}
+
+extern int drm_helper_resume_force_mode(struct drm_device *dev);
+#endif
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
new file mode 100644
index 000000000000..c707c15f5164
--- /dev/null
+++ b/include/drm/drm_edid.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __DRM_EDID_H__
+#define __DRM_EDID_H__
+
+#include <linux/types.h>
+
+#define EDID_LENGTH 128
+#define DDC_ADDR 0x50
+
+#ifdef BIG_ENDIAN
+#error "EDID structure is little endian, need big endian versions"
+#else
+
+struct est_timings {
+	u8 t1;
+	u8 t2;
+	u8 mfg_rsvd;
+} __attribute__((packed));
+
+struct std_timing {
+	u8 hsize; /* need to multiply by 8 then add 248 */
+	u8 vfreq:6; /* need to add 60 */
+	u8 aspect_ratio:2; /* 00=16:10, 01=4:3, 10=5:4, 11=16:9 */
+} __attribute__((packed));
+
+/* If detailed data is pixel timing */
+struct detailed_pixel_timing {
+	u8 hactive_lo;
+	u8 hblank_lo;
+	u8 hblank_hi:4;
+	u8 hactive_hi:4;
+	u8 vactive_lo;
+	u8 vblank_lo;
+	u8 vblank_hi:4;
+	u8 vactive_hi:4;
+	u8 hsync_offset_lo;
+	u8 hsync_pulse_width_lo;
+	u8 vsync_pulse_width_lo:4;
+	u8 vsync_offset_lo:4;
+	u8 hsync_pulse_width_hi:2;
+	u8 hsync_offset_hi:2;
+	u8 vsync_pulse_width_hi:2;
+	u8 vsync_offset_hi:2;
+	u8 width_mm_lo;
+	u8 height_mm_lo;
+	u8 height_mm_hi:4;
+	u8 width_mm_hi:4;
+	u8 hborder;
+	u8 vborder;
+	u8 unknown0:1;
+	u8 vsync_positive:1;
+	u8 hsync_positive:1;
+	u8 separate_sync:2;
+	u8 stereo:1;
+	u8 unknown6:1;
+	u8 interlaced:1;
+} __attribute__((packed));
+
+/* If it's not pixel timing, it'll be one of the below */
+struct detailed_data_string {
+	u8 str[13];
+} __attribute__((packed));
+
+struct detailed_data_monitor_range {
+	u8 min_vfreq;
+	u8 max_vfreq;
+	u8 min_hfreq_khz;
+	u8 max_hfreq_khz;
+	u8 pixel_clock_mhz; /* need to multiply by 10 */
+	u16 sec_gtf_toggle; /* A000=use above, 20=use below */ /* FIXME: byte order */
+	u8 hfreq_start_khz; /* need to multiply by 2 */
+	u8 c; /* need to divide by 2 */
+	u16 m; /* FIXME: byte order */
+	u8 k;
+	u8 j; /* need to divide by 2 */
+} __attribute__((packed));
+
+struct detailed_data_wpindex {
+	u8 white_y_lo:2;
+	u8 white_x_lo:2;
+	u8 pad:4;
+	u8 white_x_hi;
+	u8 white_y_hi;
+	u8 gamma; /* need to divide by 100 then add 1 */
+} __attribute__((packed));
+
+struct detailed_data_color_point {
+	u8 windex1;
+	u8 wpindex1[3];
+	u8 windex2;
+	u8 wpindex2[3];
+} __attribute__((packed));
+
+struct detailed_non_pixel {
+	u8 pad1;
+	u8 type; /* ff=serial, fe=string, fd=monitor range, fc=monitor name
+		    fb=color point data, fa=standard timing data,
+		    f9=undefined, f8=mfg. reserved */
+	u8 pad2;
+	union {
+		struct detailed_data_string str;
+		struct detailed_data_monitor_range range;
+		struct detailed_data_wpindex color;
+		struct std_timing timings[5];
+	} data;
+} __attribute__((packed));
+
+#define EDID_DETAIL_STD_MODES 0xfa
+#define EDID_DETAIL_MONITOR_CPDATA 0xfb
+#define EDID_DETAIL_MONITOR_NAME 0xfc
+#define EDID_DETAIL_MONITOR_RANGE 0xfd
+#define EDID_DETAIL_MONITOR_STRING 0xfe
+#define EDID_DETAIL_MONITOR_SERIAL 0xff
+
+struct detailed_timing {
+	u16 pixel_clock; /* need to multiply by 10 KHz */ /* FIXME: byte order */
+	union {
+		struct detailed_pixel_timing pixel_data;
+		struct detailed_non_pixel other_data;
+	} data;
+} __attribute__((packed));
+
+struct edid {
+	u8 header[8];
+	/* Vendor & product info */
+	u8 mfg_id[2];
+	u8 prod_code[2];
+	u32 serial; /* FIXME: byte order */
+	u8 mfg_week;
+	u8 mfg_year;
+	/* EDID version */
+	u8 version;
+	u8 revision;
+	/* Display info: */
+	/*   input definition */
+	u8 serration_vsync:1;
+	u8 sync_on_green:1;
+	u8 composite_sync:1;
+	u8 separate_syncs:1;
+	u8 blank_to_black:1;
+	u8 video_level:2;
+	u8 digital:1; /* bits below must be zero if set */
+	u8 width_cm;
+	u8 height_cm;
+	u8 gamma;
+	/*   feature support */
+	u8 default_gtf:1;
+	u8 preferred_timing:1;
+	u8 standard_color:1;
+	u8 display_type:2; /* 00=mono, 01=rgb, 10=non-rgb, 11=unknown */
+	u8 pm_active_off:1;
+	u8 pm_suspend:1;
+	u8 pm_standby:1;
+	/* Color characteristics */
+	u8 red_green_lo;
+	u8 black_white_lo;
+	u8 red_x;
+	u8 red_y;
+	u8 green_x;
+	u8 green_y;
+	u8 blue_x;
+	u8 blue_y;
+	u8 white_x;
+	u8 white_y;
+	/* Est. timings and mfg rsvd timings*/
+	struct est_timings established_timings;
+	/* Standard timings 1-8*/
+	struct std_timing standard_timings[8];
+	/* Detailing timings 1-4 */
+	struct detailed_timing detailed_timings[4];
+	/* Number of 128 byte ext. blocks */
+	u8 extensions;
+	/* Checksum */
+	u8 checksum;
+} __attribute__((packed));
+
+#endif /* little endian structs */
+
+#define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
+
+#endif /* __DRM_EDID_H__ */
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
new file mode 100644
index 000000000000..d2e791920aba
--- /dev/null
+++ b/include/drm/drm_mode.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2007 Jakob Bornecrantz <wallbraker@gmail.com>
+ * Copyright (c) 2008 Red Hat Inc.
+ * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
+ * Copyright (c) 2007-2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _DRM_MODE_H
+#define _DRM_MODE_H
+
+#if !defined(__KERNEL__) && !defined(_KERNEL)
+#include <stdint.h>
+#else
+#include <linux/kernel.h>
+#endif
+
+#define DRM_DISPLAY_INFO_LEN 32
+#define DRM_CONNECTOR_NAME_LEN 32
+#define DRM_DISPLAY_MODE_LEN 32
+#define DRM_PROP_NAME_LEN 32
+
+#define DRM_MODE_TYPE_BUILTIN	(1<<0)
+#define DRM_MODE_TYPE_CLOCK_C	((1<<1) | DRM_MODE_TYPE_BUILTIN)
+#define DRM_MODE_TYPE_CRTC_C	((1<<2) | DRM_MODE_TYPE_BUILTIN)
+#define DRM_MODE_TYPE_PREFERRED	(1<<3)
+#define DRM_MODE_TYPE_DEFAULT	(1<<4)
+#define DRM_MODE_TYPE_USERDEF	(1<<5)
+#define DRM_MODE_TYPE_DRIVER	(1<<6)
+
+/* Video mode flags */
+/* bit compatible with the xorg definitions. */
+#define DRM_MODE_FLAG_PHSYNC	(1<<0)
+#define DRM_MODE_FLAG_NHSYNC	(1<<1)
+#define DRM_MODE_FLAG_PVSYNC	(1<<2)
+#define DRM_MODE_FLAG_NVSYNC	(1<<3)
+#define DRM_MODE_FLAG_INTERLACE	(1<<4)
+#define DRM_MODE_FLAG_DBLSCAN	(1<<5)
+#define DRM_MODE_FLAG_CSYNC	(1<<6)
+#define DRM_MODE_FLAG_PCSYNC	(1<<7)
+#define DRM_MODE_FLAG_NCSYNC	(1<<8)
+#define DRM_MODE_FLAG_HSKEW	(1<<9) /* hskew provided */
+#define DRM_MODE_FLAG_BCAST	(1<<10)
+#define DRM_MODE_FLAG_PIXMUX	(1<<11)
+#define DRM_MODE_FLAG_DBLCLK	(1<<12)
+#define DRM_MODE_FLAG_CLKDIV2	(1<<13)
+
+/* DPMS flags */
+/* bit compatible with the xorg definitions. */
+#define DRM_MODE_DPMS_ON 0
+#define DRM_MODE_DPMS_STANDBY 1
+#define DRM_MODE_DPMS_SUSPEND 2
+#define DRM_MODE_DPMS_OFF 3
+
+/* Scaling mode options */
+#define DRM_MODE_SCALE_NON_GPU 0
+#define DRM_MODE_SCALE_FULLSCREEN 1
+#define DRM_MODE_SCALE_NO_SCALE 2
+#define DRM_MODE_SCALE_ASPECT 3
+
+/* Dithering mode options */
+#define DRM_MODE_DITHERING_OFF 0
+#define DRM_MODE_DITHERING_ON 1
+
+struct drm_mode_modeinfo {
+	unsigned int clock;
+	unsigned short hdisplay, hsync_start, hsync_end, htotal, hskew;
+	unsigned short vdisplay, vsync_start, vsync_end, vtotal, vscan;
+
+	unsigned int vrefresh; /* vertical refresh * 1000 */
+
+	unsigned int flags;
+	unsigned int type;
+	char name[DRM_DISPLAY_MODE_LEN];
+};
+
+struct drm_mode_card_res {
+	uint64_t fb_id_ptr;
+	uint64_t crtc_id_ptr;
+	uint64_t connector_id_ptr;
+	uint64_t encoder_id_ptr;
+	int count_fbs;
+	int count_crtcs;
+	int count_connectors;
+	int count_encoders;
+	int min_width, max_width;
+	int min_height, max_height;
+};
+
+struct drm_mode_crtc {
+	uint64_t set_connectors_ptr;
+	int count_connectors;
+
+	unsigned int crtc_id; /**< Id */
+	unsigned int fb_id; /**< Id of framebuffer */
+
+	int x, y; /**< Position on the frameuffer */
+
+	uint32_t gamma_size;
+	int mode_valid;
+	struct drm_mode_modeinfo mode;
+};
+
+#define DRM_MODE_ENCODER_NONE 0
+#define DRM_MODE_ENCODER_DAC  1
+#define DRM_MODE_ENCODER_TMDS 2
+#define DRM_MODE_ENCODER_LVDS 3
+#define DRM_MODE_ENCODER_TVDAC 4
+
+struct drm_mode_get_encoder {
+	unsigned int encoder_id;
+	unsigned int encoder_type;
+
+	unsigned int crtc_id; /**< Id of crtc */
+
+	uint32_t possible_crtcs;
+	uint32_t possible_clones;
+};
+
+/* This is for connectors with multiple signal types. */
+/* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */
+#define DRM_MODE_SUBCONNECTOR_Automatic 0
+#define DRM_MODE_SUBCONNECTOR_Unknown 0
+#define DRM_MODE_SUBCONNECTOR_DVID 3
+#define DRM_MODE_SUBCONNECTOR_DVIA 4
+#define DRM_MODE_SUBCONNECTOR_Composite 5
+#define DRM_MODE_SUBCONNECTOR_SVIDEO 6
+#define DRM_MODE_SUBCONNECTOR_Component 8
+
+#define DRM_MODE_CONNECTOR_Unknown 0
+#define DRM_MODE_CONNECTOR_VGA 1
+#define DRM_MODE_CONNECTOR_DVII 2
+#define DRM_MODE_CONNECTOR_DVID 3
+#define DRM_MODE_CONNECTOR_DVIA 4
+#define DRM_MODE_CONNECTOR_Composite 5
+#define DRM_MODE_CONNECTOR_SVIDEO 6
+#define DRM_MODE_CONNECTOR_LVDS 7
+#define DRM_MODE_CONNECTOR_Component 8
+#define DRM_MODE_CONNECTOR_9PinDIN 9
+#define DRM_MODE_CONNECTOR_DisplayPort 10
+#define DRM_MODE_CONNECTOR_HDMIA 11
+#define DRM_MODE_CONNECTOR_HDMIB 12
+
+struct drm_mode_get_connector {
+
+	uint64_t encoders_ptr;
+	uint64_t modes_ptr;
+	uint64_t props_ptr;
+	uint64_t prop_values_ptr;
+
+	int count_modes;
+	int count_props;
+	int count_encoders;
+
+	unsigned int encoder_id; /**< Current Encoder */
+	unsigned int connector_id; /**< Id */
+	unsigned int connector_type;
+	unsigned int connector_type_id;
+
+	unsigned int connection;
+	unsigned int mm_width, mm_height; /**< HxW in millimeters */
+	unsigned int subpixel;
+};
+
+#define DRM_MODE_PROP_PENDING (1<<0)
+#define DRM_MODE_PROP_RANGE (1<<1)
+#define DRM_MODE_PROP_IMMUTABLE (1<<2)
+#define DRM_MODE_PROP_ENUM (1<<3) /* enumerated type with text strings */
+#define DRM_MODE_PROP_BLOB (1<<4)
+
+struct drm_mode_property_enum {
+	uint64_t value;
+	unsigned char name[DRM_PROP_NAME_LEN];
+};
+
+struct drm_mode_get_property {
+	uint64_t values_ptr; /* values and blob lengths */
+	uint64_t enum_blob_ptr; /* enum and blob id ptrs */
+
+	unsigned int prop_id;
+	unsigned int flags;
+	unsigned char name[DRM_PROP_NAME_LEN];
+
+	int count_values;
+	int count_enum_blobs;
+};
+
+struct drm_mode_connector_set_property {
+	uint64_t value;
+	unsigned int prop_id;
+	unsigned int connector_id;
+};
+
+struct drm_mode_get_blob {
+	uint32_t blob_id;
+	uint32_t length;
+	uint64_t data;
+};
+
+struct drm_mode_fb_cmd {
+	unsigned int buffer_id;
+	unsigned int width, height;
+	unsigned int pitch;
+	unsigned int bpp;
+	unsigned int depth;
+
+	unsigned int handle;
+};
+
+struct drm_mode_mode_cmd {
+	unsigned int connector_id;
+	struct drm_mode_modeinfo mode;
+};
+
+#define DRM_MODE_CURSOR_BO   0x01
+#define DRM_MODE_CURSOR_MOVE 0x02
+
+/*
+ * depending on the value in flags diffrent members are used.
+ *
+ * CURSOR_BO uses
+ *    crtc
+ *    width
+ *    height
+ *    handle - if 0 turns the cursor of
+ *
+ * CURSOR_MOVE uses
+ *    crtc
+ *    x
+ *    y
+ */
+struct drm_mode_cursor {
+	unsigned int flags;
+	unsigned int crtc;
+	int x;
+	int y;
+	uint32_t width;
+	uint32_t height;
+	unsigned int handle;
+};
+
+/*
+ * oh so ugly hotplug
+ */
+struct drm_mode_hotplug {
+	uint32_t counter;
+};
+
+struct drm_mode_crtc_lut {
+
+	uint32_t crtc_id;
+	uint32_t gamma_size;
+
+	/* pointers to arrays */
+	uint64_t red;
+	uint64_t green;
+	uint64_t blue;
+};
+
+#endif
diff --git a/include/linux/console.h b/include/linux/console.h
index 248e6e3b9b73..a67a90cf8268 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -153,4 +153,8 @@ void vcs_remove_sysfs(struct tty_struct *tty);
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
+#ifdef CONFIG_VGA_CONSOLE
+extern bool vgacon_text_force(void);
+#endif
+
 #endif /* _LINUX_CONSOLE_H */
-- 
cgit v1.2.3


From 79e539453b34e35f39299a899d263b0a1f1670bd Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Fri, 7 Nov 2008 14:24:08 -0800
Subject: DRM: i915: add mode setting support

This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported.  HDMI, DisplayPort and additional SDVO output support will
follow.

Support for the mode setting code is controlled by the new 'modeset'
module option.  A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.

Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing.  So use caution when
enabling this code; be sure your user level code supports the new
interfaces.

A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.

Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Kconfig                |   11 +
 drivers/gpu/drm/i915/Makefile          |   17 +-
 drivers/gpu/drm/i915/dvo.h             |  151 +++
 drivers/gpu/drm/i915/dvo_ch7017.c      |  454 +++++++++
 drivers/gpu/drm/i915/dvo_ch7xxx.c      |  368 +++++++
 drivers/gpu/drm/i915/dvo_ivch.c        |  442 ++++++++
 drivers/gpu/drm/i915/dvo_sil164.c      |  302 ++++++
 drivers/gpu/drm/i915/dvo_tfp410.c      |  335 +++++++
 drivers/gpu/drm/i915/i915_dma.c        |  258 ++++-
 drivers/gpu/drm/i915/i915_drv.c        |   33 +
 drivers/gpu/drm/i915/i915_drv.h        |   45 +
 drivers/gpu/drm/i915/i915_gem.c        |  132 ++-
 drivers/gpu/drm/i915/i915_irq.c        |   21 +
 drivers/gpu/drm/i915/intel_bios.c      |  193 ++++
 drivers/gpu/drm/i915/intel_bios.h      |  405 ++++++++
 drivers/gpu/drm/i915/intel_crt.c       |  284 ++++++
 drivers/gpu/drm/i915/intel_display.c   | 1621 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h       |  146 +++
 drivers/gpu/drm/i915/intel_dvo.c       |  501 ++++++++++
 drivers/gpu/drm/i915/intel_fb.c        |  926 +++++++++++++++++
 drivers/gpu/drm/i915/intel_i2c.c       |  184 ++++
 drivers/gpu/drm/i915/intel_lvds.c      |  525 ++++++++++
 drivers/gpu/drm/i915/intel_modes.c     |   83 ++
 drivers/gpu/drm/i915/intel_sdvo.c      | 1127 +++++++++++++++++++++
 drivers/gpu/drm/i915/intel_sdvo_regs.h |  327 ++++++
 drivers/gpu/drm/i915/intel_tv.c        | 1725 ++++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                 |    2 +-
 27 files changed, 10567 insertions(+), 51 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/dvo.h
 create mode 100644 drivers/gpu/drm/i915/dvo_ch7017.c
 create mode 100644 drivers/gpu/drm/i915/dvo_ch7xxx.c
 create mode 100644 drivers/gpu/drm/i915/dvo_ivch.c
 create mode 100644 drivers/gpu/drm/i915/dvo_sil164.c
 create mode 100644 drivers/gpu/drm/i915/dvo_tfp410.c
 create mode 100644 drivers/gpu/drm/i915/intel_bios.c
 create mode 100644 drivers/gpu/drm/i915/intel_bios.h
 create mode 100644 drivers/gpu/drm/i915/intel_crt.c
 create mode 100644 drivers/gpu/drm/i915/intel_display.c
 create mode 100644 drivers/gpu/drm/i915/intel_drv.h
 create mode 100644 drivers/gpu/drm/i915/intel_dvo.c
 create mode 100644 drivers/gpu/drm/i915/intel_fb.c
 create mode 100644 drivers/gpu/drm/i915/intel_i2c.c
 create mode 100644 drivers/gpu/drm/i915/intel_lvds.c
 create mode 100644 drivers/gpu/drm/i915/intel_modes.c
 create mode 100644 drivers/gpu/drm/i915/intel_sdvo.c
 create mode 100644 drivers/gpu/drm/i915/intel_sdvo_regs.h
 create mode 100644 drivers/gpu/drm/i915/intel_tv.c

(limited to 'include')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index a8b33c2ec8d2..5f26d6c840de 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -76,6 +76,17 @@ config DRM_I915
 
 endchoice
 
+config DRM_I915_KMS
+	bool "Enable modesetting on intel by default"
+	depends on DRM_I915
+	help
+	Choose this option if you want kernel modesetting enabled by default,
+	and you have a new enough userspace to support this. Running old
+	userspaces with this enabled will cause pain.  Note that this causes
+	the driver to bind to PCI devices, which precludes loading things
+	like intelfb.
+
+
 config DRM_MGA
 	tristate "Matrox g200/g400"
 	depends on DRM
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d8fb5d8ee7ea..dd57a5bd4572 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -8,7 +8,22 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  i915_gem.o \
 	  i915_gem_debug.o \
 	  i915_gem_proc.o \
-	  i915_gem_tiling.o
+	  i915_gem_tiling.o \
+	  intel_display.o \
+	  intel_crt.o \
+	  intel_lvds.o \
+	  intel_bios.o \
+	  intel_sdvo.o \
+	  intel_modes.o \
+	  intel_i2c.o \
+	  intel_fb.o \
+	  intel_tv.o \
+	  intel_dvo.o \
+	  dvo_ch7xxx.o \
+	  dvo_ch7017.o \
+	  dvo_ivch.o \
+	  dvo_tfp410.o \
+	  dvo_sil164.o
 
 i915-$(CONFIG_ACPI)	+= i915_opregion.o
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
diff --git a/drivers/gpu/drm/i915/dvo.h b/drivers/gpu/drm/i915/dvo.h
new file mode 100644
index 000000000000..e80866c5e1ae
--- /dev/null
+++ b/drivers/gpu/drm/i915/dvo.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2006 Eric Anholt
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _INTEL_DVO_H
+#define _INTEL_DVO_H
+
+#include <linux/i2c.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "intel_drv.h"
+
+struct intel_dvo_device {
+	char *name;
+	int type;
+	/* DVOA/B/C output register */
+	u32 dvo_reg;
+	/* GPIO register used for i2c bus to control this device */
+	u32 gpio;
+	int slave_addr;
+	struct intel_i2c_chan *i2c_bus;
+
+	const struct intel_dvo_dev_ops *dev_ops;
+	void *dev_priv;
+
+	struct drm_display_mode *panel_fixed_mode;
+	bool panel_wants_dither;
+};
+
+struct intel_dvo_dev_ops {
+	/*
+	 * Initialize the device at startup time.
+	 * Returns NULL if the device does not exist.
+	 */
+	bool (*init)(struct intel_dvo_device *dvo,
+		     struct intel_i2c_chan *i2cbus);
+
+	/*
+	 * Called to allow the output a chance to create properties after the
+	 * RandR objects have been created.
+	 */
+	void (*create_resources)(struct intel_dvo_device *dvo);
+
+	/*
+	 * Turn on/off output or set intermediate power levels if available.
+	 *
+	 * Unsupported intermediate modes drop to the lower power setting.
+	 * If the  mode is DPMSModeOff, the output must be disabled,
+	 * as the DPLL may be disabled afterwards.
+	 */
+	void (*dpms)(struct intel_dvo_device *dvo, int mode);
+
+	/*
+	 * Saves the output's state for restoration on VT switch.
+	 */
+	void (*save)(struct intel_dvo_device *dvo);
+
+	/*
+	 * Restore's the output's state at VT switch.
+	 */
+	void (*restore)(struct intel_dvo_device *dvo);
+
+	/*
+	 * Callback for testing a video mode for a given output.
+	 *
+	 * This function should only check for cases where a mode can't
+	 * be supported on the output specifically, and not represent
+	 * generic CRTC limitations.
+	 *
+	 * \return MODE_OK if the mode is valid, or another MODE_* otherwise.
+	 */
+	int (*mode_valid)(struct intel_dvo_device *dvo,
+			  struct drm_display_mode *mode);
+
+	/*
+	 * Callback to adjust the mode to be set in the CRTC.
+	 *
+	 * This allows an output to adjust the clock or even the entire set of
+	 * timings, which is used for panels with fixed timings or for
+	 * buses with clock limitations.
+	 */
+	bool (*mode_fixup)(struct intel_dvo_device *dvo,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+
+	/*
+	 * Callback for preparing mode changes on an output
+	 */
+	void (*prepare)(struct intel_dvo_device *dvo);
+
+	/*
+	 * Callback for committing mode changes on an output
+	 */
+	void (*commit)(struct intel_dvo_device *dvo);
+
+	/*
+	 * Callback for setting up a video mode after fixups have been made.
+	 *
+	 * This is only called while the output is disabled.  The dpms callback
+	 * must be all that's necessary for the output, to turn the output on
+	 * after this function is called.
+	 */
+	void (*mode_set)(struct intel_dvo_device *dvo,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode);
+
+	/*
+	 * Probe for a connected output, and return detect_status.
+	 */
+	enum drm_connector_status (*detect)(struct intel_dvo_device *dvo);
+
+	/**
+	 * Query the device for the modes it provides.
+	 *
+	 * This function may also update MonInfo, mm_width, and mm_height.
+	 *
+	 * \return singly-linked list of modes or NULL if no modes found.
+	 */
+	struct drm_display_mode *(*get_modes)(struct intel_dvo_device *dvo);
+
+	/**
+	 * Clean up driver-specific bits of the output
+	 */
+	void (*destroy) (struct intel_dvo_device *dvo);
+
+	/**
+	 * Debugging hook to dump device registers to log file
+	 */
+	void (*dump_regs)(struct intel_dvo_device *dvo);
+};
+
+#endif /* _INTEL_DVO_H */
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c
new file mode 100644
index 000000000000..03d4b4973b02
--- /dev/null
+++ b/drivers/gpu/drm/i915/dvo_ch7017.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "dvo.h"
+
+#define CH7017_TV_DISPLAY_MODE		0x00
+#define CH7017_FLICKER_FILTER		0x01
+#define CH7017_VIDEO_BANDWIDTH		0x02
+#define CH7017_TEXT_ENHANCEMENT		0x03
+#define CH7017_START_ACTIVE_VIDEO	0x04
+#define CH7017_HORIZONTAL_POSITION	0x05
+#define CH7017_VERTICAL_POSITION	0x06
+#define CH7017_BLACK_LEVEL		0x07
+#define CH7017_CONTRAST_ENHANCEMENT	0x08
+#define CH7017_TV_PLL			0x09
+#define CH7017_TV_PLL_M			0x0a
+#define CH7017_TV_PLL_N			0x0b
+#define CH7017_SUB_CARRIER_0		0x0c
+#define CH7017_CIV_CONTROL		0x10
+#define CH7017_CIV_0			0x11
+#define CH7017_CHROMA_BOOST		0x14
+#define CH7017_CLOCK_MODE		0x1c
+#define CH7017_INPUT_CLOCK		0x1d
+#define CH7017_GPIO_CONTROL		0x1e
+#define CH7017_INPUT_DATA_FORMAT	0x1f
+#define CH7017_CONNECTION_DETECT	0x20
+#define CH7017_DAC_CONTROL		0x21
+#define CH7017_BUFFERED_CLOCK_OUTPUT	0x22
+#define CH7017_DEFEAT_VSYNC		0x47
+#define CH7017_TEST_PATTERN		0x48
+
+#define CH7017_POWER_MANAGEMENT		0x49
+/** Enables the TV output path. */
+#define CH7017_TV_EN			(1 << 0)
+#define CH7017_DAC0_POWER_DOWN		(1 << 1)
+#define CH7017_DAC1_POWER_DOWN		(1 << 2)
+#define CH7017_DAC2_POWER_DOWN		(1 << 3)
+#define CH7017_DAC3_POWER_DOWN		(1 << 4)
+/** Powers down the TV out block, and DAC0-3 */
+#define CH7017_TV_POWER_DOWN_EN		(1 << 5)
+
+#define CH7017_VERSION_ID		0x4a
+
+#define CH7017_DEVICE_ID		0x4b
+#define CH7017_DEVICE_ID_VALUE		0x1b
+#define CH7018_DEVICE_ID_VALUE		0x1a
+#define CH7019_DEVICE_ID_VALUE		0x19
+
+#define CH7017_XCLK_D2_ADJUST		0x53
+#define CH7017_UP_SCALER_COEFF_0	0x55
+#define CH7017_UP_SCALER_COEFF_1	0x56
+#define CH7017_UP_SCALER_COEFF_2	0x57
+#define CH7017_UP_SCALER_COEFF_3	0x58
+#define CH7017_UP_SCALER_COEFF_4	0x59
+#define CH7017_UP_SCALER_VERTICAL_INC_0	0x5a
+#define CH7017_UP_SCALER_VERTICAL_INC_1	0x5b
+#define CH7017_GPIO_INVERT		0x5c
+#define CH7017_UP_SCALER_HORIZONTAL_INC_0	0x5d
+#define CH7017_UP_SCALER_HORIZONTAL_INC_1	0x5e
+
+#define CH7017_HORIZONTAL_ACTIVE_PIXEL_INPUT	0x5f
+/**< Low bits of horizontal active pixel input */
+
+#define CH7017_ACTIVE_INPUT_LINE_OUTPUT	0x60
+/** High bits of horizontal active pixel input */
+#define CH7017_LVDS_HAP_INPUT_MASK	(0x7 << 0)
+/** High bits of vertical active line output */
+#define CH7017_LVDS_VAL_HIGH_MASK	(0x7 << 3)
+
+#define CH7017_VERTICAL_ACTIVE_LINE_OUTPUT	0x61
+/**< Low bits of vertical active line output */
+
+#define CH7017_HORIZONTAL_ACTIVE_PIXEL_OUTPUT	0x62
+/**< Low bits of horizontal active pixel output */
+
+#define CH7017_LVDS_POWER_DOWN		0x63
+/** High bits of horizontal active pixel output */
+#define CH7017_LVDS_HAP_HIGH_MASK	(0x7 << 0)
+/** Enables the LVDS power down state transition */
+#define CH7017_LVDS_POWER_DOWN_EN	(1 << 6)
+/** Enables the LVDS upscaler */
+#define CH7017_LVDS_UPSCALER_EN		(1 << 7)
+#define CH7017_LVDS_POWER_DOWN_DEFAULT_RESERVED 0x08
+
+#define CH7017_LVDS_ENCODING		0x64
+#define CH7017_LVDS_DITHER_2D		(1 << 2)
+#define CH7017_LVDS_DITHER_DIS		(1 << 3)
+#define CH7017_LVDS_DUAL_CHANNEL_EN	(1 << 4)
+#define CH7017_LVDS_24_BIT		(1 << 5)
+
+#define CH7017_LVDS_ENCODING_2		0x65
+
+#define CH7017_LVDS_PLL_CONTROL		0x66
+/** Enables the LVDS panel output path */
+#define CH7017_LVDS_PANEN		(1 << 0)
+/** Enables the LVDS panel backlight */
+#define CH7017_LVDS_BKLEN		(1 << 3)
+
+#define CH7017_POWER_SEQUENCING_T1	0x67
+#define CH7017_POWER_SEQUENCING_T2	0x68
+#define CH7017_POWER_SEQUENCING_T3	0x69
+#define CH7017_POWER_SEQUENCING_T4	0x6a
+#define CH7017_POWER_SEQUENCING_T5	0x6b
+#define CH7017_GPIO_DRIVER_TYPE		0x6c
+#define CH7017_GPIO_DATA		0x6d
+#define CH7017_GPIO_DIRECTION_CONTROL	0x6e
+
+#define CH7017_LVDS_PLL_FEEDBACK_DIV	0x71
+# define CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT 4
+# define CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT 0
+# define CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED 0x80
+
+#define CH7017_LVDS_PLL_VCO_CONTROL	0x72
+# define CH7017_LVDS_PLL_VCO_DEFAULT_RESERVED 0x80
+# define CH7017_LVDS_PLL_VCO_SHIFT	4
+# define CH7017_LVDS_PLL_POST_SCALE_DIV_SHIFT 0
+
+#define CH7017_OUTPUTS_ENABLE		0x73
+# define CH7017_CHARGE_PUMP_LOW		0x0
+# define CH7017_CHARGE_PUMP_HIGH	0x3
+# define CH7017_LVDS_CHANNEL_A		(1 << 3)
+# define CH7017_LVDS_CHANNEL_B		(1 << 4)
+# define CH7017_TV_DAC_A		(1 << 5)
+# define CH7017_TV_DAC_B		(1 << 6)
+# define CH7017_DDC_SELECT_DC2		(1 << 7)
+
+#define CH7017_LVDS_OUTPUT_AMPLITUDE	0x74
+#define CH7017_LVDS_PLL_EMI_REDUCTION	0x75
+#define CH7017_LVDS_POWER_DOWN_FLICKER	0x76
+
+#define CH7017_LVDS_CONTROL_2		0x78
+# define CH7017_LOOP_FILTER_SHIFT	5
+# define CH7017_PHASE_DETECTOR_SHIFT	0
+
+#define CH7017_BANG_LIMIT_CONTROL	0x7f
+
+struct ch7017_priv {
+	uint8_t save_hapi;
+	uint8_t save_vali;
+	uint8_t save_valo;
+	uint8_t save_ailo;
+	uint8_t save_lvds_pll_vco;
+	uint8_t save_feedback_div;
+	uint8_t save_lvds_control_2;
+	uint8_t save_outputs_enable;
+	uint8_t save_lvds_power_down;
+	uint8_t save_power_management;
+};
+
+static void ch7017_dump_regs(struct intel_dvo_device *dvo);
+static void ch7017_dpms(struct intel_dvo_device *dvo, int mode);
+
+static bool ch7017_read(struct intel_dvo_device *dvo, int addr, uint8_t *val)
+{
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	u8 out_buf[2];
+	u8 in_buf[2];
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = in_buf,
+		}
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = 0;
+
+	if (i2c_transfer(&i2cbus->adapter, msgs, 2) == 2) {
+		*val= in_buf[0];
+		return true;
+	};
+
+	return false;
+}
+
+static bool ch7017_write(struct intel_dvo_device *dvo, int addr, uint8_t val)
+{
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	uint8_t out_buf[2];
+	struct i2c_msg msg = {
+		.addr = i2cbus->slave_addr,
+		.flags = 0,
+		.len = 2,
+		.buf = out_buf,
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = val;
+
+	if (i2c_transfer(&i2cbus->adapter, &msg, 1) == 1)
+		return true;
+
+	return false;
+}
+
+/** Probes for a CH7017 on the given bus and slave address. */
+static bool ch7017_init(struct intel_dvo_device *dvo,
+			struct intel_i2c_chan *i2cbus)
+{
+	struct ch7017_priv *priv;
+	uint8_t val;
+
+	priv = kzalloc(sizeof(struct ch7017_priv), GFP_KERNEL);
+	if (priv == NULL)
+		return false;
+
+	dvo->i2c_bus = i2cbus;
+	dvo->i2c_bus->slave_addr = dvo->slave_addr;
+	dvo->dev_priv = priv;
+
+	if (!ch7017_read(dvo, CH7017_DEVICE_ID, &val))
+		goto fail;
+
+	if (val != CH7017_DEVICE_ID_VALUE &&
+	    val != CH7018_DEVICE_ID_VALUE &&
+	    val != CH7019_DEVICE_ID_VALUE) {
+		DRM_DEBUG("ch701x not detected, got %d: from %s Slave %d.\n",
+			  val, i2cbus->adapter.name,i2cbus->slave_addr);
+		goto fail;
+	}
+
+	return true;
+fail:
+	kfree(priv);
+	return false;
+}
+
+static enum drm_connector_status ch7017_detect(struct intel_dvo_device *dvo)
+{
+	return connector_status_unknown;
+}
+
+static enum drm_mode_status ch7017_mode_valid(struct intel_dvo_device *dvo,
+					      struct drm_display_mode *mode)
+{
+	if (mode->clock > 160000)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+static void ch7017_mode_set(struct intel_dvo_device *dvo,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	uint8_t lvds_pll_feedback_div, lvds_pll_vco_control;
+	uint8_t outputs_enable, lvds_control_2, lvds_power_down;
+	uint8_t horizontal_active_pixel_input;
+	uint8_t horizontal_active_pixel_output, vertical_active_line_output;
+	uint8_t active_input_line_output;
+
+	DRM_DEBUG("Registers before mode setting\n");
+	ch7017_dump_regs(dvo);
+
+	/* LVDS PLL settings from page 75 of 7017-7017ds.pdf*/
+	if (mode->clock < 100000) {
+		outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_LOW;
+		lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
+			(2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) |
+			(13 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT);
+		lvds_pll_vco_control = CH7017_LVDS_PLL_VCO_DEFAULT_RESERVED |
+			(2 << CH7017_LVDS_PLL_VCO_SHIFT) |
+			(3 << CH7017_LVDS_PLL_POST_SCALE_DIV_SHIFT);
+		lvds_control_2 = (1 << CH7017_LOOP_FILTER_SHIFT) |
+			(0 << CH7017_PHASE_DETECTOR_SHIFT);
+	} else {
+		outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH;
+		lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
+			(2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) |
+			(3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT);
+		lvds_pll_feedback_div = 35;
+		lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) |
+			(0 << CH7017_PHASE_DETECTOR_SHIFT);
+		if (1) { /* XXX: dual channel panel detection.  Assume yes for now. */
+			outputs_enable |= CH7017_LVDS_CHANNEL_B;
+			lvds_pll_vco_control = CH7017_LVDS_PLL_VCO_DEFAULT_RESERVED |
+				(2 << CH7017_LVDS_PLL_VCO_SHIFT) |
+				(13 << CH7017_LVDS_PLL_POST_SCALE_DIV_SHIFT);
+		} else {
+			lvds_pll_vco_control = CH7017_LVDS_PLL_VCO_DEFAULT_RESERVED |
+				(1 << CH7017_LVDS_PLL_VCO_SHIFT) |
+				(13 << CH7017_LVDS_PLL_POST_SCALE_DIV_SHIFT);
+		}
+	}
+
+	horizontal_active_pixel_input = mode->hdisplay & 0x00ff;
+
+	vertical_active_line_output = mode->vdisplay & 0x00ff;
+	horizontal_active_pixel_output = mode->hdisplay & 0x00ff;
+
+	active_input_line_output = ((mode->hdisplay & 0x0700) >> 8) |
+				   (((mode->vdisplay & 0x0700) >> 8) << 3);
+
+	lvds_power_down = CH7017_LVDS_POWER_DOWN_DEFAULT_RESERVED |
+			  (mode->hdisplay & 0x0700) >> 8;
+
+	ch7017_dpms(dvo, DRM_MODE_DPMS_OFF);
+	ch7017_write(dvo, CH7017_HORIZONTAL_ACTIVE_PIXEL_INPUT,
+			horizontal_active_pixel_input);
+	ch7017_write(dvo, CH7017_HORIZONTAL_ACTIVE_PIXEL_OUTPUT,
+			horizontal_active_pixel_output);
+	ch7017_write(dvo, CH7017_VERTICAL_ACTIVE_LINE_OUTPUT,
+			vertical_active_line_output);
+	ch7017_write(dvo, CH7017_ACTIVE_INPUT_LINE_OUTPUT,
+			active_input_line_output);
+	ch7017_write(dvo, CH7017_LVDS_PLL_VCO_CONTROL, lvds_pll_vco_control);
+	ch7017_write(dvo, CH7017_LVDS_PLL_FEEDBACK_DIV, lvds_pll_feedback_div);
+	ch7017_write(dvo, CH7017_LVDS_CONTROL_2, lvds_control_2);
+	ch7017_write(dvo, CH7017_OUTPUTS_ENABLE, outputs_enable);
+
+	/* Turn the LVDS back on with new settings. */
+	ch7017_write(dvo, CH7017_LVDS_POWER_DOWN, lvds_power_down);
+
+	DRM_DEBUG("Registers after mode setting\n");
+	ch7017_dump_regs(dvo);
+}
+
+/* set the CH7017 power state */
+static void ch7017_dpms(struct intel_dvo_device *dvo, int mode)
+{
+	uint8_t val;
+
+	ch7017_read(dvo, CH7017_LVDS_POWER_DOWN, &val);
+
+	/* Turn off TV/VGA, and never turn it on since we don't support it. */
+	ch7017_write(dvo, CH7017_POWER_MANAGEMENT,
+			CH7017_DAC0_POWER_DOWN |
+			CH7017_DAC1_POWER_DOWN |
+			CH7017_DAC2_POWER_DOWN |
+			CH7017_DAC3_POWER_DOWN |
+			CH7017_TV_POWER_DOWN_EN);
+
+	if (mode == DRM_MODE_DPMS_ON) {
+		/* Turn on the LVDS */
+		ch7017_write(dvo, CH7017_LVDS_POWER_DOWN,
+			     val & ~CH7017_LVDS_POWER_DOWN_EN);
+	} else {
+		/* Turn off the LVDS */
+		ch7017_write(dvo, CH7017_LVDS_POWER_DOWN,
+			     val | CH7017_LVDS_POWER_DOWN_EN);
+	}
+
+	/* XXX: Should actually wait for update power status somehow */
+	udelay(20000);
+}
+
+static void ch7017_dump_regs(struct intel_dvo_device *dvo)
+{
+	uint8_t val;
+
+#define DUMP(reg)					\
+do {							\
+	ch7017_read(dvo, reg, &val);			\
+	DRM_DEBUG(#reg ": %02x\n", val);		\
+} while (0)
+
+	DUMP(CH7017_HORIZONTAL_ACTIVE_PIXEL_INPUT);
+	DUMP(CH7017_HORIZONTAL_ACTIVE_PIXEL_OUTPUT);
+	DUMP(CH7017_VERTICAL_ACTIVE_LINE_OUTPUT);
+	DUMP(CH7017_ACTIVE_INPUT_LINE_OUTPUT);
+	DUMP(CH7017_LVDS_PLL_VCO_CONTROL);
+	DUMP(CH7017_LVDS_PLL_FEEDBACK_DIV);
+	DUMP(CH7017_LVDS_CONTROL_2);
+	DUMP(CH7017_OUTPUTS_ENABLE);
+	DUMP(CH7017_LVDS_POWER_DOWN);
+}
+
+static void ch7017_save(struct intel_dvo_device *dvo)
+{
+	struct ch7017_priv *priv = dvo->dev_priv;
+
+	ch7017_read(dvo, CH7017_HORIZONTAL_ACTIVE_PIXEL_INPUT, &priv->save_hapi);
+	ch7017_read(dvo, CH7017_VERTICAL_ACTIVE_LINE_OUTPUT, &priv->save_valo);
+	ch7017_read(dvo, CH7017_ACTIVE_INPUT_LINE_OUTPUT, &priv->save_ailo);
+	ch7017_read(dvo, CH7017_LVDS_PLL_VCO_CONTROL, &priv->save_lvds_pll_vco);
+	ch7017_read(dvo, CH7017_LVDS_PLL_FEEDBACK_DIV, &priv->save_feedback_div);
+	ch7017_read(dvo, CH7017_LVDS_CONTROL_2, &priv->save_lvds_control_2);
+	ch7017_read(dvo, CH7017_OUTPUTS_ENABLE, &priv->save_outputs_enable);
+	ch7017_read(dvo, CH7017_LVDS_POWER_DOWN, &priv->save_lvds_power_down);
+	ch7017_read(dvo, CH7017_POWER_MANAGEMENT, &priv->save_power_management);
+}
+
+static void ch7017_restore(struct intel_dvo_device *dvo)
+{
+	struct ch7017_priv *priv = dvo->dev_priv;
+
+	/* Power down before changing mode */
+	ch7017_dpms(dvo, DRM_MODE_DPMS_OFF);
+
+	ch7017_write(dvo, CH7017_HORIZONTAL_ACTIVE_PIXEL_INPUT, priv->save_hapi);
+	ch7017_write(dvo, CH7017_VERTICAL_ACTIVE_LINE_OUTPUT, priv->save_valo);
+	ch7017_write(dvo, CH7017_ACTIVE_INPUT_LINE_OUTPUT, priv->save_ailo);
+	ch7017_write(dvo, CH7017_LVDS_PLL_VCO_CONTROL, priv->save_lvds_pll_vco);
+	ch7017_write(dvo, CH7017_LVDS_PLL_FEEDBACK_DIV, priv->save_feedback_div);
+	ch7017_write(dvo, CH7017_LVDS_CONTROL_2, priv->save_lvds_control_2);
+	ch7017_write(dvo, CH7017_OUTPUTS_ENABLE, priv->save_outputs_enable);
+	ch7017_write(dvo, CH7017_LVDS_POWER_DOWN, priv->save_lvds_power_down);
+	ch7017_write(dvo, CH7017_POWER_MANAGEMENT, priv->save_power_management);
+}
+
+static void ch7017_destroy(struct intel_dvo_device *dvo)
+{
+	struct ch7017_priv *priv = dvo->dev_priv;
+
+	if (priv) {
+		kfree(priv);
+		dvo->dev_priv = NULL;
+	}
+}
+
+struct intel_dvo_dev_ops ch7017_ops = {
+	.init = ch7017_init,
+	.detect = ch7017_detect,
+	.mode_valid = ch7017_mode_valid,
+	.mode_set = ch7017_mode_set,
+	.dpms = ch7017_dpms,
+	.dump_regs = ch7017_dump_regs,
+	.save = ch7017_save,
+	.restore = ch7017_restore,
+	.destroy = ch7017_destroy,
+};
diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c
new file mode 100644
index 000000000000..d2fd95dbd034
--- /dev/null
+++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c
@@ -0,0 +1,368 @@
+/**************************************************************************
+
+Copyright © 2006 Dave Airlie
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include "dvo.h"
+
+#define CH7xxx_REG_VID		0x4a
+#define CH7xxx_REG_DID		0x4b
+
+#define CH7011_VID		0x83 /* 7010 as well */
+#define CH7009A_VID		0x84
+#define CH7009B_VID		0x85
+#define CH7301_VID		0x95
+
+#define CH7xxx_VID		0x84
+#define CH7xxx_DID		0x17
+
+#define CH7xxx_NUM_REGS		0x4c
+
+#define CH7xxx_CM		0x1c
+#define CH7xxx_CM_XCM		(1<<0)
+#define CH7xxx_CM_MCP		(1<<2)
+#define CH7xxx_INPUT_CLOCK	0x1d
+#define CH7xxx_GPIO		0x1e
+#define CH7xxx_GPIO_HPIR	(1<<3)
+#define CH7xxx_IDF		0x1f
+
+#define CH7xxx_IDF_HSP		(1<<3)
+#define CH7xxx_IDF_VSP		(1<<4)
+
+#define CH7xxx_CONNECTION_DETECT 0x20
+#define CH7xxx_CDET_DVI		(1<<5)
+
+#define CH7301_DAC_CNTL		0x21
+#define CH7301_HOTPLUG		0x23
+#define CH7xxx_TCTL		0x31
+#define CH7xxx_TVCO		0x32
+#define CH7xxx_TPCP		0x33
+#define CH7xxx_TPD		0x34
+#define CH7xxx_TPVT		0x35
+#define CH7xxx_TLPF		0x36
+#define CH7xxx_TCT		0x37
+#define CH7301_TEST_PATTERN	0x48
+
+#define CH7xxx_PM		0x49
+#define CH7xxx_PM_FPD		(1<<0)
+#define CH7301_PM_DACPD0	(1<<1)
+#define CH7301_PM_DACPD1	(1<<2)
+#define CH7301_PM_DACPD2	(1<<3)
+#define CH7xxx_PM_DVIL		(1<<6)
+#define CH7xxx_PM_DVIP		(1<<7)
+
+#define CH7301_SYNC_POLARITY	0x56
+#define CH7301_SYNC_RGB_YUV	(1<<0)
+#define CH7301_SYNC_POL_DVI	(1<<5)
+
+/** @file
+ * driver for the Chrontel 7xxx DVI chip over DVO.
+ */
+
+static struct ch7xxx_id_struct {
+	uint8_t vid;
+	char *name;
+} ch7xxx_ids[] = {
+	{ CH7011_VID, "CH7011" },
+	{ CH7009A_VID, "CH7009A" },
+	{ CH7009B_VID, "CH7009B" },
+	{ CH7301_VID, "CH7301" },
+};
+
+struct ch7xxx_reg_state {
+    uint8_t regs[CH7xxx_NUM_REGS];
+};
+
+struct ch7xxx_priv {
+	bool quiet;
+
+	struct ch7xxx_reg_state save_reg;
+	struct ch7xxx_reg_state mode_reg;
+	uint8_t save_TCTL, save_TPCP, save_TPD, save_TPVT;
+	uint8_t save_TLPF, save_TCT, save_PM, save_IDF;
+};
+
+static void ch7xxx_save(struct intel_dvo_device *dvo);
+
+static char *ch7xxx_get_id(uint8_t vid)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ch7xxx_ids); i++) {
+		if (ch7xxx_ids[i].vid == vid)
+			return ch7xxx_ids[i].name;
+	}
+
+	return NULL;
+}
+
+/** Reads an 8 bit register */
+static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, uint8_t *ch)
+{
+	struct ch7xxx_priv *ch7xxx= dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	u8 out_buf[2];
+	u8 in_buf[2];
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = in_buf,
+		}
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = 0;
+
+	if (i2c_transfer(&i2cbus->adapter, msgs, 2) == 2) {
+		*ch = in_buf[0];
+		return true;
+	};
+
+	if (!ch7xxx->quiet) {
+		DRM_DEBUG("Unable to read register 0x%02x from %s:%02x.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+	return false;
+}
+
+/** Writes an 8 bit register */
+static bool ch7xxx_writeb(struct intel_dvo_device *dvo, int addr, uint8_t ch)
+{
+	struct ch7xxx_priv *ch7xxx = dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	uint8_t out_buf[2];
+	struct i2c_msg msg = {
+		.addr = i2cbus->slave_addr,
+		.flags = 0,
+		.len = 2,
+		.buf = out_buf,
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = ch;
+
+	if (i2c_transfer(&i2cbus->adapter, &msg, 1) == 1)
+		return true;
+
+	if (!ch7xxx->quiet) {
+		DRM_DEBUG("Unable to write register 0x%02x to %s:%d.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+
+	return false;
+}
+
+static bool ch7xxx_init(struct intel_dvo_device *dvo,
+			struct intel_i2c_chan *i2cbus)
+{
+	/* this will detect the CH7xxx chip on the specified i2c bus */
+	struct ch7xxx_priv *ch7xxx;
+	uint8_t vendor, device;
+	char *name;
+
+	ch7xxx = kzalloc(sizeof(struct ch7xxx_priv), GFP_KERNEL);
+	if (ch7xxx == NULL)
+		return false;
+
+	dvo->i2c_bus = i2cbus;
+	dvo->i2c_bus->slave_addr = dvo->slave_addr;
+	dvo->dev_priv = ch7xxx;
+	ch7xxx->quiet = true;
+
+	if (!ch7xxx_readb(dvo, CH7xxx_REG_VID, &vendor))
+		goto out;
+
+	name = ch7xxx_get_id(vendor);
+	if (!name) {
+		DRM_DEBUG("ch7xxx not detected; got 0x%02x from %s slave %d.\n",
+			  vendor, i2cbus->adapter.name, i2cbus->slave_addr);
+		goto out;
+	}
+
+
+	if (!ch7xxx_readb(dvo, CH7xxx_REG_DID, &device))
+		goto out;
+
+	if (device != CH7xxx_DID) {
+		DRM_DEBUG("ch7xxx not detected; got 0x%02x from %s slave %d.\n",
+			  vendor, i2cbus->adapter.name, i2cbus->slave_addr);
+		goto out;
+	}
+
+	ch7xxx->quiet = false;
+	DRM_DEBUG("Detected %s chipset, vendor/device ID 0x%02x/0x%02x\n",
+		  name, vendor, device);
+	return true;
+out:
+	kfree(ch7xxx);
+	return false;
+}
+
+static enum drm_connector_status ch7xxx_detect(struct intel_dvo_device *dvo)
+{
+	uint8_t cdet, orig_pm, pm;
+
+	ch7xxx_readb(dvo, CH7xxx_PM, &orig_pm);
+
+	pm = orig_pm;
+	pm &= ~CH7xxx_PM_FPD;
+	pm |= CH7xxx_PM_DVIL | CH7xxx_PM_DVIP;
+
+	ch7xxx_writeb(dvo, CH7xxx_PM, pm);
+
+	ch7xxx_readb(dvo, CH7xxx_CONNECTION_DETECT, &cdet);
+
+	ch7xxx_writeb(dvo, CH7xxx_PM, orig_pm);
+
+	if (cdet & CH7xxx_CDET_DVI)
+		return connector_status_connected;
+	return connector_status_disconnected;
+}
+
+static enum drm_mode_status ch7xxx_mode_valid(struct intel_dvo_device *dvo,
+					      struct drm_display_mode *mode)
+{
+	if (mode->clock > 165000)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+static void ch7xxx_mode_set(struct intel_dvo_device *dvo,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	uint8_t tvco, tpcp, tpd, tlpf, idf;
+
+	if (mode->clock <= 65000) {
+		tvco = 0x23;
+		tpcp = 0x08;
+		tpd = 0x16;
+		tlpf = 0x60;
+	} else {
+		tvco = 0x2d;
+		tpcp = 0x06;
+		tpd = 0x26;
+		tlpf = 0xa0;
+	}
+
+	ch7xxx_writeb(dvo, CH7xxx_TCTL, 0x00);
+	ch7xxx_writeb(dvo, CH7xxx_TVCO, tvco);
+	ch7xxx_writeb(dvo, CH7xxx_TPCP, tpcp);
+	ch7xxx_writeb(dvo, CH7xxx_TPD, tpd);
+	ch7xxx_writeb(dvo, CH7xxx_TPVT, 0x30);
+	ch7xxx_writeb(dvo, CH7xxx_TLPF, tlpf);
+	ch7xxx_writeb(dvo, CH7xxx_TCT, 0x00);
+
+	ch7xxx_readb(dvo, CH7xxx_IDF, &idf);
+
+	idf &= ~(CH7xxx_IDF_HSP | CH7xxx_IDF_VSP);
+	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+		idf |= CH7xxx_IDF_HSP;
+
+	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+		idf |= CH7xxx_IDF_HSP;
+
+	ch7xxx_writeb(dvo, CH7xxx_IDF, idf);
+}
+
+/* set the CH7xxx power state */
+static void ch7xxx_dpms(struct intel_dvo_device *dvo, int mode)
+{
+	if (mode == DRM_MODE_DPMS_ON)
+		ch7xxx_writeb(dvo, CH7xxx_PM, CH7xxx_PM_DVIL | CH7xxx_PM_DVIP);
+	else
+		ch7xxx_writeb(dvo, CH7xxx_PM, CH7xxx_PM_FPD);
+}
+
+static void ch7xxx_dump_regs(struct intel_dvo_device *dvo)
+{
+	struct ch7xxx_priv *ch7xxx = dvo->dev_priv;
+	int i;
+
+	for (i = 0; i < CH7xxx_NUM_REGS; i++) {
+		if ((i % 8) == 0 )
+			DRM_DEBUG("\n %02X: ", i);
+		DRM_DEBUG("%02X ", ch7xxx->mode_reg.regs[i]);
+	}
+}
+
+static void ch7xxx_save(struct intel_dvo_device *dvo)
+{
+	struct ch7xxx_priv *ch7xxx= dvo->dev_priv;
+
+	ch7xxx_readb(dvo, CH7xxx_TCTL, &ch7xxx->save_TCTL);
+	ch7xxx_readb(dvo, CH7xxx_TPCP, &ch7xxx->save_TPCP);
+	ch7xxx_readb(dvo, CH7xxx_TPD, &ch7xxx->save_TPD);
+	ch7xxx_readb(dvo, CH7xxx_TPVT, &ch7xxx->save_TPVT);
+	ch7xxx_readb(dvo, CH7xxx_TLPF, &ch7xxx->save_TLPF);
+	ch7xxx_readb(dvo, CH7xxx_PM, &ch7xxx->save_PM);
+	ch7xxx_readb(dvo, CH7xxx_IDF, &ch7xxx->save_IDF);
+}
+
+static void ch7xxx_restore(struct intel_dvo_device *dvo)
+{
+	struct ch7xxx_priv *ch7xxx = dvo->dev_priv;
+
+	ch7xxx_writeb(dvo, CH7xxx_TCTL, ch7xxx->save_TCTL);
+	ch7xxx_writeb(dvo, CH7xxx_TPCP, ch7xxx->save_TPCP);
+	ch7xxx_writeb(dvo, CH7xxx_TPD, ch7xxx->save_TPD);
+	ch7xxx_writeb(dvo, CH7xxx_TPVT, ch7xxx->save_TPVT);
+	ch7xxx_writeb(dvo, CH7xxx_TLPF, ch7xxx->save_TLPF);
+	ch7xxx_writeb(dvo, CH7xxx_IDF, ch7xxx->save_IDF);
+	ch7xxx_writeb(dvo, CH7xxx_PM, ch7xxx->save_PM);
+}
+
+static void ch7xxx_destroy(struct intel_dvo_device *dvo)
+{
+	struct ch7xxx_priv *ch7xxx = dvo->dev_priv;
+
+	if (ch7xxx) {
+		kfree(ch7xxx);
+		dvo->dev_priv = NULL;
+	}
+}
+
+struct intel_dvo_dev_ops ch7xxx_ops = {
+	.init = ch7xxx_init,
+	.detect = ch7xxx_detect,
+	.mode_valid = ch7xxx_mode_valid,
+	.mode_set = ch7xxx_mode_set,
+	.dpms = ch7xxx_dpms,
+	.dump_regs = ch7xxx_dump_regs,
+	.save = ch7xxx_save,
+	.restore = ch7xxx_restore,
+	.destroy = ch7xxx_destroy,
+};
diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c
new file mode 100644
index 000000000000..0c8d375e8e37
--- /dev/null
+++ b/drivers/gpu/drm/i915/dvo_ivch.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "dvo.h"
+
+/*
+ * register definitions for the i82807aa.
+ *
+ * Documentation on this chipset can be found in datasheet #29069001 at
+ * intel.com.
+ */
+
+/*
+ * VCH Revision & GMBus Base Addr
+ */
+#define VR00		0x00
+# define VR00_BASE_ADDRESS_MASK		0x007f
+
+/*
+ * Functionality Enable
+ */
+#define VR01		0x01
+
+/*
+ * Enable the panel fitter
+ */
+# define VR01_PANEL_FIT_ENABLE		(1 << 3)
+/*
+ * Enables the LCD display.
+ *
+ * This must not be set while VR01_DVO_BYPASS_ENABLE is set.
+ */
+# define VR01_LCD_ENABLE		(1 << 2)
+/** Enables the DVO repeater. */
+# define VR01_DVO_BYPASS_ENABLE		(1 << 1)
+/** Enables the DVO clock */
+# define VR01_DVO_ENABLE		(1 << 0)
+
+/*
+ * LCD Interface Format
+ */
+#define VR10		0x10
+/** Enables LVDS output instead of CMOS */
+# define VR10_LVDS_ENABLE		(1 << 4)
+/** Enables 18-bit LVDS output. */
+# define VR10_INTERFACE_1X18		(0 << 2)
+/** Enables 24-bit LVDS or CMOS output */
+# define VR10_INTERFACE_1X24		(1 << 2)
+/** Enables 2x18-bit LVDS or CMOS output. */
+# define VR10_INTERFACE_2X18		(2 << 2)
+/** Enables 2x24-bit LVDS output */
+# define VR10_INTERFACE_2X24		(3 << 2)
+
+/*
+ * VR20 LCD Horizontal Display Size
+ */
+#define VR20	0x20
+
+/*
+ * LCD Vertical Display Size
+ */
+#define VR21	0x20
+
+/*
+ * Panel power down status
+ */
+#define VR30		0x30
+/** Read only bit indicating that the panel is not in a safe poweroff state. */
+# define VR30_PANEL_ON			(1 << 15)
+
+#define VR40		0x40
+# define VR40_STALL_ENABLE		(1 << 13)
+# define VR40_VERTICAL_INTERP_ENABLE	(1 << 12)
+# define VR40_ENHANCED_PANEL_FITTING	(1 << 11)
+# define VR40_HORIZONTAL_INTERP_ENABLE	(1 << 10)
+# define VR40_AUTO_RATIO_ENABLE		(1 << 9)
+# define VR40_CLOCK_GATING_ENABLE	(1 << 8)
+
+/*
+ * Panel Fitting Vertical Ratio
+ * (((image_height - 1) << 16) / ((panel_height - 1))) >> 2
+ */
+#define VR41		0x41
+
+/*
+ * Panel Fitting Horizontal Ratio
+ * (((image_width - 1) << 16) / ((panel_width - 1))) >> 2
+ */
+#define VR42		0x42
+
+/*
+ * Horizontal Image Size
+ */
+#define VR43		0x43
+
+/* VR80 GPIO 0
+ */
+#define VR80	    0x80
+#define VR81	    0x81
+#define VR82	    0x82
+#define VR83	    0x83
+#define VR84	    0x84
+#define VR85	    0x85
+#define VR86	    0x86
+#define VR87	    0x87
+
+/* VR88 GPIO 8
+ */
+#define VR88	    0x88
+
+/* Graphics BIOS scratch 0
+ */
+#define VR8E	    0x8E
+# define VR8E_PANEL_TYPE_MASK		(0xf << 0)
+# define VR8E_PANEL_INTERFACE_CMOS	(0 << 4)
+# define VR8E_PANEL_INTERFACE_LVDS	(1 << 4)
+# define VR8E_FORCE_DEFAULT_PANEL	(1 << 5)
+
+/* Graphics BIOS scratch 1
+ */
+#define VR8F	    0x8F
+# define VR8F_VCH_PRESENT		(1 << 0)
+# define VR8F_DISPLAY_CONN		(1 << 1)
+# define VR8F_POWER_MASK		(0x3c)
+# define VR8F_POWER_POS			(2)
+
+
+struct ivch_priv {
+	bool quiet;
+
+	uint16_t width, height;
+
+	uint16_t save_VR01;
+	uint16_t save_VR40;
+};
+
+
+static void ivch_dump_regs(struct intel_dvo_device *dvo);
+
+/**
+ * Reads a register on the ivch.
+ *
+ * Each of the 256 registers are 16 bits long.
+ */
+static bool ivch_read(struct intel_dvo_device *dvo, int addr, uint16_t *data)
+{
+	struct ivch_priv *priv = dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	u8 out_buf[1];
+	u8 in_buf[2];
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = I2C_M_RD,
+			.len = 0,
+		},
+		{
+			.addr = 0,
+			.flags = I2C_M_NOSTART,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = I2C_M_RD | I2C_M_NOSTART,
+			.len = 2,
+			.buf = in_buf,
+		}
+	};
+
+	out_buf[0] = addr;
+
+	if (i2c_transfer(&i2cbus->adapter, msgs, 3) == 3) {
+		*data = (in_buf[1] << 8) | in_buf[0];
+		return true;
+	};
+
+	if (!priv->quiet) {
+		DRM_DEBUG("Unable to read register 0x%02x from %s:%02x.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+	return false;
+}
+
+/** Writes a 16-bit register on the ivch */
+static bool ivch_write(struct intel_dvo_device *dvo, int addr, uint16_t data)
+{
+	struct ivch_priv *priv = dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	u8 out_buf[3];
+	struct i2c_msg msg = {
+		.addr = i2cbus->slave_addr,
+		.flags = 0,
+		.len = 3,
+		.buf = out_buf,
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = data & 0xff;
+	out_buf[2] = data >> 8;
+
+	if (i2c_transfer(&i2cbus->adapter, &msg, 1) == 1)
+		return true;
+
+	if (!priv->quiet) {
+		DRM_DEBUG("Unable to write register 0x%02x to %s:%d.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+
+	return false;
+}
+
+/** Probes the given bus and slave address for an ivch */
+static bool ivch_init(struct intel_dvo_device *dvo,
+		      struct intel_i2c_chan *i2cbus)
+{
+	struct ivch_priv *priv;
+	uint16_t temp;
+
+	priv = kzalloc(sizeof(struct ivch_priv), GFP_KERNEL);
+	if (priv == NULL)
+		return false;
+
+	dvo->i2c_bus = i2cbus;
+	dvo->i2c_bus->slave_addr = dvo->slave_addr;
+	dvo->dev_priv = priv;
+	priv->quiet = true;
+
+	if (!ivch_read(dvo, VR00, &temp))
+		goto out;
+	priv->quiet = false;
+
+	/* Since the identification bits are probably zeroes, which doesn't seem
+	 * very unique, check that the value in the base address field matches
+	 * the address it's responding on.
+	 */
+	if ((temp & VR00_BASE_ADDRESS_MASK) != dvo->slave_addr) {
+		DRM_DEBUG("ivch detect failed due to address mismatch "
+			  "(%d vs %d)\n",
+			  (temp & VR00_BASE_ADDRESS_MASK), dvo->slave_addr);
+		goto out;
+	}
+
+	ivch_read(dvo, VR20, &priv->width);
+	ivch_read(dvo, VR21, &priv->height);
+
+	return true;
+
+out:
+	kfree(priv);
+	return false;
+}
+
+static enum drm_connector_status ivch_detect(struct intel_dvo_device *dvo)
+{
+	return connector_status_connected;
+}
+
+static enum drm_mode_status ivch_mode_valid(struct intel_dvo_device *dvo,
+					    struct drm_display_mode *mode)
+{
+	if (mode->clock > 112000)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+/** Sets the power state of the panel connected to the ivch */
+static void ivch_dpms(struct intel_dvo_device *dvo, int mode)
+{
+	int i;
+	uint16_t vr01, vr30, backlight;
+
+	/* Set the new power state of the panel. */
+	if (!ivch_read(dvo, VR01, &vr01))
+		return;
+
+	if (mode == DRM_MODE_DPMS_ON)
+		backlight = 1;
+	else
+		backlight = 0;
+	ivch_write(dvo, VR80, backlight);
+
+	if (mode == DRM_MODE_DPMS_ON)
+		vr01 |= VR01_LCD_ENABLE | VR01_DVO_ENABLE;
+	else
+		vr01 &= ~(VR01_LCD_ENABLE | VR01_DVO_ENABLE);
+
+	ivch_write(dvo, VR01, vr01);
+
+	/* Wait for the panel to make its state transition */
+	for (i = 0; i < 100; i++) {
+		if (!ivch_read(dvo, VR30, &vr30))
+			break;
+
+		if (((vr30 & VR30_PANEL_ON) != 0) == (mode == DRM_MODE_DPMS_ON))
+			break;
+		udelay(1000);
+	}
+	/* wait some more; vch may fail to resync sometimes without this */
+	udelay(16 * 1000);
+}
+
+static void ivch_mode_set(struct intel_dvo_device *dvo,
+			  struct drm_display_mode *mode,
+			  struct drm_display_mode *adjusted_mode)
+{
+	uint16_t vr40 = 0;
+	uint16_t vr01;
+
+	vr01 = 0;
+	vr40 = (VR40_STALL_ENABLE | VR40_VERTICAL_INTERP_ENABLE |
+		VR40_HORIZONTAL_INTERP_ENABLE);
+
+	if (mode->hdisplay != adjusted_mode->hdisplay ||
+	    mode->vdisplay != adjusted_mode->vdisplay) {
+		uint16_t x_ratio, y_ratio;
+
+		vr01 |= VR01_PANEL_FIT_ENABLE;
+		vr40 |= VR40_CLOCK_GATING_ENABLE;
+		x_ratio = (((mode->hdisplay - 1) << 16) /
+			   (adjusted_mode->hdisplay - 1)) >> 2;
+		y_ratio = (((mode->vdisplay - 1) << 16) /
+			   (adjusted_mode->vdisplay - 1)) >> 2;
+		ivch_write (dvo, VR42, x_ratio);
+		ivch_write (dvo, VR41, y_ratio);
+	} else {
+		vr01 &= ~VR01_PANEL_FIT_ENABLE;
+		vr40 &= ~VR40_CLOCK_GATING_ENABLE;
+	}
+	vr40 &= ~VR40_AUTO_RATIO_ENABLE;
+
+	ivch_write(dvo, VR01, vr01);
+	ivch_write(dvo, VR40, vr40);
+
+	ivch_dump_regs(dvo);
+}
+
+static void ivch_dump_regs(struct intel_dvo_device *dvo)
+{
+	uint16_t val;
+
+	ivch_read(dvo, VR00, &val);
+	DRM_DEBUG("VR00: 0x%04x\n", val);
+	ivch_read(dvo, VR01, &val);
+	DRM_DEBUG("VR01: 0x%04x\n", val);
+	ivch_read(dvo, VR30, &val);
+	DRM_DEBUG("VR30: 0x%04x\n", val);
+	ivch_read(dvo, VR40, &val);
+	DRM_DEBUG("VR40: 0x%04x\n", val);
+
+	/* GPIO registers */
+	ivch_read(dvo, VR80, &val);
+	DRM_DEBUG("VR80: 0x%04x\n", val);
+	ivch_read(dvo, VR81, &val);
+	DRM_DEBUG("VR81: 0x%04x\n", val);
+	ivch_read(dvo, VR82, &val);
+	DRM_DEBUG("VR82: 0x%04x\n", val);
+	ivch_read(dvo, VR83, &val);
+	DRM_DEBUG("VR83: 0x%04x\n", val);
+	ivch_read(dvo, VR84, &val);
+	DRM_DEBUG("VR84: 0x%04x\n", val);
+	ivch_read(dvo, VR85, &val);
+	DRM_DEBUG("VR85: 0x%04x\n", val);
+	ivch_read(dvo, VR86, &val);
+	DRM_DEBUG("VR86: 0x%04x\n", val);
+	ivch_read(dvo, VR87, &val);
+	DRM_DEBUG("VR87: 0x%04x\n", val);
+	ivch_read(dvo, VR88, &val);
+	DRM_DEBUG("VR88: 0x%04x\n", val);
+
+	/* Scratch register 0 - AIM Panel type */
+	ivch_read(dvo, VR8E, &val);
+	DRM_DEBUG("VR8E: 0x%04x\n", val);
+
+	/* Scratch register 1 - Status register */
+	ivch_read(dvo, VR8F, &val);
+	DRM_DEBUG("VR8F: 0x%04x\n", val);
+}
+
+static void ivch_save(struct intel_dvo_device *dvo)
+{
+	struct ivch_priv *priv = dvo->dev_priv;
+
+	ivch_read(dvo, VR01, &priv->save_VR01);
+	ivch_read(dvo, VR40, &priv->save_VR40);
+}
+
+static void ivch_restore(struct intel_dvo_device *dvo)
+{
+	struct ivch_priv *priv = dvo->dev_priv;
+
+	ivch_write(dvo, VR01, priv->save_VR01);
+	ivch_write(dvo, VR40, priv->save_VR40);
+}
+
+static void ivch_destroy(struct intel_dvo_device *dvo)
+{
+	struct ivch_priv *priv = dvo->dev_priv;
+
+	if (priv) {
+		kfree(priv);
+		dvo->dev_priv = NULL;
+	}
+}
+
+struct intel_dvo_dev_ops ivch_ops= {
+	.init = ivch_init,
+	.dpms = ivch_dpms,
+	.save = ivch_save,
+	.restore = ivch_restore,
+	.mode_valid = ivch_mode_valid,
+	.mode_set = ivch_mode_set,
+	.detect = ivch_detect,
+	.dump_regs = ivch_dump_regs,
+	.destroy = ivch_destroy,
+};
diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/dvo_sil164.c
new file mode 100644
index 000000000000..033a4bb070b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/dvo_sil164.c
@@ -0,0 +1,302 @@
+/**************************************************************************
+
+Copyright © 2006 Dave Airlie
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include "dvo.h"
+
+#define SIL164_VID 0x0001
+#define SIL164_DID 0x0006
+
+#define SIL164_VID_LO 0x00
+#define SIL164_VID_HI 0x01
+#define SIL164_DID_LO 0x02
+#define SIL164_DID_HI 0x03
+#define SIL164_REV    0x04
+#define SIL164_RSVD   0x05
+#define SIL164_FREQ_LO 0x06
+#define SIL164_FREQ_HI 0x07
+
+#define SIL164_REG8 0x08
+#define SIL164_8_VEN (1<<5)
+#define SIL164_8_HEN (1<<4)
+#define SIL164_8_DSEL (1<<3)
+#define SIL164_8_BSEL (1<<2)
+#define SIL164_8_EDGE (1<<1)
+#define SIL164_8_PD   (1<<0)
+
+#define SIL164_REG9 0x09
+#define SIL164_9_VLOW (1<<7)
+#define SIL164_9_MSEL_MASK (0x7<<4)
+#define SIL164_9_TSEL (1<<3)
+#define SIL164_9_RSEN (1<<2)
+#define SIL164_9_HTPLG (1<<1)
+#define SIL164_9_MDI (1<<0)
+
+#define SIL164_REGC 0x0c
+
+struct sil164_save_rec {
+	uint8_t reg8;
+	uint8_t reg9;
+	uint8_t regc;
+};
+
+struct sil164_priv {
+	//I2CDevRec d;
+	bool quiet;
+	struct sil164_save_rec save_regs;
+	struct sil164_save_rec mode_regs;
+};
+
+#define SILPTR(d) ((SIL164Ptr)(d->DriverPrivate.ptr))
+
+static bool sil164_readb(struct intel_dvo_device *dvo, int addr, uint8_t *ch)
+{
+	struct sil164_priv *sil = dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	u8 out_buf[2];
+	u8 in_buf[2];
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = in_buf,
+		}
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = 0;
+
+	if (i2c_transfer(&i2cbus->adapter, msgs, 2) == 2) {
+		*ch = in_buf[0];
+		return true;
+	};
+
+	if (!sil->quiet) {
+		DRM_DEBUG("Unable to read register 0x%02x from %s:%02x.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+	return false;
+}
+
+static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, uint8_t ch)
+{
+	struct sil164_priv *sil= dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	uint8_t out_buf[2];
+	struct i2c_msg msg = {
+		.addr = i2cbus->slave_addr,
+		.flags = 0,
+		.len = 2,
+		.buf = out_buf,
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = ch;
+
+	if (i2c_transfer(&i2cbus->adapter, &msg, 1) == 1)
+		return true;
+
+	if (!sil->quiet) {
+		DRM_DEBUG("Unable to write register 0x%02x to %s:%d.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+
+	return false;
+}
+
+/* Silicon Image 164 driver for chip on i2c bus */
+static bool sil164_init(struct intel_dvo_device *dvo,
+			struct intel_i2c_chan *i2cbus)
+{
+	/* this will detect the SIL164 chip on the specified i2c bus */
+	struct sil164_priv *sil;
+	unsigned char ch;
+
+	sil = kzalloc(sizeof(struct sil164_priv), GFP_KERNEL);
+	if (sil == NULL)
+		return false;
+
+	dvo->i2c_bus = i2cbus;
+	dvo->i2c_bus->slave_addr = dvo->slave_addr;
+	dvo->dev_priv = sil;
+	sil->quiet = true;
+
+	if (!sil164_readb(dvo, SIL164_VID_LO, &ch))
+		goto out;
+
+	if (ch != (SIL164_VID & 0xff)) {
+		DRM_DEBUG("sil164 not detected got %d: from %s Slave %d.\n",
+			  ch, i2cbus->adapter.name, i2cbus->slave_addr);
+		goto out;
+	}
+
+	if (!sil164_readb(dvo, SIL164_DID_LO, &ch))
+		goto out;
+
+	if (ch != (SIL164_DID & 0xff)) {
+		DRM_DEBUG("sil164 not detected got %d: from %s Slave %d.\n",
+			  ch, i2cbus->adapter.name, i2cbus->slave_addr);
+		goto out;
+	}
+	sil->quiet = false;
+
+	DRM_DEBUG("init sil164 dvo controller successfully!\n");
+	return true;
+
+out:
+	kfree(sil);
+	return false;
+}
+
+static enum drm_connector_status sil164_detect(struct intel_dvo_device *dvo)
+{
+	uint8_t reg9;
+
+	sil164_readb(dvo, SIL164_REG9, &reg9);
+
+	if (reg9 & SIL164_9_HTPLG)
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+}
+
+static enum drm_mode_status sil164_mode_valid(struct intel_dvo_device *dvo,
+					      struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static void sil164_mode_set(struct intel_dvo_device *dvo,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	/* As long as the basics are set up, since we don't have clock
+	 * dependencies in the mode setup, we can just leave the
+	 * registers alone and everything will work fine.
+	 */
+	/* recommended programming sequence from doc */
+	/*sil164_writeb(sil, 0x08, 0x30);
+	  sil164_writeb(sil, 0x09, 0x00);
+	  sil164_writeb(sil, 0x0a, 0x90);
+	  sil164_writeb(sil, 0x0c, 0x89);
+	  sil164_writeb(sil, 0x08, 0x31);*/
+	/* don't do much */
+	return;
+}
+
+/* set the SIL164 power state */
+static void sil164_dpms(struct intel_dvo_device *dvo, int mode)
+{
+	int ret;
+	unsigned char ch;
+
+	ret = sil164_readb(dvo, SIL164_REG8, &ch);
+	if (ret == false)
+		return;
+
+	if (mode == DRM_MODE_DPMS_ON)
+		ch |= SIL164_8_PD;
+	else
+		ch &= ~SIL164_8_PD;
+
+	sil164_writeb(dvo, SIL164_REG8, ch);
+	return;
+}
+
+static void sil164_dump_regs(struct intel_dvo_device *dvo)
+{
+	uint8_t val;
+
+	sil164_readb(dvo, SIL164_FREQ_LO, &val);
+	DRM_DEBUG("SIL164_FREQ_LO: 0x%02x\n", val);
+	sil164_readb(dvo, SIL164_FREQ_HI, &val);
+	DRM_DEBUG("SIL164_FREQ_HI: 0x%02x\n", val);
+	sil164_readb(dvo, SIL164_REG8, &val);
+	DRM_DEBUG("SIL164_REG8: 0x%02x\n", val);
+	sil164_readb(dvo, SIL164_REG9, &val);
+	DRM_DEBUG("SIL164_REG9: 0x%02x\n", val);
+	sil164_readb(dvo, SIL164_REGC, &val);
+	DRM_DEBUG("SIL164_REGC: 0x%02x\n", val);
+}
+
+static void sil164_save(struct intel_dvo_device *dvo)
+{
+	struct sil164_priv *sil= dvo->dev_priv;
+
+	if (!sil164_readb(dvo, SIL164_REG8, &sil->save_regs.reg8))
+		return;
+
+	if (!sil164_readb(dvo, SIL164_REG9, &sil->save_regs.reg9))
+		return;
+
+	if (!sil164_readb(dvo, SIL164_REGC, &sil->save_regs.regc))
+		return;
+
+	return;
+}
+
+static void sil164_restore(struct intel_dvo_device *dvo)
+{
+	struct sil164_priv *sil = dvo->dev_priv;
+
+	/* Restore it powered down initially */
+	sil164_writeb(dvo, SIL164_REG8, sil->save_regs.reg8 & ~0x1);
+
+	sil164_writeb(dvo, SIL164_REG9, sil->save_regs.reg9);
+	sil164_writeb(dvo, SIL164_REGC, sil->save_regs.regc);
+	sil164_writeb(dvo, SIL164_REG8, sil->save_regs.reg8);
+}
+
+static void sil164_destroy(struct intel_dvo_device *dvo)
+{
+	struct sil164_priv *sil = dvo->dev_priv;
+
+	if (sil) {
+		kfree(sil);
+		dvo->dev_priv = NULL;
+	}
+}
+
+struct intel_dvo_dev_ops sil164_ops = {
+	.init = sil164_init,
+	.detect = sil164_detect,
+	.mode_valid = sil164_mode_valid,
+	.mode_set = sil164_mode_set,
+	.dpms = sil164_dpms,
+	.dump_regs = sil164_dump_regs,
+	.save = sil164_save,
+	.restore = sil164_restore,
+	.destroy = sil164_destroy,
+};
diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c
new file mode 100644
index 000000000000..207fda806ebf
--- /dev/null
+++ b/drivers/gpu/drm/i915/dvo_tfp410.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright © 2007 Dave Mueller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Dave Mueller <dave.mueller@gmx.ch>
+ *
+ */
+
+#include "dvo.h"
+
+/* register definitions according to the TFP410 data sheet */
+#define TFP410_VID		0x014C
+#define TFP410_DID		0x0410
+
+#define TFP410_VID_LO		0x00
+#define TFP410_VID_HI		0x01
+#define TFP410_DID_LO		0x02
+#define TFP410_DID_HI		0x03
+#define TFP410_REV		0x04
+
+#define TFP410_CTL_1		0x08
+#define TFP410_CTL_1_TDIS	(1<<6)
+#define TFP410_CTL_1_VEN	(1<<5)
+#define TFP410_CTL_1_HEN	(1<<4)
+#define TFP410_CTL_1_DSEL	(1<<3)
+#define TFP410_CTL_1_BSEL	(1<<2)
+#define TFP410_CTL_1_EDGE	(1<<1)
+#define TFP410_CTL_1_PD		(1<<0)
+
+#define TFP410_CTL_2		0x09
+#define TFP410_CTL_2_VLOW	(1<<7)
+#define TFP410_CTL_2_MSEL_MASK	(0x7<<4)
+#define TFP410_CTL_2_MSEL	(1<<4)
+#define TFP410_CTL_2_TSEL	(1<<3)
+#define TFP410_CTL_2_RSEN	(1<<2)
+#define TFP410_CTL_2_HTPLG	(1<<1)
+#define TFP410_CTL_2_MDI	(1<<0)
+
+#define TFP410_CTL_3		0x0A
+#define TFP410_CTL_3_DK_MASK 	(0x7<<5)
+#define TFP410_CTL_3_DK		(1<<5)
+#define TFP410_CTL_3_DKEN	(1<<4)
+#define TFP410_CTL_3_CTL_MASK	(0x7<<1)
+#define TFP410_CTL_3_CTL	(1<<1)
+
+#define TFP410_USERCFG		0x0B
+
+#define TFP410_DE_DLY		0x32
+
+#define TFP410_DE_CTL		0x33
+#define TFP410_DE_CTL_DEGEN	(1<<6)
+#define TFP410_DE_CTL_VSPOL	(1<<5)
+#define TFP410_DE_CTL_HSPOL	(1<<4)
+#define TFP410_DE_CTL_DEDLY8	(1<<0)
+
+#define TFP410_DE_TOP		0x34
+
+#define TFP410_DE_CNT_LO	0x36
+#define TFP410_DE_CNT_HI	0x37
+
+#define TFP410_DE_LIN_LO	0x38
+#define TFP410_DE_LIN_HI	0x39
+
+#define TFP410_H_RES_LO		0x3A
+#define TFP410_H_RES_HI		0x3B
+
+#define TFP410_V_RES_LO		0x3C
+#define TFP410_V_RES_HI		0x3D
+
+struct tfp410_save_rec {
+	uint8_t ctl1;
+	uint8_t ctl2;
+};
+
+struct tfp410_priv {
+	bool quiet;
+
+	struct tfp410_save_rec saved_reg;
+	struct tfp410_save_rec mode_reg;
+};
+
+static bool tfp410_readb(struct intel_dvo_device *dvo, int addr, uint8_t *ch)
+{
+	struct tfp410_priv *tfp = dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	u8 out_buf[2];
+	u8 in_buf[2];
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = i2cbus->slave_addr,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = in_buf,
+		}
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = 0;
+
+	if (i2c_transfer(&i2cbus->adapter, msgs, 2) == 2) {
+		*ch = in_buf[0];
+		return true;
+	};
+
+	if (!tfp->quiet) {
+		DRM_DEBUG("Unable to read register 0x%02x from %s:%02x.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+	return false;
+}
+
+static bool tfp410_writeb(struct intel_dvo_device *dvo, int addr, uint8_t ch)
+{
+	struct tfp410_priv *tfp = dvo->dev_priv;
+	struct intel_i2c_chan *i2cbus = dvo->i2c_bus;
+	uint8_t out_buf[2];
+	struct i2c_msg msg = {
+		.addr = i2cbus->slave_addr,
+		.flags = 0,
+		.len = 2,
+		.buf = out_buf,
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = ch;
+
+	if (i2c_transfer(&i2cbus->adapter, &msg, 1) == 1)
+		return true;
+
+	if (!tfp->quiet) {
+		DRM_DEBUG("Unable to write register 0x%02x to %s:%d.\n",
+			  addr, i2cbus->adapter.name, i2cbus->slave_addr);
+	}
+
+	return false;
+}
+
+static int tfp410_getid(struct intel_dvo_device *dvo, int addr)
+{
+	uint8_t ch1, ch2;
+
+	if (tfp410_readb(dvo, addr+0, &ch1) &&
+	    tfp410_readb(dvo, addr+1, &ch2))
+		return ((ch2 << 8) & 0xFF00) | (ch1 & 0x00FF);
+
+	return -1;
+}
+
+/* Ti TFP410 driver for chip on i2c bus */
+static bool tfp410_init(struct intel_dvo_device *dvo,
+			struct intel_i2c_chan *i2cbus)
+{
+	/* this will detect the tfp410 chip on the specified i2c bus */
+	struct tfp410_priv *tfp;
+	int id;
+
+	tfp = kzalloc(sizeof(struct tfp410_priv), GFP_KERNEL);
+	if (tfp == NULL)
+		return false;
+
+	dvo->i2c_bus = i2cbus;
+	dvo->i2c_bus->slave_addr = dvo->slave_addr;
+	dvo->dev_priv = tfp;
+	tfp->quiet = true;
+
+	if ((id = tfp410_getid(dvo, TFP410_VID_LO)) != TFP410_VID) {
+		DRM_DEBUG("tfp410 not detected got VID %X: from %s Slave %d.\n",
+			  id, i2cbus->adapter.name, i2cbus->slave_addr);
+		goto out;
+	}
+
+	if ((id = tfp410_getid(dvo, TFP410_DID_LO)) != TFP410_DID) {
+		DRM_DEBUG("tfp410 not detected got DID %X: from %s Slave %d.\n",
+			  id, i2cbus->adapter.name, i2cbus->slave_addr);
+		goto out;
+	}
+	tfp->quiet = false;
+	return true;
+out:
+	kfree(tfp);
+	return false;
+}
+
+static enum drm_connector_status tfp410_detect(struct intel_dvo_device *dvo)
+{
+	enum drm_connector_status ret = connector_status_disconnected;
+	uint8_t ctl2;
+
+	if (tfp410_readb(dvo, TFP410_CTL_2, &ctl2)) {
+		if (ctl2 & TFP410_CTL_2_HTPLG)
+			ret = connector_status_connected;
+		else
+			ret = connector_status_disconnected;
+	}
+
+	return ret;
+}
+
+static enum drm_mode_status tfp410_mode_valid(struct intel_dvo_device *dvo,
+					      struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static void tfp410_mode_set(struct intel_dvo_device *dvo,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+    /* As long as the basics are set up, since we don't have clock dependencies
+     * in the mode setup, we can just leave the registers alone and everything
+     * will work fine.
+     */
+    /* don't do much */
+    return;
+}
+
+/* set the tfp410 power state */
+static void tfp410_dpms(struct intel_dvo_device *dvo, int mode)
+{
+	uint8_t ctl1;
+
+	if (!tfp410_readb(dvo, TFP410_CTL_1, &ctl1))
+		return;
+
+	if (mode == DRM_MODE_DPMS_ON)
+		ctl1 |= TFP410_CTL_1_PD;
+	else
+		ctl1 &= ~TFP410_CTL_1_PD;
+
+	tfp410_writeb(dvo, TFP410_CTL_1, ctl1);
+}
+
+static void tfp410_dump_regs(struct intel_dvo_device *dvo)
+{
+	uint8_t val, val2;
+
+	tfp410_readb(dvo, TFP410_REV, &val);
+	DRM_DEBUG("TFP410_REV: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_CTL_1, &val);
+	DRM_DEBUG("TFP410_CTL1: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_CTL_2, &val);
+	DRM_DEBUG("TFP410_CTL2: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_CTL_3, &val);
+	DRM_DEBUG("TFP410_CTL3: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_USERCFG, &val);
+	DRM_DEBUG("TFP410_USERCFG: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_DE_DLY, &val);
+	DRM_DEBUG("TFP410_DE_DLY: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_DE_CTL, &val);
+	DRM_DEBUG("TFP410_DE_CTL: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_DE_TOP, &val);
+	DRM_DEBUG("TFP410_DE_TOP: 0x%02X\n", val);
+	tfp410_readb(dvo, TFP410_DE_CNT_LO, &val);
+	tfp410_readb(dvo, TFP410_DE_CNT_HI, &val2);
+	DRM_DEBUG("TFP410_DE_CNT: 0x%02X%02X\n", val2, val);
+	tfp410_readb(dvo, TFP410_DE_LIN_LO, &val);
+	tfp410_readb(dvo, TFP410_DE_LIN_HI, &val2);
+	DRM_DEBUG("TFP410_DE_LIN: 0x%02X%02X\n", val2, val);
+	tfp410_readb(dvo, TFP410_H_RES_LO, &val);
+	tfp410_readb(dvo, TFP410_H_RES_HI, &val2);
+	DRM_DEBUG("TFP410_H_RES: 0x%02X%02X\n", val2, val);
+	tfp410_readb(dvo, TFP410_V_RES_LO, &val);
+	tfp410_readb(dvo, TFP410_V_RES_HI, &val2);
+	DRM_DEBUG("TFP410_V_RES: 0x%02X%02X\n", val2, val);
+}
+
+static void tfp410_save(struct intel_dvo_device *dvo)
+{
+	struct tfp410_priv *tfp = dvo->dev_priv;
+
+	if (!tfp410_readb(dvo, TFP410_CTL_1, &tfp->saved_reg.ctl1))
+		return;
+
+	if (!tfp410_readb(dvo, TFP410_CTL_2, &tfp->saved_reg.ctl2))
+		return;
+}
+
+static void tfp410_restore(struct intel_dvo_device *dvo)
+{
+	struct tfp410_priv *tfp = dvo->dev_priv;
+
+	/* Restore it powered down initially */
+	tfp410_writeb(dvo, TFP410_CTL_1, tfp->saved_reg.ctl1 & ~0x1);
+
+	tfp410_writeb(dvo, TFP410_CTL_2, tfp->saved_reg.ctl2);
+	tfp410_writeb(dvo, TFP410_CTL_1, tfp->saved_reg.ctl1);
+}
+
+static void tfp410_destroy(struct intel_dvo_device *dvo)
+{
+	struct tfp410_priv *tfp = dvo->dev_priv;
+
+	if (tfp) {
+		kfree(tfp);
+		dvo->dev_priv = NULL;
+	}
+}
+
+struct intel_dvo_dev_ops tfp410_ops = {
+	.init = tfp410_init,
+	.detect = tfp410_detect,
+	.mode_valid = tfp410_mode_valid,
+	.mode_set = tfp410_mode_set,
+	.dpms = tfp410_dpms,
+	.dump_regs = tfp410_dump_regs,
+	.save = tfp410_save,
+	.restore = tfp410_restore,
+	.destroy = tfp410_destroy,
+};
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 1b81b6a6d81b..b5d0809eecb1 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -28,6 +28,8 @@
 
 #include "drmP.h"
 #include "drm.h"
+#include "drm_crtc_helper.h"
+#include "intel_drv.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
 
@@ -125,6 +127,13 @@ void i915_kernel_lost_context(struct drm_device * dev)
 	struct drm_i915_master_private *master_priv;
 	drm_i915_ring_buffer_t *ring = &(dev_priv->ring);
 
+	/*
+	 * We should never lose context on the ring with modesetting
+	 * as we don't expose it to userspace
+	 */
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return;
+
 	ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
 	ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
 	ring->space = ring->head - (ring->tail + 8);
@@ -769,6 +778,11 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		WARN(1, "tried to set status page when mode setting active\n");
+		return 0;
+	}
+
 	printk(KERN_DEBUG "set status page addr 0x%08x\n", (u32)hws->addr);
 
 	dev_priv->status_gfx_addr = hws->addr & (0x1ffff<<12);
@@ -797,6 +811,173 @@ static int i915_set_status_page(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/**
+ * i915_probe_agp - get AGP bootup configuration
+ * @pdev: PCI device
+ * @aperture_size: returns AGP aperture configured size
+ * @preallocated_size: returns size of BIOS preallocated AGP space
+ *
+ * Since Intel integrated graphics are UMA, the BIOS has to set aside
+ * some RAM for the framebuffer at early boot.  This code figures out
+ * how much was set aside so we can use it for our own purposes.
+ */
+int i915_probe_agp(struct pci_dev *pdev, unsigned long *aperture_size,
+		   unsigned long *preallocated_size)
+{
+	struct pci_dev *bridge_dev;
+	u16 tmp = 0;
+	unsigned long overhead;
+
+	bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+	if (!bridge_dev) {
+		DRM_ERROR("bridge device not found\n");
+		return -1;
+	}
+
+	/* Get the fb aperture size and "stolen" memory amount. */
+	pci_read_config_word(bridge_dev, INTEL_GMCH_CTRL, &tmp);
+	pci_dev_put(bridge_dev);
+
+	*aperture_size = 1024 * 1024;
+	*preallocated_size = 1024 * 1024;
+
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_82830_CGC:
+	case PCI_DEVICE_ID_INTEL_82845G_IG:
+	case PCI_DEVICE_ID_INTEL_82855GM_IG:
+	case PCI_DEVICE_ID_INTEL_82865_IG:
+		if ((tmp & INTEL_GMCH_MEM_MASK) == INTEL_GMCH_MEM_64M)
+			*aperture_size *= 64;
+		else
+			*aperture_size *= 128;
+		break;
+	default:
+		/* 9xx supports large sizes, just look at the length */
+		*aperture_size = pci_resource_len(pdev, 2);
+		break;
+	}
+
+	/*
+	 * Some of the preallocated space is taken by the GTT
+	 * and popup.  GTT is 1K per MB of aperture size, and popup is 4K.
+	 */
+	overhead = (*aperture_size / 1024) + 4096;
+	switch (tmp & INTEL_855_GMCH_GMS_MASK) {
+	case INTEL_855_GMCH_GMS_STOLEN_1M:
+		break; /* 1M already */
+	case INTEL_855_GMCH_GMS_STOLEN_4M:
+		*preallocated_size *= 4;
+		break;
+	case INTEL_855_GMCH_GMS_STOLEN_8M:
+		*preallocated_size *= 8;
+		break;
+	case INTEL_855_GMCH_GMS_STOLEN_16M:
+		*preallocated_size *= 16;
+		break;
+	case INTEL_855_GMCH_GMS_STOLEN_32M:
+		*preallocated_size *= 32;
+		break;
+	case INTEL_915G_GMCH_GMS_STOLEN_48M:
+		*preallocated_size *= 48;
+		break;
+	case INTEL_915G_GMCH_GMS_STOLEN_64M:
+		*preallocated_size *= 64;
+		break;
+	case INTEL_855_GMCH_GMS_DISABLED:
+		DRM_ERROR("video memory is disabled\n");
+		return -1;
+	default:
+		DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n",
+			tmp & INTEL_855_GMCH_GMS_MASK);
+		return -1;
+	}
+	*preallocated_size -= overhead;
+
+	return 0;
+}
+
+static int i915_load_modeset_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long agp_size, prealloc_size;
+	int fb_bar = IS_I9XX(dev) ? 2 : 0;
+	int ret = 0;
+
+	dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) &
+		0xff000000;
+
+	DRM_DEBUG("*** fb base 0x%08lx\n", dev->mode_config.fb_base);
+
+	if (IS_MOBILE(dev) || (IS_I9XX(dev) && !IS_I965G(dev) && !IS_G33(dev)))
+		dev_priv->cursor_needs_physical = true;
+	else
+		dev_priv->cursor_needs_physical = false;
+
+	i915_probe_agp(dev->pdev, &agp_size, &prealloc_size);
+
+	/* Basic memrange allocator for stolen space (aka vram) */
+	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
+
+	/* Let GEM Manage from end of prealloc space to end of aperture */
+	i915_gem_do_init(dev, prealloc_size, agp_size);
+
+	ret = i915_gem_init_ringbuffer(dev);
+	if (ret)
+		goto out;
+
+        dev_priv->mm.gtt_mapping =
+		io_mapping_create_wc(dev->agp->base,
+				     dev->agp->agp_info.aper_size * 1024*1024);
+
+	/* Allow hardware batchbuffers unless told otherwise.
+	 */
+	dev_priv->allow_batchbuffer = 1;
+
+	ret = intel_init_bios(dev);
+	if (ret)
+		DRM_INFO("failed to find VBIOS tables\n");
+
+	ret = drm_irq_install(dev);
+	if (ret)
+		goto destroy_ringbuffer;
+
+	/* FIXME: re-add hotplug support */
+#if 0
+	ret = drm_hotplug_init(dev);
+	if (ret)
+		goto destroy_ringbuffer;
+#endif
+
+	/* Always safe in the mode setting case. */
+	/* FIXME: do pre/post-mode set stuff in core KMS code */
+	dev->vblank_disable_allowed = 1;
+
+	/*
+	 * Initialize the hardware status page IRQ location.
+	 */
+
+	I915_WRITE(INSTPM, (1 << 5) | (1 << 21));
+
+	intel_modeset_init(dev);
+
+	drm_helper_initial_config(dev, false);
+
+	dev->devname = kstrdup(DRIVER_NAME, GFP_KERNEL);
+	if (!dev->devname) {
+		ret = -ENOMEM;
+		goto modeset_cleanup;
+	}
+
+	return 0;
+
+modeset_cleanup:
+	intel_modeset_cleanup(dev);
+destroy_ringbuffer:
+	i915_gem_cleanup_ringbuffer(dev);
+out:
+	return ret;
+}
+
 int i915_master_create(struct drm_device *dev, struct drm_master *master)
 {
 	struct drm_i915_master_private *master_priv;
@@ -821,6 +1002,25 @@ void i915_master_destroy(struct drm_device *dev, struct drm_master *master)
 	master->driver_priv = NULL;
 }
 
+
+int i915_driver_firstopen(struct drm_device *dev)
+{
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return 0;
+	return 0;
+}
+
+/**
+ * i915_driver_load - setup chip and create an initial config
+ * @dev: DRM device
+ * @flags: startup flags
+ *
+ * The driver load routine has to do several things:
+ *   - drive output discovery via intel_modeset_init()
+ *   - initialize the memory manager
+ *   - allocate initial config memory
+ *   - setup the DRM framebuffer with the allocated memory
+ */
 int i915_driver_load(struct drm_device *dev, unsigned long flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -848,6 +1048,11 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	size = drm_get_resource_len(dev, mmio_bar);
 
 	dev_priv->regs = ioremap(base, size);
+	if (!dev_priv->regs) {
+		DRM_ERROR("failed to map registers\n");
+		ret = -EIO;
+		goto free_priv;
+	}
 
 #ifdef CONFIG_HIGHMEM64G
 	/* don't enable GEM on PAE - needs agp + set_memory_* interface fixes */
@@ -863,7 +1068,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!I915_NEED_GFX_HWS(dev)) {
 		ret = i915_init_phys_hws(dev);
 		if (ret != 0)
-			return ret;
+			goto out_rmmap;
 	}
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
@@ -883,6 +1088,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	intel_opregion_init(dev);
 
 	spin_lock_init(&dev_priv->user_irq_lock);
+	dev_priv->user_irq_refcount = 0;
 
 	ret = drm_vblank_init(dev, I915_NUM_PIPE);
 
@@ -891,6 +1097,20 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		return ret;
 	}
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = i915_load_modeset_init(dev);
+		if (ret < 0) {
+			DRM_ERROR("failed to init modeset\n");
+			goto out_rmmap;
+		}
+	}
+
+	return 0;
+
+out_rmmap:
+	iounmap(dev_priv->regs);
+free_priv:
+	drm_free(dev_priv, sizeof(struct drm_i915_private), DRM_MEM_DRIVER);
 	return ret;
 }
 
@@ -898,16 +1118,29 @@ int i915_driver_unload(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		io_mapping_free(dev_priv->mm.gtt_mapping);
+		drm_irq_uninstall(dev);
+	}
+
 	if (dev->pdev->msi_enabled)
 		pci_disable_msi(dev->pdev);
 
-	i915_free_hws(dev);
-
 	if (dev_priv->regs != NULL)
 		iounmap(dev_priv->regs);
 
 	intel_opregion_free(dev);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		intel_modeset_cleanup(dev);
+
+		mutex_lock(&dev->struct_mutex);
+		i915_gem_cleanup_ringbuffer(dev);
+		mutex_unlock(&dev->struct_mutex);
+		drm_mm_takedown(&dev_priv->vram);
+		i915_gem_lastclose(dev);
+	}
+
 	drm_free(dev->dev_private, sizeof(drm_i915_private_t),
 		 DRM_MEM_DRIVER);
 
@@ -933,12 +1166,26 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 	return 0;
 }
 
+/**
+ * i915_driver_lastclose - clean up after all DRM clients have exited
+ * @dev: DRM device
+ *
+ * Take care of cleaning up after all DRM clients have exited.  In the
+ * mode setting case, we want to restore the kernel's initial mode (just
+ * in case the last client left us in a bad state).
+ *
+ * Additionally, in the non-mode setting case, we'll tear down the AGP
+ * and DMA structures, since the kernel won't be using them, and clea
+ * up any GEM state.
+ */
 void i915_driver_lastclose(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
-	if (!dev_priv)
+	if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) {
+		intelfb_restore();
 		return;
+	}
 
 	i915_gem_lastclose(dev);
 
@@ -951,7 +1198,8 @@ void i915_driver_lastclose(struct drm_device * dev)
 void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	i915_mem_release(dev, file_priv, dev_priv->agp_heap);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		i915_mem_release(dev, file_priv, dev_priv->agp_heap);
 }
 
 void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e0d996ed9026..cbee41c32417 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -33,11 +33,22 @@
 #include "i915_drv.h"
 
 #include "drm_pciids.h"
+#include <linux/console.h>
+
+unsigned int i915_modeset = -1;
+module_param_named(modeset, i915_modeset, int, 0400);
+
+unsigned int i915_fbpercrtc = 0;
+module_param_named(fbpercrtc, i915_fbpercrtc, int, 0400);
 
 static struct pci_device_id pciidlist[] = {
 	i915_PCI_IDS
 };
 
+#if defined(CONFIG_DRM_I915_KMS)
+MODULE_DEVICE_TABLE(pci, pciidlist);
+#endif
+
 static int i915_suspend(struct drm_device *dev, pm_message_t state)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -148,6 +159,28 @@ static struct drm_driver driver = {
 static int __init i915_init(void)
 {
 	driver.num_ioctls = i915_max_ioctl;
+
+	/*
+	 * If CONFIG_DRM_I915_KMS is set, default to KMS unless
+	 * explicitly disabled with the module pararmeter.
+	 *
+	 * Otherwise, just follow the parameter (defaulting to off).
+	 *
+	 * Allow optional vga_text_mode_force boot option to override
+	 * the default behavior.
+	 */
+#if defined(CONFIG_DRM_I915_KMS)
+	if (i915_modeset != 0)
+		driver.driver_features |= DRIVER_MODESET;
+#endif
+	if (i915_modeset == 1)
+		driver.driver_features |= DRIVER_MODESET;
+
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force() && i915_modeset == -1)
+		driver.driver_features &= ~DRIVER_MODESET;
+#endif
+
 	return drm_init(&driver);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 85a072e80637..6f18ee68d06a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -31,6 +31,7 @@
 #define _I915_DRV_H_
 
 #include "i915_reg.h"
+#include "intel_bios.h"
 #include <linux/io-mapping.h>
 
 /* General customization:
@@ -152,8 +153,26 @@ typedef struct drm_i915_private {
 	unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
 	int vblank_pipe;
 
+	bool cursor_needs_physical;
+
+	struct drm_mm vram;
+
+	int irq_enabled;
+
 	struct intel_opregion opregion;
 
+	/* LVDS info */
+	int backlight_duty_cycle;  /* restore backlight to this value */
+	bool panel_wants_dither;
+	struct drm_display_mode *panel_fixed_mode;
+	struct drm_display_mode *vbt_mode; /* if any */
+
+	/* Feature bits from the VBIOS */
+	int int_tv_support:1;
+	int lvds_dither:1;
+	int lvds_vbt:1;
+	int int_crt_support:1;
+
 	struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
 	int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
 	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
@@ -413,6 +432,10 @@ struct drm_i915_gem_object {
 	 * flags which individual pages are valid.
 	 */
 	uint8_t *page_cpu_valid;
+
+	/** User space pin count and filp owning the pin */
+	uint32_t user_pin_count;
+	struct drm_file *pin_filp;
 };
 
 /**
@@ -442,8 +465,16 @@ struct drm_i915_file_private {
 	} mm;
 };
 
+enum intel_chip_family {
+	CHIP_I8XX = 0x01,
+	CHIP_I9XX = 0x02,
+	CHIP_I915 = 0x04,
+	CHIP_I965 = 0x08,
+};
+
 extern struct drm_ioctl_desc i915_ioctls[];
 extern int i915_max_ioctl;
+extern unsigned int i915_fbpercrtc;
 
 extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
 extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
@@ -472,6 +503,7 @@ extern int i915_irq_wait(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 void i915_user_irq_get(struct drm_device *dev);
 void i915_user_irq_put(struct drm_device *dev);
+extern void i915_enable_interrupt (struct drm_device *dev);
 
 extern irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS);
 extern void i915_driver_irq_preinstall(struct drm_device * dev);
@@ -556,7 +588,16 @@ uint32_t i915_get_gem_seqno(struct drm_device *dev);
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_work_handler(struct work_struct *work);
 void i915_gem_clflush_object(struct drm_gem_object *obj);
+int i915_gem_object_set_domain(struct drm_gem_object *obj,
+			       uint32_t read_domains,
+			       uint32_t write_domain);
+int i915_gem_init_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+int i915_gem_do_init(struct drm_device *dev, unsigned long start,
+		     unsigned long end);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
+				      int write);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
@@ -595,6 +636,10 @@ static inline void opregion_asle_intr(struct drm_device *dev) { return; }
 static inline void opregion_enable_asle(struct drm_device *dev) { return; }
 #endif
 
+/* modesetting */
+extern void intel_modeset_init(struct drm_device *dev);
+extern void intel_modeset_cleanup(struct drm_device *dev);
+
 /**
  * Lock test for when it's just for synchronization of ring access.
  *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0ac977112f72..c4ccaf3b3c7f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -30,6 +30,7 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 #include <linux/swap.h>
+#include <linux/pci.h>
 
 #define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
 
@@ -40,8 +41,6 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
 static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
-static int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
-					     int write);
 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
 					     int write);
 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
@@ -57,33 +56,37 @@ static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_evict_something(struct drm_device *dev);
 
-static void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev);
-
-int
-i915_gem_init_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file_priv)
+int i915_gem_do_init(struct drm_device *dev, unsigned long start,
+		     unsigned long end)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_gem_init *args = data;
-
-	mutex_lock(&dev->struct_mutex);
 
-	if (args->gtt_start >= args->gtt_end ||
-	    (args->gtt_start & (PAGE_SIZE - 1)) != 0 ||
-	    (args->gtt_end & (PAGE_SIZE - 1)) != 0) {
-		mutex_unlock(&dev->struct_mutex);
+	if (start >= end ||
+	    (start & (PAGE_SIZE - 1)) != 0 ||
+	    (end & (PAGE_SIZE - 1)) != 0) {
 		return -EINVAL;
 	}
 
-	drm_mm_init(&dev_priv->mm.gtt_space, args->gtt_start,
-	    args->gtt_end - args->gtt_start);
+	drm_mm_init(&dev_priv->mm.gtt_space, start,
+		    end - start);
 
-	dev->gtt_total = (uint32_t) (args->gtt_end - args->gtt_start);
+	dev->gtt_total = (uint32_t) (end - start);
+
+	return 0;
+}
 
+int
+i915_gem_init_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct drm_i915_gem_init *args = data;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
 	mutex_unlock(&dev->struct_mutex);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -1246,7 +1249,8 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	/* blow away mappings if mapped through GTT */
 	offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
-	unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
+	if (dev->dev_mapping)
+		unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
 
 	if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
 		i915_gem_clear_fence_reg(obj);
@@ -1508,7 +1512,7 @@ static void
 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	struct drm_i915_fence_reg *reg = NULL;
 	int i, ret;
@@ -1567,8 +1571,9 @@ try_again:
 		 * for this object next time we need it.
 		 */
 		offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
-		unmap_mapping_range(dev->dev_mapping, offset,
-				    reg->obj->size, 1);
+		if (dev->dev_mapping)
+			unmap_mapping_range(dev->dev_mapping, offset,
+					    reg->obj->size, 1);
 		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	}
 
@@ -1594,7 +1599,7 @@ static void
 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
 	if (IS_I965G(dev))
@@ -1764,7 +1769,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
  * This function returns when the move is complete, including waiting on
  * flushes to occur.
  */
-static int
+int
 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -2706,11 +2711,22 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 	}
 	obj_priv = obj->driver_private;
 
-	ret = i915_gem_object_pin(obj, args->alignment);
-	if (ret != 0) {
-		drm_gem_object_unreference(obj);
+	if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
+		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
+			  args->handle);
 		mutex_unlock(&dev->struct_mutex);
-		return ret;
+		return -EINVAL;
+	}
+
+	obj_priv->user_pin_count++;
+	obj_priv->pin_filp = file_priv;
+	if (obj_priv->user_pin_count == 1) {
+		ret = i915_gem_object_pin(obj, args->alignment);
+		if (ret != 0) {
+			drm_gem_object_unreference(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
 	}
 
 	/* XXX - flush the CPU caches for pinned objects
@@ -2730,6 +2746,7 @@ i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_pin *args = data;
 	struct drm_gem_object *obj;
+	struct drm_i915_gem_object *obj_priv;
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -2741,7 +2758,19 @@ i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
 		return -EBADF;
 	}
 
-	i915_gem_object_unpin(obj);
+	obj_priv = obj->driver_private;
+	if (obj_priv->pin_filp != file_priv) {
+		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
+			  args->handle);
+		drm_gem_object_unreference(obj);
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+	obj_priv->user_pin_count--;
+	if (obj_priv->user_pin_count == 0) {
+		obj_priv->pin_filp = NULL;
+		i915_gem_object_unpin(obj);
+	}
 
 	drm_gem_object_unreference(obj);
 	mutex_unlock(&dev->struct_mutex);
@@ -3036,12 +3065,13 @@ i915_gem_init_hws(struct drm_device *dev)
 	return 0;
 }
 
-static int
+int
 i915_gem_init_ringbuffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_gem_object *obj;
 	struct drm_i915_gem_object *obj_priv;
+	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
 	int ret;
 	u32 head;
 
@@ -3063,24 +3093,24 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
 	}
 
 	/* Set up the kernel mapping for the ring. */
-	dev_priv->ring.Size = obj->size;
-	dev_priv->ring.tail_mask = obj->size - 1;
+	ring->Size = obj->size;
+	ring->tail_mask = obj->size - 1;
 
-	dev_priv->ring.map.offset = dev->agp->base + obj_priv->gtt_offset;
-	dev_priv->ring.map.size = obj->size;
-	dev_priv->ring.map.type = 0;
-	dev_priv->ring.map.flags = 0;
-	dev_priv->ring.map.mtrr = 0;
+	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
+	ring->map.size = obj->size;
+	ring->map.type = 0;
+	ring->map.flags = 0;
+	ring->map.mtrr = 0;
 
-	drm_core_ioremap_wc(&dev_priv->ring.map, dev);
-	if (dev_priv->ring.map.handle == NULL) {
+	drm_core_ioremap_wc(&ring->map, dev);
+	if (ring->map.handle == NULL) {
 		DRM_ERROR("Failed to map ringbuffer.\n");
 		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
 		drm_gem_object_unreference(obj);
 		return -EINVAL;
 	}
-	dev_priv->ring.ring_obj = obj;
-	dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
+	ring->ring_obj = obj;
+	ring->virtual_start = ring->map.handle;
 
 	/* Stop the ring if it's running. */
 	I915_WRITE(PRB0_CTL, 0);
@@ -3128,12 +3158,20 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
 	}
 
 	/* Update our cache of the ring state */
-	i915_kernel_lost_context(dev);
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		i915_kernel_lost_context(dev);
+	else {
+		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+		ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+		ring->space = ring->head - (ring->tail + 8);
+		if (ring->space < 0)
+			ring->space += ring->Size;
+	}
 
 	return 0;
 }
 
-static void
+void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -3171,6 +3209,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return 0;
+
 	if (dev_priv->mm.wedged) {
 		DRM_ERROR("Reenabling wedged hardware, good luck\n");
 		dev_priv->mm.wedged = 0;
@@ -3204,6 +3245,9 @@ i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return 0;
+
 	ret = i915_gem_idle(dev);
 	drm_irq_uninstall(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9b673d2f912b..0cadafbef411 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -30,6 +30,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "intel_drv.h"
 
 #define MAX_NOPID ((u32)~0)
 
@@ -51,6 +52,15 @@
 #define I915_INTERRUPT_ENABLE_MASK (I915_INTERRUPT_ENABLE_FIX | \
 				    I915_INTERRUPT_ENABLE_VAR)
 
+#define I915_PIPE_VBLANK_STATUS	(PIPE_START_VBLANK_INTERRUPT_STATUS |\
+				 PIPE_VBLANK_INTERRUPT_STATUS)
+
+#define I915_PIPE_VBLANK_ENABLE	(PIPE_START_VBLANK_INTERRUPT_ENABLE |\
+				 PIPE_VBLANK_INTERRUPT_ENABLE)
+
+#define DRM_I915_VBLANK_PIPE_ALL	(DRM_I915_VBLANK_PIPE_A | \
+					 DRM_I915_VBLANK_PIPE_B)
+
 void
 i915_enable_irq(drm_i915_private_t *dev_priv, u32 mask)
 {
@@ -201,6 +211,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 		spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 		pipea_stats = I915_READ(PIPEASTAT);
 		pipeb_stats = I915_READ(PIPEBSTAT);
+
 		/*
 		 * Clear the PIPE(A|B)STAT regs before the IIR
 		 */
@@ -427,6 +438,14 @@ void i915_disable_vblank(struct drm_device *dev, int pipe)
 	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
 }
 
+void i915_enable_interrupt (struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	opregion_enable_asle(dev);
+	dev_priv->irq_enabled = 1;
+}
+
+
 /* Set the vblank monitor pipe
  */
 int i915_vblank_pipe_set(struct drm_device *dev, void *data,
@@ -487,6 +506,8 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
+	atomic_set(&dev_priv->irq_received, 0);
+
 	I915_WRITE(HWSTAM, 0xeffe);
 	I915_WRITE(PIPEASTAT, 0);
 	I915_WRITE(PIPEBSTAT, 0);
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
new file mode 100644
index 000000000000..4ca82a025525
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright � 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "intel_bios.h"
+
+
+static void *
+find_section(struct bdb_header *bdb, int section_id)
+{
+	u8 *base = (u8 *)bdb;
+	int index = 0;
+	u16 total, current_size;
+	u8 current_id;
+
+	/* skip to first section */
+	index += bdb->header_size;
+	total = bdb->bdb_size;
+
+	/* walk the sections looking for section_id */
+	while (index < total) {
+		current_id = *(base + index);
+		index++;
+		current_size = *((u16 *)(base + index));
+		index += 2;
+		if (current_id == section_id)
+			return base + index;
+		index += current_size;
+	}
+
+	return NULL;
+}
+
+/* Try to find panel data */
+static void
+parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+{
+	struct bdb_lvds_options *lvds_options;
+	struct bdb_lvds_lfp_data *lvds_lfp_data;
+	struct bdb_lvds_lfp_data_entry *entry;
+	struct lvds_dvo_timing *dvo_timing;
+	struct drm_display_mode *panel_fixed_mode;
+
+	/* Defaults if we can't find VBT info */
+	dev_priv->lvds_dither = 0;
+	dev_priv->lvds_vbt = 0;
+
+	lvds_options = find_section(bdb, BDB_LVDS_OPTIONS);
+	if (!lvds_options)
+		return;
+
+	dev_priv->lvds_dither = lvds_options->pixel_dither;
+	if (lvds_options->panel_type == 0xff)
+		return;
+
+	lvds_lfp_data = find_section(bdb, BDB_LVDS_LFP_DATA);
+	if (!lvds_lfp_data)
+		return;
+
+	dev_priv->lvds_vbt = 1;
+
+	entry = &lvds_lfp_data->data[lvds_options->panel_type];
+	dvo_timing = &entry->dvo_timing;
+
+	panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
+				      DRM_MEM_DRIVER);
+
+	panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) |
+		dvo_timing->hactive_lo;
+	panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay +
+		((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo);
+	panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start +
+		dvo_timing->hsync_pulse_width;
+	panel_fixed_mode->htotal = panel_fixed_mode->hdisplay +
+		((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo);
+
+	panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) |
+		dvo_timing->vactive_lo;
+	panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay +
+		dvo_timing->vsync_off;
+	panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start +
+		dvo_timing->vsync_pulse_width;
+	panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay +
+		((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo);
+	panel_fixed_mode->clock = dvo_timing->clock * 10;
+	panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED;
+
+	drm_mode_set_name(panel_fixed_mode);
+
+	dev_priv->vbt_mode = panel_fixed_mode;
+
+	DRM_DEBUG("Found panel mode in BIOS VBT tables:\n");
+	drm_mode_debug_printmodeline(panel_fixed_mode);
+
+	return;
+}
+
+static void
+parse_general_features(struct drm_i915_private *dev_priv,
+		       struct bdb_header *bdb)
+{
+	struct bdb_general_features *general;
+
+	/* Set sensible defaults in case we can't find the general block */
+	dev_priv->int_tv_support = 1;
+	dev_priv->int_crt_support = 1;
+
+	general = find_section(bdb, BDB_GENERAL_FEATURES);
+	if (general) {
+		dev_priv->int_tv_support = general->int_tv_support;
+		dev_priv->int_crt_support = general->int_crt_support;
+	}
+}
+
+/**
+ * intel_init_bios - initialize VBIOS settings & find VBT
+ * @dev: DRM device
+ *
+ * Loads the Video BIOS and checks that the VBT exists.  Sets scratch registers
+ * to appropriate values.
+ *
+ * VBT existence is a sanity check that is relied on by other i830_bios.c code.
+ * Note that it would be better to use a BIOS call to get the VBT, as BIOSes may
+ * feed an updated VBT back through that, compared to what we'll fetch using
+ * this method of groping around in the BIOS data.
+ *
+ * Returns 0 on success, nonzero on failure.
+ */
+bool
+intel_init_bios(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct pci_dev *pdev = dev->pdev;
+	struct vbt_header *vbt = NULL;
+	struct bdb_header *bdb;
+	u8 __iomem *bios;
+	size_t size;
+	int i;
+
+	bios = pci_map_rom(pdev, &size);
+	if (!bios)
+		return -1;
+
+	/* Scour memory looking for the VBT signature */
+	for (i = 0; i + 4 < size; i++) {
+		if (!memcmp(bios + i, "$VBT", 4)) {
+			vbt = (struct vbt_header *)(bios + i);
+			break;
+		}
+	}
+
+	if (!vbt) {
+		DRM_ERROR("VBT signature missing\n");
+		pci_unmap_rom(pdev, bios);
+		return -1;
+	}
+
+	bdb = (struct bdb_header *)(bios + i + vbt->bdb_offset);
+
+	/* Grab useful general definitions */
+	parse_general_features(dev_priv, bdb);
+	parse_panel_data(dev_priv, bdb);
+
+	pci_unmap_rom(pdev, bios);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
new file mode 100644
index 000000000000..5ea715ace3a0
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -0,0 +1,405 @@
+/*
+ * Copyright � 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifndef _I830_BIOS_H_
+#define _I830_BIOS_H_
+
+#include "drmP.h"
+
+struct vbt_header {
+	u8 signature[20];		/**< Always starts with 'VBT$' */
+	u16 version;			/**< decimal */
+	u16 header_size;		/**< in bytes */
+	u16 vbt_size;			/**< in bytes */
+	u8 vbt_checksum;
+	u8 reserved0;
+	u32 bdb_offset;			/**< from beginning of VBT */
+	u32 aim_offset[4];		/**< from beginning of VBT */
+} __attribute__((packed));
+
+struct bdb_header {
+	u8 signature[16];		/**< Always 'BIOS_DATA_BLOCK' */
+	u16 version;			/**< decimal */
+	u16 header_size;		/**< in bytes */
+	u16 bdb_size;			/**< in bytes */
+};
+
+/* strictly speaking, this is a "skip" block, but it has interesting info */
+struct vbios_data {
+	u8 type; /* 0 == desktop, 1 == mobile */
+	u8 relstage;
+	u8 chipset;
+	u8 lvds_present:1;
+	u8 tv_present:1;
+	u8 rsvd2:6; /* finish byte */
+	u8 rsvd3[4];
+	u8 signon[155];
+	u8 copyright[61];
+	u16 code_segment;
+	u8 dos_boot_mode;
+	u8 bandwidth_percent;
+	u8 rsvd4; /* popup memory size */
+	u8 resize_pci_bios;
+	u8 rsvd5; /* is crt already on ddc2 */
+} __attribute__((packed));
+
+/*
+ * There are several types of BIOS data blocks (BDBs), each block has
+ * an ID and size in the first 3 bytes (ID in first, size in next 2).
+ * Known types are listed below.
+ */
+#define BDB_GENERAL_FEATURES	  1
+#define BDB_GENERAL_DEFINITIONS	  2
+#define BDB_OLD_TOGGLE_LIST	  3
+#define BDB_MODE_SUPPORT_LIST	  4
+#define BDB_GENERIC_MODE_TABLE	  5
+#define BDB_EXT_MMIO_REGS	  6
+#define BDB_SWF_IO		  7
+#define BDB_SWF_MMIO		  8
+#define BDB_DOT_CLOCK_TABLE	  9
+#define BDB_MODE_REMOVAL_TABLE	 10
+#define BDB_CHILD_DEVICE_TABLE	 11
+#define BDB_DRIVER_FEATURES	 12
+#define BDB_DRIVER_PERSISTENCE	 13
+#define BDB_EXT_TABLE_PTRS	 14
+#define BDB_DOT_CLOCK_OVERRIDE	 15
+#define BDB_DISPLAY_SELECT	 16
+/* 17 rsvd */
+#define BDB_DRIVER_ROTATION	 18
+#define BDB_DISPLAY_REMOVE	 19
+#define BDB_OEM_CUSTOM		 20
+#define BDB_EFP_LIST		 21 /* workarounds for VGA hsync/vsync */
+#define BDB_SDVO_LVDS_OPTIONS	 22
+#define BDB_SDVO_PANEL_DTDS	 23
+#define BDB_SDVO_LVDS_PNP_IDS	 24
+#define BDB_SDVO_LVDS_POWER_SEQ	 25
+#define BDB_TV_OPTIONS		 26
+#define BDB_LVDS_OPTIONS	 40
+#define BDB_LVDS_LFP_DATA_PTRS	 41
+#define BDB_LVDS_LFP_DATA	 42
+#define BDB_LVDS_BACKLIGHT	 43
+#define BDB_LVDS_POWER		 44
+#define BDB_SKIP		254 /* VBIOS private block, ignore */
+
+struct bdb_general_features {
+        /* bits 1 */
+	u8 panel_fitting:2;
+	u8 flexaim:1;
+	u8 msg_enable:1;
+	u8 clear_screen:3;
+	u8 color_flip:1;
+
+        /* bits 2 */
+	u8 download_ext_vbt:1;
+	u8 enable_ssc:1;
+	u8 ssc_freq:1;
+	u8 enable_lfp_on_override:1;
+	u8 disable_ssc_ddt:1;
+	u8 rsvd8:3; /* finish byte */
+
+        /* bits 3 */
+	u8 disable_smooth_vision:1;
+	u8 single_dvi:1;
+	u8 rsvd9:6; /* finish byte */
+
+        /* bits 4 */
+	u8 legacy_monitor_detect;
+
+        /* bits 5 */
+	u8 int_crt_support:1;
+	u8 int_tv_support:1;
+	u8 rsvd11:6; /* finish byte */
+} __attribute__((packed));
+
+struct bdb_general_definitions {
+	/* DDC GPIO */
+	u8 crt_ddc_gmbus_pin;
+
+	/* DPMS bits */
+	u8 dpms_acpi:1;
+	u8 skip_boot_crt_detect:1;
+	u8 dpms_aim:1;
+	u8 rsvd1:5; /* finish byte */
+
+	/* boot device bits */
+	u8 boot_display[2];
+	u8 child_dev_size;
+
+	/* device info */
+	u8 tv_or_lvds_info[33];
+	u8 dev1[33];
+	u8 dev2[33];
+	u8 dev3[33];
+	u8 dev4[33];
+	/* may be another device block here on some platforms */
+};
+
+struct bdb_lvds_options {
+	u8 panel_type;
+	u8 rsvd1;
+	/* LVDS capabilities, stored in a dword */
+	u8 rsvd2:1;
+	u8 lvds_edid:1;
+	u8 pixel_dither:1;
+	u8 pfit_ratio_auto:1;
+	u8 pfit_gfx_mode_enhanced:1;
+	u8 pfit_text_mode_enhanced:1;
+	u8 pfit_mode:2;
+	u8 rsvd4;
+} __attribute__((packed));
+
+/* LFP pointer table contains entries to the struct below */
+struct bdb_lvds_lfp_data_ptr {
+	u16 fp_timing_offset; /* offsets are from start of bdb */
+	u8 fp_table_size;
+	u16 dvo_timing_offset;
+	u8 dvo_table_size;
+	u16 panel_pnp_id_offset;
+	u8 pnp_table_size;
+} __attribute__((packed));
+
+struct bdb_lvds_lfp_data_ptrs {
+	u8 lvds_entries; /* followed by one or more lvds_data_ptr structs */
+	struct bdb_lvds_lfp_data_ptr ptr[16];
+} __attribute__((packed));
+
+/* LFP data has 3 blocks per entry */
+struct lvds_fp_timing {
+	u16 x_res;
+	u16 y_res;
+	u32 lvds_reg;
+	u32 lvds_reg_val;
+	u32 pp_on_reg;
+	u32 pp_on_reg_val;
+	u32 pp_off_reg;
+	u32 pp_off_reg_val;
+	u32 pp_cycle_reg;
+	u32 pp_cycle_reg_val;
+	u32 pfit_reg;
+	u32 pfit_reg_val;
+	u16 terminator;
+} __attribute__((packed));
+
+struct lvds_dvo_timing {
+	u16 clock;		/**< In 10khz */
+	u8 hactive_lo;
+	u8 hblank_lo;
+	u8 hblank_hi:4;
+	u8 hactive_hi:4;
+	u8 vactive_lo;
+	u8 vblank_lo;
+	u8 vblank_hi:4;
+	u8 vactive_hi:4;
+	u8 hsync_off_lo;
+	u8 hsync_pulse_width;
+	u8 vsync_pulse_width:4;
+	u8 vsync_off:4;
+	u8 rsvd0:6;
+	u8 hsync_off_hi:2;
+	u8 h_image;
+	u8 v_image;
+	u8 max_hv;
+	u8 h_border;
+	u8 v_border;
+	u8 rsvd1:3;
+	u8 digital:2;
+	u8 vsync_positive:1;
+	u8 hsync_positive:1;
+	u8 rsvd2:1;
+} __attribute__((packed));
+
+struct lvds_pnp_id {
+	u16 mfg_name;
+	u16 product_code;
+	u32 serial;
+	u8 mfg_week;
+	u8 mfg_year;
+} __attribute__((packed));
+
+struct bdb_lvds_lfp_data_entry {
+	struct lvds_fp_timing fp_timing;
+	struct lvds_dvo_timing dvo_timing;
+	struct lvds_pnp_id pnp_id;
+} __attribute__((packed));
+
+struct bdb_lvds_lfp_data {
+	struct bdb_lvds_lfp_data_entry data[16];
+} __attribute__((packed));
+
+struct aimdb_header {
+	char signature[16];
+	char oem_device[20];
+	u16 aimdb_version;
+	u16 aimdb_header_size;
+	u16 aimdb_size;
+} __attribute__((packed));
+
+struct aimdb_block {
+	u8 aimdb_id;
+	u16 aimdb_size;
+} __attribute__((packed));
+
+struct vch_panel_data {
+	u16 fp_timing_offset;
+	u8 fp_timing_size;
+	u16 dvo_timing_offset;
+	u8 dvo_timing_size;
+	u16 text_fitting_offset;
+	u8 text_fitting_size;
+	u16 graphics_fitting_offset;
+	u8 graphics_fitting_size;
+} __attribute__((packed));
+
+struct vch_bdb_22 {
+	struct aimdb_block aimdb_block;
+	struct vch_panel_data panels[16];
+} __attribute__((packed));
+
+bool intel_init_bios(struct drm_device *dev);
+
+/*
+ * Driver<->VBIOS interaction occurs through scratch bits in
+ * GR18 & SWF*.
+ */
+
+/* GR18 bits are set on display switch and hotkey events */
+#define GR18_DRIVER_SWITCH_EN	(1<<7) /* 0: VBIOS control, 1: driver control */
+#define GR18_HOTKEY_MASK	0x78 /* See also SWF4 15:0 */
+#define   GR18_HK_NONE		(0x0<<3)
+#define   GR18_HK_LFP_STRETCH	(0x1<<3)
+#define   GR18_HK_TOGGLE_DISP	(0x2<<3)
+#define   GR18_HK_DISP_SWITCH	(0x4<<3) /* see SWF14 15:0 for what to enable */
+#define   GR18_HK_POPUP_DISABLED (0x6<<3)
+#define   GR18_HK_POPUP_ENABLED	(0x7<<3)
+#define   GR18_HK_PFIT		(0x8<<3)
+#define   GR18_HK_APM_CHANGE	(0xa<<3)
+#define   GR18_HK_MULTIPLE	(0xc<<3)
+#define GR18_USER_INT_EN	(1<<2)
+#define GR18_A0000_FLUSH_EN	(1<<1)
+#define GR18_SMM_EN		(1<<0)
+
+/* Set by driver, cleared by VBIOS */
+#define SWF00_YRES_SHIFT	16
+#define SWF00_XRES_SHIFT	0
+#define SWF00_RES_MASK		0xffff
+
+/* Set by VBIOS at boot time and driver at runtime */
+#define SWF01_TV2_FORMAT_SHIFT	8
+#define SWF01_TV1_FORMAT_SHIFT	0
+#define SWF01_TV_FORMAT_MASK	0xffff
+
+#define SWF10_VBIOS_BLC_I2C_EN	(1<<29)
+#define SWF10_GTT_OVERRIDE_EN	(1<<28)
+#define SWF10_LFP_DPMS_OVR	(1<<27) /* override DPMS on display switch */
+#define SWF10_ACTIVE_TOGGLE_LIST_MASK (7<<24)
+#define   SWF10_OLD_TOGGLE	0x0
+#define   SWF10_TOGGLE_LIST_1	0x1
+#define   SWF10_TOGGLE_LIST_2	0x2
+#define   SWF10_TOGGLE_LIST_3	0x3
+#define   SWF10_TOGGLE_LIST_4	0x4
+#define SWF10_PANNING_EN	(1<<23)
+#define SWF10_DRIVER_LOADED	(1<<22)
+#define SWF10_EXTENDED_DESKTOP	(1<<21)
+#define SWF10_EXCLUSIVE_MODE	(1<<20)
+#define SWF10_OVERLAY_EN	(1<<19)
+#define SWF10_PLANEB_HOLDOFF	(1<<18)
+#define SWF10_PLANEA_HOLDOFF	(1<<17)
+#define SWF10_VGA_HOLDOFF	(1<<16)
+#define SWF10_ACTIVE_DISP_MASK	0xffff
+#define   SWF10_PIPEB_LFP2	(1<<15)
+#define   SWF10_PIPEB_EFP2	(1<<14)
+#define   SWF10_PIPEB_TV2	(1<<13)
+#define   SWF10_PIPEB_CRT2	(1<<12)
+#define   SWF10_PIPEB_LFP	(1<<11)
+#define   SWF10_PIPEB_EFP	(1<<10)
+#define   SWF10_PIPEB_TV	(1<<9)
+#define   SWF10_PIPEB_CRT	(1<<8)
+#define   SWF10_PIPEA_LFP2	(1<<7)
+#define   SWF10_PIPEA_EFP2	(1<<6)
+#define   SWF10_PIPEA_TV2	(1<<5)
+#define   SWF10_PIPEA_CRT2	(1<<4)
+#define   SWF10_PIPEA_LFP	(1<<3)
+#define   SWF10_PIPEA_EFP	(1<<2)
+#define   SWF10_PIPEA_TV	(1<<1)
+#define   SWF10_PIPEA_CRT	(1<<0)
+
+#define SWF11_MEMORY_SIZE_SHIFT	16
+#define SWF11_SV_TEST_EN	(1<<15)
+#define SWF11_IS_AGP		(1<<14)
+#define SWF11_DISPLAY_HOLDOFF	(1<<13)
+#define SWF11_DPMS_REDUCED	(1<<12)
+#define SWF11_IS_VBE_MODE	(1<<11)
+#define SWF11_PIPEB_ACCESS	(1<<10) /* 0 here means pipe a */
+#define SWF11_DPMS_MASK		0x07
+#define   SWF11_DPMS_OFF	(1<<2)
+#define   SWF11_DPMS_SUSPEND	(1<<1)
+#define   SWF11_DPMS_STANDBY	(1<<0)
+#define   SWF11_DPMS_ON		0
+
+#define SWF14_GFX_PFIT_EN	(1<<31)
+#define SWF14_TEXT_PFIT_EN	(1<<30)
+#define SWF14_LID_STATUS_CLOSED	(1<<29) /* 0 here means open */
+#define SWF14_POPUP_EN		(1<<28)
+#define SWF14_DISPLAY_HOLDOFF	(1<<27)
+#define SWF14_DISP_DETECT_EN	(1<<26)
+#define SWF14_DOCKING_STATUS_DOCKED (1<<25) /* 0 here means undocked */
+#define SWF14_DRIVER_STATUS	(1<<24)
+#define SWF14_OS_TYPE_WIN9X	(1<<23)
+#define SWF14_OS_TYPE_WINNT	(1<<22)
+/* 21:19 rsvd */
+#define SWF14_PM_TYPE_MASK	0x00070000
+#define   SWF14_PM_ACPI_VIDEO	(0x4 << 16)
+#define   SWF14_PM_ACPI		(0x3 << 16)
+#define   SWF14_PM_APM_12	(0x2 << 16)
+#define   SWF14_PM_APM_11	(0x1 << 16)
+#define SWF14_HK_REQUEST_MASK	0x0000ffff /* see GR18 6:3 for event type */
+          /* if GR18 indicates a display switch */
+#define   SWF14_DS_PIPEB_LFP2_EN (1<<15)
+#define   SWF14_DS_PIPEB_EFP2_EN (1<<14)
+#define   SWF14_DS_PIPEB_TV2_EN  (1<<13)
+#define   SWF14_DS_PIPEB_CRT2_EN (1<<12)
+#define   SWF14_DS_PIPEB_LFP_EN  (1<<11)
+#define   SWF14_DS_PIPEB_EFP_EN  (1<<10)
+#define   SWF14_DS_PIPEB_TV_EN   (1<<9)
+#define   SWF14_DS_PIPEB_CRT_EN  (1<<8)
+#define   SWF14_DS_PIPEA_LFP2_EN (1<<7)
+#define   SWF14_DS_PIPEA_EFP2_EN (1<<6)
+#define   SWF14_DS_PIPEA_TV2_EN  (1<<5)
+#define   SWF14_DS_PIPEA_CRT2_EN (1<<4)
+#define   SWF14_DS_PIPEA_LFP_EN  (1<<3)
+#define   SWF14_DS_PIPEA_EFP_EN  (1<<2)
+#define   SWF14_DS_PIPEA_TV_EN   (1<<1)
+#define   SWF14_DS_PIPEA_CRT_EN  (1<<0)
+          /* if GR18 indicates a panel fitting request */
+#define   SWF14_PFIT_EN		(1<<0) /* 0 means disable */
+          /* if GR18 indicates an APM change request */
+#define   SWF14_APM_HIBERNATE	0x4
+#define   SWF14_APM_SUSPEND	0x3
+#define   SWF14_APM_STANDBY	0x1
+#define   SWF14_APM_RESTORE	0x0
+
+#endif /* _I830_BIOS_H_ */
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
new file mode 100644
index 000000000000..5d9c94eb7055
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2006-2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ */
+
+#include <linux/i2c.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+static void intel_crt_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 temp;
+
+	temp = I915_READ(ADPA);
+	temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE);
+	temp &= ~ADPA_DAC_ENABLE;
+
+	switch(mode) {
+	case DRM_MODE_DPMS_ON:
+		temp |= ADPA_DAC_ENABLE;
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+		temp |= ADPA_DAC_ENABLE | ADPA_HSYNC_CNTL_DISABLE;
+		break;
+	case DRM_MODE_DPMS_SUSPEND:
+		temp |= ADPA_DAC_ENABLE | ADPA_VSYNC_CNTL_DISABLE;
+		break;
+	case DRM_MODE_DPMS_OFF:
+		temp |= ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE;
+		break;
+	}
+
+	I915_WRITE(ADPA, temp);
+}
+
+static int intel_crt_mode_valid(struct drm_connector *connector,
+				struct drm_display_mode *mode)
+{
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
+	if (mode->clock > 400000 || mode->clock < 25000)
+		return MODE_CLOCK_RANGE;
+
+	return MODE_OK;
+}
+
+static bool intel_crt_mode_fixup(struct drm_encoder *encoder,
+				 struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void intel_crt_mode_set(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
+{
+
+	struct drm_device *dev = encoder->dev;
+	struct drm_crtc *crtc = encoder->crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int dpll_md_reg;
+	u32 adpa, dpll_md;
+
+	if (intel_crtc->pipe == 0)
+		dpll_md_reg = DPLL_A_MD;
+	else
+		dpll_md_reg = DPLL_B_MD;
+
+	/*
+	 * Disable separate mode multiplier used when cloning SDVO to CRT
+	 * XXX this needs to be adjusted when we really are cloning
+	 */
+	if (IS_I965G(dev)) {
+		dpll_md = I915_READ(dpll_md_reg);
+		I915_WRITE(dpll_md_reg,
+			   dpll_md & ~DPLL_MD_UDI_MULTIPLIER_MASK);
+	}
+
+	adpa = 0;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
+		adpa |= ADPA_HSYNC_ACTIVE_HIGH;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
+		adpa |= ADPA_VSYNC_ACTIVE_HIGH;
+
+	if (intel_crtc->pipe == 0)
+		adpa |= ADPA_PIPE_A_SELECT;
+	else
+		adpa |= ADPA_PIPE_B_SELECT;
+
+	I915_WRITE(ADPA, adpa);
+}
+
+/**
+ * Uses CRT_HOTPLUG_EN and CRT_HOTPLUG_STAT to detect CRT presence.
+ *
+ * Not for i915G/i915GM
+ *
+ * \return true if CRT is connected.
+ * \return false if CRT is disconnected.
+ */
+static bool intel_crt_detect_hotplug(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 temp;
+
+	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+
+	temp = I915_READ(PORT_HOTPLUG_EN);
+
+	I915_WRITE(PORT_HOTPLUG_EN,
+		   temp | CRT_HOTPLUG_FORCE_DETECT | (1 << 5));
+
+	do {
+		if (!(I915_READ(PORT_HOTPLUG_EN) & CRT_HOTPLUG_FORCE_DETECT))
+			break;
+		msleep(1);
+	} while (time_after(timeout, jiffies));
+
+	if ((I915_READ(PORT_HOTPLUG_STAT) & CRT_HOTPLUG_MONITOR_MASK) ==
+	    CRT_HOTPLUG_MONITOR_COLOR)
+		return true;
+
+	return false;
+}
+
+static bool intel_crt_detect_ddc(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	/* CRT should always be at 0, but check anyway */
+	if (intel_output->type != INTEL_OUTPUT_ANALOG)
+		return false;
+
+	return intel_ddc_probe(intel_output);
+}
+
+static enum drm_connector_status intel_crt_detect(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+
+	if (IS_I9XX(dev) && !IS_I915G(dev) && !IS_I915GM(dev)) {
+		if (intel_crt_detect_hotplug(connector))
+			return connector_status_connected;
+		else
+			return connector_status_disconnected;
+	}
+
+	if (intel_crt_detect_ddc(connector))
+		return connector_status_connected;
+
+	/* TODO use load detect */
+	return connector_status_unknown;
+}
+
+static void intel_crt_destroy(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	intel_i2c_destroy(intel_output->ddc_bus);
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
+
+static int intel_crt_get_modes(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	return intel_ddc_get_modes(intel_output);
+}
+
+static int intel_crt_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t value)
+{
+	struct drm_device *dev = connector->dev;
+
+	if (property == dev->mode_config.dpms_property && connector->encoder)
+		intel_crt_dpms(connector->encoder, (uint32_t)(value & 0xf));
+
+	return 0;
+}
+
+/*
+ * Routines for controlling stuff on the analog port
+ */
+
+static const struct drm_encoder_helper_funcs intel_crt_helper_funcs = {
+	.dpms = intel_crt_dpms,
+	.mode_fixup = intel_crt_mode_fixup,
+	.prepare = intel_encoder_prepare,
+	.commit = intel_encoder_commit,
+	.mode_set = intel_crt_mode_set,
+};
+
+static const struct drm_connector_funcs intel_crt_connector_funcs = {
+	.detect = intel_crt_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = intel_crt_destroy,
+	.set_property = intel_crt_set_property,
+};
+
+static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = {
+	.mode_valid = intel_crt_mode_valid,
+	.get_modes = intel_crt_get_modes,
+	.best_encoder = intel_best_encoder,
+};
+
+void intel_crt_enc_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs intel_crt_enc_funcs = {
+	.destroy = intel_crt_enc_destroy,
+};
+
+void intel_crt_init(struct drm_device *dev)
+{
+	struct drm_connector *connector;
+	struct intel_output *intel_output;
+
+	intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
+	if (!intel_output)
+		return;
+
+	connector = &intel_output->base;
+	drm_connector_init(dev, &intel_output->base,
+			   &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
+
+	drm_encoder_init(dev, &intel_output->enc, &intel_crt_enc_funcs,
+			 DRM_MODE_ENCODER_DAC);
+
+	drm_mode_connector_attach_encoder(&intel_output->base,
+					  &intel_output->enc);
+
+	/* Set up the DDC bus. */
+	intel_output->ddc_bus = intel_i2c_create(dev, GPIOA, "CRTDDC_A");
+	if (!intel_output->ddc_bus) {
+		dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
+			   "failed.\n");
+		return;
+	}
+
+	intel_output->type = INTEL_OUTPUT_ANALOG;
+	connector->interlace_allowed = 0;
+	connector->doublescan_allowed = 0;
+
+	drm_encoder_helper_add(&intel_output->enc, &intel_crt_helper_funcs);
+	drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
+
+	drm_sysfs_connector_add(connector);
+}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
new file mode 100644
index 000000000000..96c2da5b74e3
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -0,0 +1,1621 @@
+/*
+ * Copyright © 2006-2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ */
+
+#include <linux/i2c.h>
+#include "drmP.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+#include "drm_crtc_helper.h"
+
+bool intel_pipe_has_type (struct drm_crtc *crtc, int type);
+
+typedef struct {
+    /* given values */
+    int n;
+    int m1, m2;
+    int p1, p2;
+    /* derived values */
+    int	dot;
+    int	vco;
+    int	m;
+    int	p;
+} intel_clock_t;
+
+typedef struct {
+    int	min, max;
+} intel_range_t;
+
+typedef struct {
+    int	dot_limit;
+    int	p2_slow, p2_fast;
+} intel_p2_t;
+
+#define INTEL_P2_NUM		      2
+
+typedef struct {
+    intel_range_t   dot, vco, n, m, m1, m2, p, p1;
+    intel_p2_t	    p2;
+} intel_limit_t;
+
+#define I8XX_DOT_MIN		  25000
+#define I8XX_DOT_MAX		 350000
+#define I8XX_VCO_MIN		 930000
+#define I8XX_VCO_MAX		1400000
+#define I8XX_N_MIN		      3
+#define I8XX_N_MAX		     16
+#define I8XX_M_MIN		     96
+#define I8XX_M_MAX		    140
+#define I8XX_M1_MIN		     18
+#define I8XX_M1_MAX		     26
+#define I8XX_M2_MIN		      6
+#define I8XX_M2_MAX		     16
+#define I8XX_P_MIN		      4
+#define I8XX_P_MAX		    128
+#define I8XX_P1_MIN		      2
+#define I8XX_P1_MAX		     33
+#define I8XX_P1_LVDS_MIN	      1
+#define I8XX_P1_LVDS_MAX	      6
+#define I8XX_P2_SLOW		      4
+#define I8XX_P2_FAST		      2
+#define I8XX_P2_LVDS_SLOW	      14
+#define I8XX_P2_LVDS_FAST	      14 /* No fast option */
+#define I8XX_P2_SLOW_LIMIT	 165000
+
+#define I9XX_DOT_MIN		  20000
+#define I9XX_DOT_MAX		 400000
+#define I9XX_VCO_MIN		1400000
+#define I9XX_VCO_MAX		2800000
+#define I9XX_N_MIN		      3
+#define I9XX_N_MAX		      8
+#define I9XX_M_MIN		     70
+#define I9XX_M_MAX		    120
+#define I9XX_M1_MIN		     10
+#define I9XX_M1_MAX		     20
+#define I9XX_M2_MIN		      5
+#define I9XX_M2_MAX		      9
+#define I9XX_P_SDVO_DAC_MIN	      5
+#define I9XX_P_SDVO_DAC_MAX	     80
+#define I9XX_P_LVDS_MIN		      7
+#define I9XX_P_LVDS_MAX		     98
+#define I9XX_P1_MIN		      1
+#define I9XX_P1_MAX		      8
+#define I9XX_P2_SDVO_DAC_SLOW		     10
+#define I9XX_P2_SDVO_DAC_FAST		      5
+#define I9XX_P2_SDVO_DAC_SLOW_LIMIT	 200000
+#define I9XX_P2_LVDS_SLOW		     14
+#define I9XX_P2_LVDS_FAST		      7
+#define I9XX_P2_LVDS_SLOW_LIMIT		 112000
+
+#define INTEL_LIMIT_I8XX_DVO_DAC    0
+#define INTEL_LIMIT_I8XX_LVDS	    1
+#define INTEL_LIMIT_I9XX_SDVO_DAC   2
+#define INTEL_LIMIT_I9XX_LVDS	    3
+
+static const intel_limit_t intel_limits[] = {
+    { /* INTEL_LIMIT_I8XX_DVO_DAC */
+        .dot = { .min = I8XX_DOT_MIN,		.max = I8XX_DOT_MAX },
+        .vco = { .min = I8XX_VCO_MIN,		.max = I8XX_VCO_MAX },
+        .n   = { .min = I8XX_N_MIN,		.max = I8XX_N_MAX },
+        .m   = { .min = I8XX_M_MIN,		.max = I8XX_M_MAX },
+        .m1  = { .min = I8XX_M1_MIN,		.max = I8XX_M1_MAX },
+        .m2  = { .min = I8XX_M2_MIN,		.max = I8XX_M2_MAX },
+        .p   = { .min = I8XX_P_MIN,		.max = I8XX_P_MAX },
+        .p1  = { .min = I8XX_P1_MIN,		.max = I8XX_P1_MAX },
+	.p2  = { .dot_limit = I8XX_P2_SLOW_LIMIT,
+		 .p2_slow = I8XX_P2_SLOW,	.p2_fast = I8XX_P2_FAST },
+    },
+    { /* INTEL_LIMIT_I8XX_LVDS */
+        .dot = { .min = I8XX_DOT_MIN,		.max = I8XX_DOT_MAX },
+        .vco = { .min = I8XX_VCO_MIN,		.max = I8XX_VCO_MAX },
+        .n   = { .min = I8XX_N_MIN,		.max = I8XX_N_MAX },
+        .m   = { .min = I8XX_M_MIN,		.max = I8XX_M_MAX },
+        .m1  = { .min = I8XX_M1_MIN,		.max = I8XX_M1_MAX },
+        .m2  = { .min = I8XX_M2_MIN,		.max = I8XX_M2_MAX },
+        .p   = { .min = I8XX_P_MIN,		.max = I8XX_P_MAX },
+        .p1  = { .min = I8XX_P1_LVDS_MIN,	.max = I8XX_P1_LVDS_MAX },
+	.p2  = { .dot_limit = I8XX_P2_SLOW_LIMIT,
+		 .p2_slow = I8XX_P2_LVDS_SLOW,	.p2_fast = I8XX_P2_LVDS_FAST },
+    },
+    { /* INTEL_LIMIT_I9XX_SDVO_DAC */
+        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
+        .vco = { .min = I9XX_VCO_MIN,		.max = I9XX_VCO_MAX },
+        .n   = { .min = I9XX_N_MIN,		.max = I9XX_N_MAX },
+        .m   = { .min = I9XX_M_MIN,		.max = I9XX_M_MAX },
+        .m1  = { .min = I9XX_M1_MIN,		.max = I9XX_M1_MAX },
+        .m2  = { .min = I9XX_M2_MIN,		.max = I9XX_M2_MAX },
+        .p   = { .min = I9XX_P_SDVO_DAC_MIN,	.max = I9XX_P_SDVO_DAC_MAX },
+        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
+	.p2  = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT,
+		 .p2_slow = I9XX_P2_SDVO_DAC_SLOW,	.p2_fast = I9XX_P2_SDVO_DAC_FAST },
+    },
+    { /* INTEL_LIMIT_I9XX_LVDS */
+        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
+        .vco = { .min = I9XX_VCO_MIN,		.max = I9XX_VCO_MAX },
+        .n   = { .min = I9XX_N_MIN,		.max = I9XX_N_MAX },
+        .m   = { .min = I9XX_M_MIN,		.max = I9XX_M_MAX },
+        .m1  = { .min = I9XX_M1_MIN,		.max = I9XX_M1_MAX },
+        .m2  = { .min = I9XX_M2_MIN,		.max = I9XX_M2_MAX },
+        .p   = { .min = I9XX_P_LVDS_MIN,	.max = I9XX_P_LVDS_MAX },
+        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
+	/* The single-channel range is 25-112Mhz, and dual-channel
+	 * is 80-224Mhz.  Prefer single channel as much as possible.
+	 */
+	.p2  = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT,
+		 .p2_slow = I9XX_P2_LVDS_SLOW,	.p2_fast = I9XX_P2_LVDS_FAST },
+    },
+};
+
+static const intel_limit_t *intel_limit(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	const intel_limit_t *limit;
+
+	if (IS_I9XX(dev)) {
+		if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
+			limit = &intel_limits[INTEL_LIMIT_I9XX_LVDS];
+		else
+			limit = &intel_limits[INTEL_LIMIT_I9XX_SDVO_DAC];
+	} else {
+		if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
+			limit = &intel_limits[INTEL_LIMIT_I8XX_LVDS];
+		else
+			limit = &intel_limits[INTEL_LIMIT_I8XX_DVO_DAC];
+	}
+	return limit;
+}
+
+/** Derive the pixel clock for the given refclk and divisors for 8xx chips. */
+
+static void i8xx_clock(int refclk, intel_clock_t *clock)
+{
+	clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2);
+	clock->p = clock->p1 * clock->p2;
+	clock->vco = refclk * clock->m / (clock->n + 2);
+	clock->dot = clock->vco / clock->p;
+}
+
+/** Derive the pixel clock for the given refclk and divisors for 9xx chips. */
+
+static void i9xx_clock(int refclk, intel_clock_t *clock)
+{
+	clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2);
+	clock->p = clock->p1 * clock->p2;
+	clock->vco = refclk * clock->m / (clock->n + 2);
+	clock->dot = clock->vco / clock->p;
+}
+
+static void intel_clock(struct drm_device *dev, int refclk,
+			intel_clock_t *clock)
+{
+	if (IS_I9XX(dev))
+		return i9xx_clock (refclk, clock);
+	else
+		return i8xx_clock (refclk, clock);
+}
+
+/**
+ * Returns whether any output on the specified pipe is of the specified type
+ */
+bool intel_pipe_has_type (struct drm_crtc *crtc, int type)
+{
+    struct drm_device *dev = crtc->dev;
+    struct drm_mode_config *mode_config = &dev->mode_config;
+    struct drm_connector *l_entry;
+
+    list_for_each_entry(l_entry, &mode_config->connector_list, head) {
+	    if (l_entry->encoder &&
+	        l_entry->encoder->crtc == crtc) {
+		    struct intel_output *intel_output = to_intel_output(l_entry);
+		    if (intel_output->type == type)
+			    return true;
+	    }
+    }
+    return false;
+}
+
+#define INTELPllInvalid(s)   { /* ErrorF (s) */; return false; }
+/**
+ * Returns whether the given set of divisors are valid for a given refclk with
+ * the given connectors.
+ */
+
+static bool intel_PLL_is_valid(struct drm_crtc *crtc, intel_clock_t *clock)
+{
+	const intel_limit_t *limit = intel_limit (crtc);
+
+	if (clock->p1  < limit->p1.min  || limit->p1.max  < clock->p1)
+		INTELPllInvalid ("p1 out of range\n");
+	if (clock->p   < limit->p.min   || limit->p.max   < clock->p)
+		INTELPllInvalid ("p out of range\n");
+	if (clock->m2  < limit->m2.min  || limit->m2.max  < clock->m2)
+		INTELPllInvalid ("m2 out of range\n");
+	if (clock->m1  < limit->m1.min  || limit->m1.max  < clock->m1)
+		INTELPllInvalid ("m1 out of range\n");
+	if (clock->m1 <= clock->m2)
+		INTELPllInvalid ("m1 <= m2\n");
+	if (clock->m   < limit->m.min   || limit->m.max   < clock->m)
+		INTELPllInvalid ("m out of range\n");
+	if (clock->n   < limit->n.min   || limit->n.max   < clock->n)
+		INTELPllInvalid ("n out of range\n");
+	if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
+		INTELPllInvalid ("vco out of range\n");
+	/* XXX: We may need to be checking "Dot clock" depending on the multiplier,
+	 * connector, etc., rather than just a single range.
+	 */
+	if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
+		INTELPllInvalid ("dot out of range\n");
+
+	return true;
+}
+
+/**
+ * Returns a set of divisors for the desired target clock with the given
+ * refclk, or FALSE.  The returned values represent the clock equation:
+ * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+ */
+static bool intel_find_best_PLL(struct drm_crtc *crtc, int target,
+				int refclk, intel_clock_t *best_clock)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	intel_clock_t clock;
+	const intel_limit_t *limit = intel_limit(crtc);
+	int err = target;
+
+	if (IS_I9XX(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) &&
+	    (I915_READ(LVDS) & LVDS_PORT_EN) != 0) {
+		/*
+		 * For LVDS, if the panel is on, just rely on its current
+		 * settings for dual-channel.  We haven't figured out how to
+		 * reliably set up different single/dual channel state, if we
+		 * even can.
+		 */
+		if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+		    LVDS_CLKB_POWER_UP)
+			clock.p2 = limit->p2.p2_fast;
+		else
+			clock.p2 = limit->p2.p2_slow;
+	} else {
+		if (target < limit->p2.dot_limit)
+			clock.p2 = limit->p2.p2_slow;
+		else
+			clock.p2 = limit->p2.p2_fast;
+	}
+
+	memset (best_clock, 0, sizeof (*best_clock));
+
+	for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
+		for (clock.m2 = limit->m2.min; clock.m2 < clock.m1 &&
+			     clock.m2 <= limit->m2.max; clock.m2++) {
+			for (clock.n = limit->n.min; clock.n <= limit->n.max;
+			     clock.n++) {
+				for (clock.p1 = limit->p1.min;
+				     clock.p1 <= limit->p1.max; clock.p1++) {
+					int this_err;
+
+					intel_clock(dev, refclk, &clock);
+
+					if (!intel_PLL_is_valid(crtc, &clock))
+						continue;
+
+					this_err = abs(clock.dot - target);
+					if (this_err < err) {
+						*best_clock = clock;
+						err = this_err;
+					}
+				}
+			}
+		}
+	}
+
+	return (err != target);
+}
+
+void
+intel_wait_for_vblank(struct drm_device *dev)
+{
+	/* Wait for 20ms, i.e. one cycle at 50hz. */
+	udelay(20000);
+}
+
+void
+intel_pipe_set_base(struct drm_crtc *crtc, int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_master_private *master_priv;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_framebuffer *intel_fb;
+	struct drm_i915_gem_object *obj_priv;
+	struct drm_gem_object *obj;
+	int pipe = intel_crtc->pipe;
+	unsigned long Start, Offset;
+	int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR);
+	int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF);
+	int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE;
+	int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+	u32 dspcntr;
+
+	/* no fb bound */
+	if (!crtc->fb) {
+		DRM_DEBUG("No FB bound\n");
+		return;
+	}
+
+	intel_fb = to_intel_framebuffer(crtc->fb);
+
+	obj = intel_fb->obj;
+	obj_priv = obj->driver_private;
+
+	Start = obj_priv->gtt_offset;
+	Offset = y * crtc->fb->pitch + x * (crtc->fb->bits_per_pixel / 8);
+
+	I915_WRITE(dspstride, crtc->fb->pitch);
+
+	dspcntr = I915_READ(dspcntr_reg);
+	switch (crtc->fb->bits_per_pixel) {
+	case 8:
+		dspcntr |= DISPPLANE_8BPP;
+		break;
+	case 16:
+		if (crtc->fb->depth == 15)
+			dspcntr |= DISPPLANE_15_16BPP;
+		else
+			dspcntr |= DISPPLANE_16BPP;
+		break;
+	case 24:
+	case 32:
+		dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
+		break;
+	default:
+		DRM_ERROR("Unknown color depth\n");
+		return;
+	}
+	I915_WRITE(dspcntr_reg, dspcntr);
+
+	DRM_DEBUG("Writing base %08lX %08lX %d %d\n", Start, Offset, x, y);
+	if (IS_I965G(dev)) {
+		I915_WRITE(dspbase, Offset);
+		I915_READ(dspbase);
+		I915_WRITE(dspsurf, Start);
+		I915_READ(dspsurf);
+	} else {
+		I915_WRITE(dspbase, Start + Offset);
+		I915_READ(dspbase);
+	}
+
+
+	if (!dev->primary->master)
+		return;
+
+	master_priv = dev->primary->master->driver_priv;
+	if (!master_priv->sarea_priv)
+		return;
+
+	switch (pipe) {
+	case 0:
+		master_priv->sarea_priv->pipeA_x = x;
+		master_priv->sarea_priv->pipeA_y = y;
+		break;
+	case 1:
+		master_priv->sarea_priv->pipeB_x = x;
+		master_priv->sarea_priv->pipeB_y = y;
+		break;
+	default:
+		DRM_ERROR("Can't update pipe %d in SAREA\n", pipe);
+		break;
+	}
+}
+
+
+
+/**
+ * Sets the power management mode of the pipe and plane.
+ *
+ * This code should probably grow support for turning the cursor off and back
+ * on appropriately at the same time as we're turning the pipe off/on.
+ */
+static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_master_private *master_priv;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
+	int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+	int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR;
+	int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
+	u32 temp;
+	bool enabled;
+
+	/* XXX: When our outputs are all unaware of DPMS modes other than off
+	 * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC.
+	 */
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+		/* Enable the DPLL */
+		temp = I915_READ(dpll_reg);
+		if ((temp & DPLL_VCO_ENABLE) == 0) {
+			I915_WRITE(dpll_reg, temp);
+			I915_READ(dpll_reg);
+			/* Wait for the clocks to stabilize. */
+			udelay(150);
+			I915_WRITE(dpll_reg, temp | DPLL_VCO_ENABLE);
+			I915_READ(dpll_reg);
+			/* Wait for the clocks to stabilize. */
+			udelay(150);
+			I915_WRITE(dpll_reg, temp | DPLL_VCO_ENABLE);
+			I915_READ(dpll_reg);
+			/* Wait for the clocks to stabilize. */
+			udelay(150);
+		}
+
+		/* Enable the pipe */
+		temp = I915_READ(pipeconf_reg);
+		if ((temp & PIPEACONF_ENABLE) == 0)
+			I915_WRITE(pipeconf_reg, temp | PIPEACONF_ENABLE);
+
+		/* Enable the plane */
+		temp = I915_READ(dspcntr_reg);
+		if ((temp & DISPLAY_PLANE_ENABLE) == 0) {
+			I915_WRITE(dspcntr_reg, temp | DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+		}
+
+		intel_crtc_load_lut(crtc);
+
+		/* Give the overlay scaler a chance to enable if it's on this pipe */
+		//intel_crtc_dpms_video(crtc, true); TODO
+	break;
+	case DRM_MODE_DPMS_OFF:
+		/* Give the overlay scaler a chance to disable if it's on this pipe */
+		//intel_crtc_dpms_video(crtc, FALSE); TODO
+
+		/* Disable the VGA plane that we never use */
+		I915_WRITE(VGACNTRL, VGA_DISP_DISABLE);
+
+		/* Disable display plane */
+		temp = I915_READ(dspcntr_reg);
+		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {
+			I915_WRITE(dspcntr_reg, temp & ~DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+			I915_READ(dspbase_reg);
+		}
+
+		if (!IS_I9XX(dev)) {
+			/* Wait for vblank for the disable to take effect */
+			intel_wait_for_vblank(dev);
+		}
+
+		/* Next, disable display pipes */
+		temp = I915_READ(pipeconf_reg);
+		if ((temp & PIPEACONF_ENABLE) != 0) {
+			I915_WRITE(pipeconf_reg, temp & ~PIPEACONF_ENABLE);
+			I915_READ(pipeconf_reg);
+		}
+
+		/* Wait for vblank for the disable to take effect. */
+		intel_wait_for_vblank(dev);
+
+		temp = I915_READ(dpll_reg);
+		if ((temp & DPLL_VCO_ENABLE) != 0) {
+			I915_WRITE(dpll_reg, temp & ~DPLL_VCO_ENABLE);
+			I915_READ(dpll_reg);
+		}
+
+		/* Wait for the clocks to turn off. */
+		udelay(150);
+		break;
+	}
+
+	if (!dev->primary->master)
+		return;
+
+	master_priv = dev->primary->master->driver_priv;
+	if (!master_priv->sarea_priv)
+		return;
+
+	enabled = crtc->enabled && mode != DRM_MODE_DPMS_OFF;
+
+	switch (pipe) {
+	case 0:
+		master_priv->sarea_priv->pipeA_w = enabled ? crtc->mode.hdisplay : 0;
+		master_priv->sarea_priv->pipeA_h = enabled ? crtc->mode.vdisplay : 0;
+		break;
+	case 1:
+		master_priv->sarea_priv->pipeB_w = enabled ? crtc->mode.hdisplay : 0;
+		master_priv->sarea_priv->pipeB_h = enabled ? crtc->mode.vdisplay : 0;
+		break;
+	default:
+		DRM_ERROR("Can't update pipe %d in SAREA\n", pipe);
+		break;
+	}
+
+	intel_crtc->dpms_mode = mode;
+}
+
+static void intel_crtc_prepare (struct drm_crtc *crtc)
+{
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void intel_crtc_commit (struct drm_crtc *crtc)
+{
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+}
+
+void intel_encoder_prepare (struct drm_encoder *encoder)
+{
+	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+	/* lvds has its own version of prepare see intel_lvds_prepare */
+	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+void intel_encoder_commit (struct drm_encoder *encoder)
+{
+	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+	/* lvds has its own version of commit see intel_lvds_commit */
+	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static bool intel_crtc_mode_fixup(struct drm_crtc *crtc,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+
+/** Returns the core display clock speed for i830 - i945 */
+static int intel_get_core_clock_speed(struct drm_device *dev)
+{
+
+	/* Core clock values taken from the published datasheets.
+	 * The 830 may go up to 166 Mhz, which we should check.
+	 */
+	if (IS_I945G(dev))
+		return 400000;
+	else if (IS_I915G(dev))
+		return 333000;
+	else if (IS_I945GM(dev) || IS_845G(dev))
+		return 200000;
+	else if (IS_I915GM(dev)) {
+		u16 gcfgc = 0;
+
+		pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+		if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
+			return 133000;
+		else {
+			switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+			case GC_DISPLAY_CLOCK_333_MHZ:
+				return 333000;
+			default:
+			case GC_DISPLAY_CLOCK_190_200_MHZ:
+				return 190000;
+			}
+		}
+	} else if (IS_I865G(dev))
+		return 266000;
+	else if (IS_I855(dev)) {
+		u16 hpllcc = 0;
+		/* Assume that the hardware is in the high speed state.  This
+		 * should be the default.
+		 */
+		switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
+		case GC_CLOCK_133_200:
+		case GC_CLOCK_100_200:
+			return 200000;
+		case GC_CLOCK_166_250:
+			return 250000;
+		case GC_CLOCK_100_133:
+			return 133000;
+		}
+	} else /* 852, 830 */
+		return 133000;
+
+	return 0; /* Silence gcc warning */
+}
+
+
+/**
+ * Return the pipe currently connected to the panel fitter,
+ * or -1 if the panel fitter is not present or not in use
+ */
+static int intel_panel_fitter_pipe (struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32  pfit_control;
+
+	/* i830 doesn't have a panel fitter */
+	if (IS_I830(dev))
+		return -1;
+
+	pfit_control = I915_READ(PFIT_CONTROL);
+
+	/* See if the panel fitter is in use */
+	if ((pfit_control & PFIT_ENABLE) == 0)
+		return -1;
+
+	/* 965 can place panel fitter on either pipe */
+	if (IS_I965G(dev))
+		return (pfit_control >> 29) & 0x3;
+
+	/* older chips can only use pipe 1 */
+	return 1;
+}
+
+static void intel_crtc_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int fp_reg = (pipe == 0) ? FPA0 : FPB0;
+	int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
+	int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD;
+	int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+	int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
+	int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
+	int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
+	int hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
+	int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
+	int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
+	int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
+	int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE;
+	int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS;
+	int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
+	int refclk;
+	intel_clock_t clock;
+	u32 dpll = 0, fp = 0, dspcntr, pipeconf;
+	bool ok, is_sdvo = false, is_dvo = false;
+	bool is_crt = false, is_lvds = false, is_tv = false;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct drm_connector *connector;
+
+	drm_vblank_pre_modeset(dev, pipe);
+
+	list_for_each_entry(connector, &mode_config->connector_list, head) {
+		struct intel_output *intel_output = to_intel_output(connector);
+
+		if (!connector->encoder || connector->encoder->crtc != crtc)
+			continue;
+
+		switch (intel_output->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_SDVO:
+			is_sdvo = true;
+			break;
+		case INTEL_OUTPUT_DVO:
+			is_dvo = true;
+			break;
+		case INTEL_OUTPUT_TVOUT:
+			is_tv = true;
+			break;
+		case INTEL_OUTPUT_ANALOG:
+			is_crt = true;
+			break;
+		}
+	}
+
+	if (IS_I9XX(dev)) {
+		refclk = 96000;
+	} else {
+		refclk = 48000;
+	}
+
+	ok = intel_find_best_PLL(crtc, adjusted_mode->clock, refclk, &clock);
+	if (!ok) {
+		DRM_ERROR("Couldn't find PLL settings for mode!\n");
+		return;
+	}
+
+	fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
+
+	dpll = DPLL_VGA_MODE_DIS;
+	if (IS_I9XX(dev)) {
+		if (is_lvds)
+			dpll |= DPLLB_MODE_LVDS;
+		else
+			dpll |= DPLLB_MODE_DAC_SERIAL;
+		if (is_sdvo) {
+			dpll |= DPLL_DVO_HIGH_SPEED;
+			if (IS_I945G(dev) || IS_I945GM(dev)) {
+				int sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+				dpll |= (sdvo_pixel_multiply - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
+			}
+		}
+
+		/* compute bitmask from p1 value */
+		dpll |= (1 << (clock.p1 - 1)) << 16;
+		switch (clock.p2) {
+		case 5:
+			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
+			break;
+		case 7:
+			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7;
+			break;
+		case 10:
+			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10;
+			break;
+		case 14:
+			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
+			break;
+		}
+		if (IS_I965G(dev))
+			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
+	} else {
+		if (is_lvds) {
+			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+		} else {
+			if (clock.p1 == 2)
+				dpll |= PLL_P1_DIVIDE_BY_TWO;
+			else
+				dpll |= (clock.p1 - 2) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+			if (clock.p2 == 4)
+				dpll |= PLL_P2_DIVIDE_BY_4;
+		}
+	}
+
+	if (is_tv) {
+		/* XXX: just matching BIOS for now */
+/*	dpll |= PLL_REF_INPUT_TVCLKINBC; */
+		dpll |= 3;
+	}
+	else
+		dpll |= PLL_REF_INPUT_DREFCLK;
+
+	/* setup pipeconf */
+	pipeconf = I915_READ(pipeconf_reg);
+
+	/* Set up the display plane register */
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	if (pipe == 0)
+		dspcntr |= DISPPLANE_SEL_PIPE_A;
+	else
+		dspcntr |= DISPPLANE_SEL_PIPE_B;
+
+	if (pipe == 0 && !IS_I965G(dev)) {
+		/* Enable pixel doubling when the dot clock is > 90% of the (display)
+		 * core speed.
+		 *
+		 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the
+		 * pipe == 0 check?
+		 */
+		if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10)
+			pipeconf |= PIPEACONF_DOUBLE_WIDE;
+		else
+			pipeconf &= ~PIPEACONF_DOUBLE_WIDE;
+	}
+
+	dspcntr |= DISPLAY_PLANE_ENABLE;
+	pipeconf |= PIPEACONF_ENABLE;
+	dpll |= DPLL_VCO_ENABLE;
+
+
+	/* Disable the panel fitter if it was on our pipe */
+	if (intel_panel_fitter_pipe(dev) == pipe)
+		I915_WRITE(PFIT_CONTROL, 0);
+
+	DRM_DEBUG("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
+	drm_mode_debug_printmodeline(mode);
+
+
+	if (dpll & DPLL_VCO_ENABLE) {
+		I915_WRITE(fp_reg, fp);
+		I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
+		I915_READ(dpll_reg);
+		udelay(150);
+	}
+
+	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
+	 * This is an exception to the general rule that mode_set doesn't turn
+	 * things on.
+	 */
+	if (is_lvds) {
+		u32 lvds = I915_READ(LVDS);
+
+		lvds |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP | LVDS_PIPEB_SELECT;
+		/* Set the B0-B3 data pairs corresponding to whether we're going to
+		 * set the DPLLs for dual-channel mode or not.
+		 */
+		if (clock.p2 == 7)
+			lvds |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP;
+		else
+			lvds &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP);
+
+		/* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP)
+		 * appropriately here, but we need to look more thoroughly into how
+		 * panels behave in the two modes.
+		 */
+
+		I915_WRITE(LVDS, lvds);
+		I915_READ(LVDS);
+	}
+
+	I915_WRITE(fp_reg, fp);
+	I915_WRITE(dpll_reg, dpll);
+	I915_READ(dpll_reg);
+	/* Wait for the clocks to stabilize. */
+	udelay(150);
+
+	if (IS_I965G(dev)) {
+		int sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+		I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
+			   ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
+	} else {
+		/* write it again -- the BIOS does, after all */
+		I915_WRITE(dpll_reg, dpll);
+	}
+	I915_READ(dpll_reg);
+	/* Wait for the clocks to stabilize. */
+	udelay(150);
+
+	I915_WRITE(htot_reg, (adjusted_mode->crtc_hdisplay - 1) |
+		   ((adjusted_mode->crtc_htotal - 1) << 16));
+	I915_WRITE(hblank_reg, (adjusted_mode->crtc_hblank_start - 1) |
+		   ((adjusted_mode->crtc_hblank_end - 1) << 16));
+	I915_WRITE(hsync_reg, (adjusted_mode->crtc_hsync_start - 1) |
+		   ((adjusted_mode->crtc_hsync_end - 1) << 16));
+	I915_WRITE(vtot_reg, (adjusted_mode->crtc_vdisplay - 1) |
+		   ((adjusted_mode->crtc_vtotal - 1) << 16));
+	I915_WRITE(vblank_reg, (adjusted_mode->crtc_vblank_start - 1) |
+		   ((adjusted_mode->crtc_vblank_end - 1) << 16));
+	I915_WRITE(vsync_reg, (adjusted_mode->crtc_vsync_start - 1) |
+		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
+	/* pipesrc and dspsize control the size that is scaled from, which should
+	 * always be the user's requested size.
+	 */
+	I915_WRITE(dspsize_reg, ((mode->vdisplay - 1) << 16) | (mode->hdisplay - 1));
+	I915_WRITE(dsppos_reg, 0);
+	I915_WRITE(pipesrc_reg, ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
+	I915_WRITE(pipeconf_reg, pipeconf);
+	I915_READ(pipeconf_reg);
+
+	intel_wait_for_vblank(dev);
+
+	I915_WRITE(dspcntr_reg, dspcntr);
+
+	/* Flush the plane changes */
+	intel_pipe_set_base(crtc, x, y);
+
+	intel_wait_for_vblank(dev);
+
+	drm_vblank_post_modeset(dev, pipe);
+}
+
+/** Loads the palette/gamma unit for the CRTC with the prepared values */
+void intel_crtc_load_lut(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int palreg = (intel_crtc->pipe == 0) ? PALETTE_A : PALETTE_B;
+	int i;
+
+	/* The clocks have to be on to load the palette. */
+	if (!crtc->enabled)
+		return;
+
+	for (i = 0; i < 256; i++) {
+		I915_WRITE(palreg + 4 * i,
+			   (intel_crtc->lut_r[i] << 16) |
+			   (intel_crtc->lut_g[i] << 8) |
+			   intel_crtc->lut_b[i]);
+	}
+}
+
+static int intel_crtc_cursor_set(struct drm_crtc *crtc,
+				 struct drm_file *file_priv,
+				 uint32_t handle,
+				 uint32_t width, uint32_t height)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_gem_object *bo;
+	struct drm_i915_gem_object *obj_priv;
+	int pipe = intel_crtc->pipe;
+	uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
+	uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
+	uint32_t temp;
+	size_t addr;
+
+	DRM_DEBUG("\n");
+
+	/* if we want to turn off the cursor ignore width and height */
+	if (!handle) {
+		DRM_DEBUG("cursor off\n");
+		/* turn of the cursor */
+		temp = 0;
+		temp |= CURSOR_MODE_DISABLE;
+
+		I915_WRITE(control, temp);
+		I915_WRITE(base, 0);
+		return 0;
+	}
+
+	/* Currently we only support 64x64 cursors */
+	if (width != 64 || height != 64) {
+		DRM_ERROR("we currently only support 64x64 cursors\n");
+		return -EINVAL;
+	}
+
+	bo = drm_gem_object_lookup(dev, file_priv, handle);
+	if (!bo)
+		return -ENOENT;
+
+	obj_priv = bo->driver_private;
+
+	if (bo->size < width * height * 4) {
+		DRM_ERROR("buffer is to small\n");
+		drm_gem_object_unreference(bo);
+		return -ENOMEM;
+	}
+
+	if (dev_priv->cursor_needs_physical) {
+		addr = dev->agp->base + obj_priv->gtt_offset;
+	} else {
+		addr = obj_priv->gtt_offset;
+	}
+
+	intel_crtc->cursor_addr = addr;
+	temp = 0;
+	/* set the pipe for the cursor */
+	temp |= (pipe << 28);
+	temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
+
+	I915_WRITE(control, temp);
+	I915_WRITE(base, addr);
+
+	return 0;
+}
+
+static int intel_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	uint32_t temp = 0;
+	uint32_t adder;
+
+	if (x < 0) {
+		temp |= (CURSOR_POS_SIGN << CURSOR_X_SHIFT);
+		x = -x;
+	}
+	if (y < 0) {
+		temp |= (CURSOR_POS_SIGN << CURSOR_Y_SHIFT);
+		y = -y;
+	}
+
+	temp |= ((x & CURSOR_POS_MASK) << CURSOR_X_SHIFT);
+	temp |= ((y & CURSOR_POS_MASK) << CURSOR_Y_SHIFT);
+
+	adder = intel_crtc->cursor_addr;
+	I915_WRITE((pipe == 0) ? CURAPOS : CURBPOS, temp);
+	I915_WRITE((pipe == 0) ? CURABASE : CURBBASE, adder);
+
+	return 0;
+}
+
+/** Sets the color ramps on behalf of RandR */
+void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
+				 u16 blue, int regno)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	intel_crtc->lut_r[regno] = red >> 8;
+	intel_crtc->lut_g[regno] = green >> 8;
+	intel_crtc->lut_b[regno] = blue >> 8;
+}
+
+static void intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+				 u16 *blue, uint32_t size)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int i;
+
+	if (size != 256)
+		return;
+
+	for (i = 0; i < 256; i++) {
+		intel_crtc->lut_r[i] = red[i] >> 8;
+		intel_crtc->lut_g[i] = green[i] >> 8;
+		intel_crtc->lut_b[i] = blue[i] >> 8;
+	}
+
+	intel_crtc_load_lut(crtc);
+}
+
+/**
+ * Get a pipe with a simple mode set on it for doing load-based monitor
+ * detection.
+ *
+ * It will be up to the load-detect code to adjust the pipe as appropriate for
+ * its requirements.  The pipe will be connected to no other outputs.
+ *
+ * Currently this code will only succeed if there is a pipe with no outputs
+ * configured for it.  In the future, it could choose to temporarily disable
+ * some outputs to free up a pipe for its use.
+ *
+ * \return crtc, or NULL if no pipes are available.
+ */
+
+/* VESA 640x480x72Hz mode to set on the pipe */
+static struct drm_display_mode load_detect_mode = {
+	DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664,
+		 704, 832, 0, 480, 489, 491, 520, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
+};
+
+struct drm_crtc *intel_get_load_detect_pipe(struct intel_output *intel_output,
+					    struct drm_display_mode *mode,
+					    int *dpms_mode)
+{
+	struct intel_crtc *intel_crtc;
+	struct drm_crtc *possible_crtc;
+	struct drm_crtc *supported_crtc =NULL;
+	struct drm_encoder *encoder = &intel_output->enc;
+	struct drm_crtc *crtc = NULL;
+	struct drm_device *dev = encoder->dev;
+	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+	struct drm_crtc_helper_funcs *crtc_funcs;
+	int i = -1;
+
+	/*
+	 * Algorithm gets a little messy:
+	 *   - if the connector already has an assigned crtc, use it (but make
+	 *     sure it's on first)
+	 *   - try to find the first unused crtc that can drive this connector,
+	 *     and use that if we find one
+	 *   - if there are no unused crtcs available, try to use the first
+	 *     one we found that supports the connector
+	 */
+
+	/* See if we already have a CRTC for this connector */
+	if (encoder->crtc) {
+		crtc = encoder->crtc;
+		/* Make sure the crtc and connector are running */
+		intel_crtc = to_intel_crtc(crtc);
+		*dpms_mode = intel_crtc->dpms_mode;
+		if (intel_crtc->dpms_mode != DRM_MODE_DPMS_ON) {
+			crtc_funcs = crtc->helper_private;
+			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+			encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
+		}
+		return crtc;
+	}
+
+	/* Find an unused one (if possible) */
+	list_for_each_entry(possible_crtc, &dev->mode_config.crtc_list, head) {
+		i++;
+		if (!(encoder->possible_crtcs & (1 << i)))
+			continue;
+		if (!possible_crtc->enabled) {
+			crtc = possible_crtc;
+			break;
+		}
+		if (!supported_crtc)
+			supported_crtc = possible_crtc;
+	}
+
+	/*
+	 * If we didn't find an unused CRTC, don't use any.
+	 */
+	if (!crtc) {
+		return NULL;
+	}
+
+	encoder->crtc = crtc;
+	intel_output->load_detect_temp = true;
+
+	intel_crtc = to_intel_crtc(crtc);
+	*dpms_mode = intel_crtc->dpms_mode;
+
+	if (!crtc->enabled) {
+		if (!mode)
+			mode = &load_detect_mode;
+		drm_crtc_helper_set_mode(crtc, mode, 0, 0);
+	} else {
+		if (intel_crtc->dpms_mode != DRM_MODE_DPMS_ON) {
+			crtc_funcs = crtc->helper_private;
+			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+		}
+
+		/* Add this connector to the crtc */
+		encoder_funcs->mode_set(encoder, &crtc->mode, &crtc->mode);
+		encoder_funcs->commit(encoder);
+	}
+	/* let the connector get through one full cycle before testing */
+	intel_wait_for_vblank(dev);
+
+	return crtc;
+}
+
+void intel_release_load_detect_pipe(struct intel_output *intel_output, int dpms_mode)
+{
+	struct drm_encoder *encoder = &intel_output->enc;
+	struct drm_device *dev = encoder->dev;
+	struct drm_crtc *crtc = encoder->crtc;
+	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+	if (intel_output->load_detect_temp) {
+		encoder->crtc = NULL;
+		intel_output->load_detect_temp = false;
+		crtc->enabled = drm_helper_crtc_in_use(crtc);
+		drm_helper_disable_unused_functions(dev);
+	}
+
+	/* Switch crtc and output back off if necessary */
+	if (crtc->enabled && dpms_mode != DRM_MODE_DPMS_ON) {
+		if (encoder->crtc == crtc)
+			encoder_funcs->dpms(encoder, dpms_mode);
+		crtc_funcs->dpms(crtc, dpms_mode);
+	}
+}
+
+/* Returns the clock of the currently programmed mode of the given pipe. */
+static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	u32 dpll = I915_READ((pipe == 0) ? DPLL_A : DPLL_B);
+	u32 fp;
+	intel_clock_t clock;
+
+	if ((dpll & DISPLAY_RATE_SELECT_FPA1) == 0)
+		fp = I915_READ((pipe == 0) ? FPA0 : FPB0);
+	else
+		fp = I915_READ((pipe == 0) ? FPA1 : FPB1);
+
+	clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT;
+	clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT;
+	clock.n = (fp & FP_N_DIV_MASK) >> FP_N_DIV_SHIFT;
+	if (IS_I9XX(dev)) {
+		clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK) >>
+			       DPLL_FPA01_P1_POST_DIV_SHIFT);
+
+		switch (dpll & DPLL_MODE_MASK) {
+		case DPLLB_MODE_DAC_SERIAL:
+			clock.p2 = dpll & DPLL_DAC_SERIAL_P2_CLOCK_DIV_5 ?
+				5 : 10;
+			break;
+		case DPLLB_MODE_LVDS:
+			clock.p2 = dpll & DPLLB_LVDS_P2_CLOCK_DIV_7 ?
+				7 : 14;
+			break;
+		default:
+			DRM_DEBUG("Unknown DPLL mode %08x in programmed "
+				  "mode\n", (int)(dpll & DPLL_MODE_MASK));
+			return 0;
+		}
+
+		/* XXX: Handle the 100Mhz refclk */
+		i9xx_clock(96000, &clock);
+	} else {
+		bool is_lvds = (pipe == 1) && (I915_READ(LVDS) & LVDS_PORT_EN);
+
+		if (is_lvds) {
+			clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830_LVDS) >>
+				       DPLL_FPA01_P1_POST_DIV_SHIFT);
+			clock.p2 = 14;
+
+			if ((dpll & PLL_REF_INPUT_MASK) ==
+			    PLLB_REF_INPUT_SPREADSPECTRUMIN) {
+				/* XXX: might not be 66MHz */
+				i8xx_clock(66000, &clock);
+			} else
+				i8xx_clock(48000, &clock);
+		} else {
+			if (dpll & PLL_P1_DIVIDE_BY_TWO)
+				clock.p1 = 2;
+			else {
+				clock.p1 = ((dpll & DPLL_FPA01_P1_POST_DIV_MASK_I830) >>
+					    DPLL_FPA01_P1_POST_DIV_SHIFT) + 2;
+			}
+			if (dpll & PLL_P2_DIVIDE_BY_4)
+				clock.p2 = 4;
+			else
+				clock.p2 = 2;
+
+			i8xx_clock(48000, &clock);
+		}
+	}
+
+	/* XXX: It would be nice to validate the clocks, but we can't reuse
+	 * i830PllIsValid() because it relies on the xf86_config connector
+	 * configuration being accurate, which it isn't necessarily.
+	 */
+
+	return clock.dot;
+}
+
+/** Returns the currently programmed mode of the given pipe. */
+struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
+					     struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	struct drm_display_mode *mode;
+	int htot = I915_READ((pipe == 0) ? HTOTAL_A : HTOTAL_B);
+	int hsync = I915_READ((pipe == 0) ? HSYNC_A : HSYNC_B);
+	int vtot = I915_READ((pipe == 0) ? VTOTAL_A : VTOTAL_B);
+	int vsync = I915_READ((pipe == 0) ? VSYNC_A : VSYNC_B);
+
+	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
+	if (!mode)
+		return NULL;
+
+	mode->clock = intel_crtc_clock_get(dev, crtc);
+	mode->hdisplay = (htot & 0xffff) + 1;
+	mode->htotal = ((htot & 0xffff0000) >> 16) + 1;
+	mode->hsync_start = (hsync & 0xffff) + 1;
+	mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1;
+	mode->vdisplay = (vtot & 0xffff) + 1;
+	mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1;
+	mode->vsync_start = (vsync & 0xffff) + 1;
+	mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1;
+
+	drm_mode_set_name(mode);
+	drm_mode_set_crtcinfo(mode, 0);
+
+	return mode;
+}
+
+static void intel_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	drm_crtc_cleanup(crtc);
+	kfree(intel_crtc);
+}
+
+static const struct drm_crtc_helper_funcs intel_helper_funcs = {
+	.dpms = intel_crtc_dpms,
+	.mode_fixup = intel_crtc_mode_fixup,
+	.mode_set = intel_crtc_mode_set,
+	.mode_set_base = intel_pipe_set_base,
+	.prepare = intel_crtc_prepare,
+	.commit = intel_crtc_commit,
+};
+
+static const struct drm_crtc_funcs intel_crtc_funcs = {
+	.cursor_set = intel_crtc_cursor_set,
+	.cursor_move = intel_crtc_cursor_move,
+	.gamma_set = intel_crtc_gamma_set,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = intel_crtc_destroy,
+};
+
+
+void intel_crtc_init(struct drm_device *dev, int pipe)
+{
+	struct intel_crtc *intel_crtc;
+	int i;
+
+	intel_crtc = kzalloc(sizeof(struct intel_crtc) + (INTELFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+	if (intel_crtc == NULL)
+		return;
+
+	drm_crtc_init(dev, &intel_crtc->base, &intel_crtc_funcs);
+
+	drm_mode_crtc_set_gamma_size(&intel_crtc->base, 256);
+	intel_crtc->pipe = pipe;
+	for (i = 0; i < 256; i++) {
+		intel_crtc->lut_r[i] = i;
+		intel_crtc->lut_g[i] = i;
+		intel_crtc->lut_b[i] = i;
+	}
+
+	intel_crtc->cursor_addr = 0;
+	intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
+	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
+
+	intel_crtc->mode_set.crtc = &intel_crtc->base;
+	intel_crtc->mode_set.connectors = (struct drm_connector **)(intel_crtc + 1);
+	intel_crtc->mode_set.num_connectors = 0;
+
+	if (i915_fbpercrtc) {
+
+
+
+	}
+}
+
+struct drm_crtc *intel_get_crtc_from_pipe(struct drm_device *dev, int pipe)
+{
+	struct drm_crtc *crtc = NULL;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+		if (intel_crtc->pipe == pipe)
+			break;
+	}
+	return crtc;
+}
+
+int intel_connector_clones(struct drm_device *dev, int type_mask)
+{
+	int index_mask = 0;
+	struct drm_connector *connector;
+	int entry = 0;
+
+        list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		struct intel_output *intel_output = to_intel_output(connector);
+		if (type_mask & (1 << intel_output->type))
+			index_mask |= (1 << entry);
+		entry++;
+	}
+	return index_mask;
+}
+
+
+static void intel_setup_outputs(struct drm_device *dev)
+{
+	struct drm_connector *connector;
+
+	intel_crt_init(dev);
+
+	/* Set up integrated LVDS */
+	if (IS_MOBILE(dev) && !IS_I830(dev))
+		intel_lvds_init(dev);
+
+	if (IS_I9XX(dev)) {
+		intel_sdvo_init(dev, SDVOB);
+		intel_sdvo_init(dev, SDVOC);
+	} else
+		intel_dvo_init(dev);
+
+	if (IS_I9XX(dev) && !IS_I915G(dev))
+		intel_tv_init(dev);
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		struct intel_output *intel_output = to_intel_output(connector);
+		struct drm_encoder *encoder = &intel_output->enc;
+		int crtc_mask = 0, clone_mask = 0;
+
+		/* valid crtcs */
+		switch(intel_output->type) {
+		case INTEL_OUTPUT_DVO:
+		case INTEL_OUTPUT_SDVO:
+			crtc_mask = ((1 << 0)|
+				     (1 << 1));
+			clone_mask = ((1 << INTEL_OUTPUT_ANALOG) |
+				      (1 << INTEL_OUTPUT_DVO) |
+				      (1 << INTEL_OUTPUT_SDVO));
+			break;
+		case INTEL_OUTPUT_ANALOG:
+			crtc_mask = ((1 << 0)|
+				     (1 << 1));
+			clone_mask = ((1 << INTEL_OUTPUT_ANALOG) |
+				      (1 << INTEL_OUTPUT_DVO) |
+				      (1 << INTEL_OUTPUT_SDVO));
+			break;
+		case INTEL_OUTPUT_LVDS:
+			crtc_mask = (1 << 1);
+			clone_mask = (1 << INTEL_OUTPUT_LVDS);
+			break;
+		case INTEL_OUTPUT_TVOUT:
+			crtc_mask = ((1 << 0) |
+				     (1 << 1));
+			clone_mask = (1 << INTEL_OUTPUT_TVOUT);
+			break;
+		}
+		encoder->possible_crtcs = crtc_mask;
+		encoder->possible_clones = intel_connector_clones(dev, clone_mask);
+	}
+}
+
+static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
+{
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	struct drm_device *dev = fb->dev;
+
+	if (fb->fbdev)
+		intelfb_remove(dev, fb);
+
+	drm_framebuffer_cleanup(fb);
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(intel_fb->obj);
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(intel_fb);
+}
+
+static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb,
+						struct drm_file *file_priv,
+						unsigned int *handle)
+{
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	struct drm_gem_object *object = intel_fb->obj;
+
+	return drm_gem_handle_create(file_priv, object, handle);
+}
+
+static const struct drm_framebuffer_funcs intel_fb_funcs = {
+	.destroy = intel_user_framebuffer_destroy,
+	.create_handle = intel_user_framebuffer_create_handle,
+};
+
+int intel_framebuffer_create(struct drm_device *dev,
+			     struct drm_mode_fb_cmd *mode_cmd,
+			     struct drm_framebuffer **fb,
+			     struct drm_gem_object *obj)
+{
+	struct intel_framebuffer *intel_fb;
+	int ret;
+
+	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
+	if (!intel_fb)
+		return -ENOMEM;
+
+	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
+	if (ret) {
+		DRM_ERROR("framebuffer init failed %d\n", ret);
+		return ret;
+	}
+
+	drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
+
+	intel_fb->obj = obj;
+
+	*fb = &intel_fb->base;
+
+	return 0;
+}
+
+
+static struct drm_framebuffer *
+intel_user_framebuffer_create(struct drm_device *dev,
+			      struct drm_file *filp,
+			      struct drm_mode_fb_cmd *mode_cmd)
+{
+	struct drm_gem_object *obj;
+	struct drm_framebuffer *fb;
+	int ret;
+
+	obj = drm_gem_object_lookup(dev, filp, mode_cmd->handle);
+	if (!obj)
+		return NULL;
+
+	ret = intel_framebuffer_create(dev, mode_cmd, &fb, obj);
+	if (ret) {
+		drm_gem_object_unreference(obj);
+		return NULL;
+	}
+
+	return fb;
+}
+
+static int intel_insert_new_fb(struct drm_device *dev,
+			       struct drm_file *file_priv,
+			       struct drm_framebuffer *fb,
+			       struct drm_mode_fb_cmd *mode_cmd)
+{
+	struct intel_framebuffer *intel_fb;
+	struct drm_gem_object *obj;
+	struct drm_crtc *crtc;
+
+	intel_fb = to_intel_framebuffer(fb);
+
+	obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
+
+	if (!obj)
+		return -EINVAL;
+
+	intel_fb->obj = obj;
+	drm_gem_object_unreference(intel_fb->obj);
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+	mutex_unlock(&dev->struct_mutex);
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->fb == fb) {
+			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+			crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y);
+		}
+	}
+	return 0;
+}
+
+static const struct drm_mode_config_funcs intel_mode_funcs = {
+	.resize_fb = intel_insert_new_fb,
+	.fb_create = intel_user_framebuffer_create,
+	.fb_changed = intelfb_probe,
+};
+
+void intel_modeset_init(struct drm_device *dev)
+{
+	int num_pipe;
+	int i;
+
+	drm_mode_config_init(dev);
+
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+
+	dev->mode_config.funcs = (void *)&intel_mode_funcs;
+
+	if (IS_I965G(dev)) {
+		dev->mode_config.max_width = 8192;
+		dev->mode_config.max_height = 8192;
+	} else {
+		dev->mode_config.max_width = 2048;
+		dev->mode_config.max_height = 2048;
+	}
+
+	/* set memory base */
+	if (IS_I9XX(dev))
+		dev->mode_config.fb_base = pci_resource_start(dev->pdev, 2);
+	else
+		dev->mode_config.fb_base = pci_resource_start(dev->pdev, 0);
+
+	if (IS_MOBILE(dev) || IS_I9XX(dev))
+		num_pipe = 2;
+	else
+		num_pipe = 1;
+	DRM_DEBUG("%d display pipe%s available.\n",
+		  num_pipe, num_pipe > 1 ? "s" : "");
+
+	for (i = 0; i < num_pipe; i++) {
+		intel_crtc_init(dev, i);
+	}
+
+	intel_setup_outputs(dev);
+}
+
+void intel_modeset_cleanup(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+
+/* current intel driver doesn't take advantage of encoders
+   always give back the encoder for the connector
+*/
+struct drm_encoder *intel_best_encoder(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	return &intel_output->enc;
+}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
new file mode 100644
index 000000000000..407edd5bf582
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2006 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef __INTEL_DRV_H__
+#define __INTEL_DRV_H__
+
+#include <linux/i2c.h>
+#include <linux/i2c-id.h>
+#include <linux/i2c-algo-bit.h>
+#include "drm_crtc.h"
+
+#include "drm_crtc_helper.h"
+/*
+ * Display related stuff
+ */
+
+/* store information about an Ixxx DVO */
+/* The i830->i865 use multiple DVOs with multiple i2cs */
+/* the i915, i945 have a single sDVO i2c bus - which is different */
+#define MAX_OUTPUTS 6
+/* maximum connectors per crtcs in the mode set */
+#define INTELFB_CONN_LIMIT 4
+
+#define INTEL_I2C_BUS_DVO 1
+#define INTEL_I2C_BUS_SDVO 2
+
+/* these are outputs from the chip - integrated only
+   external chips are via DVO or SDVO output */
+#define INTEL_OUTPUT_UNUSED 0
+#define INTEL_OUTPUT_ANALOG 1
+#define INTEL_OUTPUT_DVO 2
+#define INTEL_OUTPUT_SDVO 3
+#define INTEL_OUTPUT_LVDS 4
+#define INTEL_OUTPUT_TVOUT 5
+
+#define INTEL_DVO_CHIP_NONE 0
+#define INTEL_DVO_CHIP_LVDS 1
+#define INTEL_DVO_CHIP_TMDS 2
+#define INTEL_DVO_CHIP_TVOUT 4
+
+struct intel_i2c_chan {
+	struct drm_device *drm_dev; /* for getting at dev. private (mmio etc.) */
+	u32 reg; /* GPIO reg */
+	struct i2c_adapter adapter;
+	struct i2c_algo_bit_data algo;
+        u8 slave_addr;
+};
+
+struct intel_framebuffer {
+	struct drm_framebuffer base;
+	struct drm_gem_object *obj;
+};
+
+
+struct intel_output {
+	struct drm_connector base;
+
+	struct drm_encoder enc;
+	int type;
+	struct intel_i2c_chan *i2c_bus; /* for control functions */
+	struct intel_i2c_chan *ddc_bus; /* for DDC only stuff */
+	bool load_detect_temp;
+	void *dev_priv;
+};
+
+struct intel_crtc {
+	struct drm_crtc base;
+	int pipe;
+	int plane;
+	uint32_t cursor_addr;
+	u8 lut_r[256], lut_g[256], lut_b[256];
+	int dpms_mode;
+	struct intel_framebuffer *fbdev_fb;
+	/* a mode_set for fbdev users on this crtc */
+	struct drm_mode_set mode_set;
+};
+
+#define to_intel_crtc(x) container_of(x, struct intel_crtc, base)
+#define to_intel_output(x) container_of(x, struct intel_output, base)
+#define enc_to_intel_output(x) container_of(x, struct intel_output, enc)
+#define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base)
+
+struct intel_i2c_chan *intel_i2c_create(struct drm_device *dev, const u32 reg,
+					const char *name);
+void intel_i2c_destroy(struct intel_i2c_chan *chan);
+int intel_ddc_get_modes(struct intel_output *intel_output);
+extern bool intel_ddc_probe(struct intel_output *intel_output);
+
+extern void intel_crt_init(struct drm_device *dev);
+extern void intel_sdvo_init(struct drm_device *dev, int output_device);
+extern void intel_dvo_init(struct drm_device *dev);
+extern void intel_tv_init(struct drm_device *dev);
+extern void intel_lvds_init(struct drm_device *dev);
+
+extern void intel_crtc_load_lut(struct drm_crtc *crtc);
+extern void intel_encoder_prepare (struct drm_encoder *encoder);
+extern void intel_encoder_commit (struct drm_encoder *encoder);
+
+extern struct drm_encoder *intel_best_encoder(struct drm_connector *connector);
+
+extern struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
+						    struct drm_crtc *crtc);
+extern void intel_wait_for_vblank(struct drm_device *dev);
+extern struct drm_crtc *intel_get_crtc_from_pipe(struct drm_device *dev, int pipe);
+extern struct drm_crtc *intel_get_load_detect_pipe(struct intel_output *intel_output,
+						   struct drm_display_mode *mode,
+						   int *dpms_mode);
+extern void intel_release_load_detect_pipe(struct intel_output *intel_output,
+					   int dpms_mode);
+
+extern struct drm_connector* intel_sdvo_find(struct drm_device *dev, int sdvoB);
+extern int intel_sdvo_supports_hotplug(struct drm_connector *connector);
+extern void intel_sdvo_set_hotplug(struct drm_connector *connector, int enable);
+extern int intelfb_probe(struct drm_device *dev);
+extern int intelfb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
+extern int intelfb_resize(struct drm_device *dev, struct drm_crtc *crtc);
+extern void intelfb_restore(void);
+extern void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
+				    u16 blue, int regno);
+
+extern int intel_framebuffer_create(struct drm_device *dev,
+				    struct drm_mode_fb_cmd *mode_cmd,
+				    struct drm_framebuffer **fb,
+				    struct drm_gem_object *obj);
+#endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
new file mode 100644
index 000000000000..008bfaed4614
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2006 Dave Airlie <airlied@linux.ie>
+ * Copyright © 2006-2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ */
+#include <linux/i2c.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "dvo.h"
+
+#define SIL164_ADDR	0x38
+#define CH7xxx_ADDR	0x76
+#define TFP410_ADDR	0x38
+
+extern struct intel_dvo_dev_ops sil164_ops;
+extern struct intel_dvo_dev_ops ch7xxx_ops;
+extern struct intel_dvo_dev_ops ivch_ops;
+extern struct intel_dvo_dev_ops tfp410_ops;
+extern struct intel_dvo_dev_ops ch7017_ops;
+
+struct intel_dvo_device intel_dvo_devices[] = {
+	{
+		.type = INTEL_DVO_CHIP_TMDS,
+		.name = "sil164",
+		.dvo_reg = DVOC,
+		.slave_addr = SIL164_ADDR,
+		.dev_ops = &sil164_ops,
+	},
+	{
+		.type = INTEL_DVO_CHIP_TMDS,
+		.name = "ch7xxx",
+		.dvo_reg = DVOC,
+		.slave_addr = CH7xxx_ADDR,
+		.dev_ops = &ch7xxx_ops,
+	},
+	{
+		.type = INTEL_DVO_CHIP_LVDS,
+		.name = "ivch",
+		.dvo_reg = DVOA,
+		.slave_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */
+		.dev_ops = &ivch_ops,
+	},
+	{
+		.type = INTEL_DVO_CHIP_TMDS,
+		.name = "tfp410",
+		.dvo_reg = DVOC,
+		.slave_addr = TFP410_ADDR,
+		.dev_ops = &tfp410_ops,
+	},
+	{
+		.type = INTEL_DVO_CHIP_LVDS,
+		.name = "ch7017",
+		.dvo_reg = DVOC,
+		.slave_addr = 0x75,
+		.gpio = GPIOE,
+		.dev_ops = &ch7017_ops,
+	}
+};
+
+static void intel_dvo_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+	u32 dvo_reg = dvo->dvo_reg;
+	u32 temp = I915_READ(dvo_reg);
+
+	if (mode == DRM_MODE_DPMS_ON) {
+		I915_WRITE(dvo_reg, temp | DVO_ENABLE);
+		I915_READ(dvo_reg);
+		dvo->dev_ops->dpms(dvo, mode);
+	} else {
+		dvo->dev_ops->dpms(dvo, mode);
+		I915_WRITE(dvo_reg, temp & ~DVO_ENABLE);
+		I915_READ(dvo_reg);
+	}
+}
+
+static void intel_dvo_save(struct drm_connector *connector)
+{
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	/* Each output should probably just save the registers it touches,
+	 * but for now, use more overkill.
+	 */
+	dev_priv->saveDVOA = I915_READ(DVOA);
+	dev_priv->saveDVOB = I915_READ(DVOB);
+	dev_priv->saveDVOC = I915_READ(DVOC);
+
+	dvo->dev_ops->save(dvo);
+}
+
+static void intel_dvo_restore(struct drm_connector *connector)
+{
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	dvo->dev_ops->restore(dvo);
+
+	I915_WRITE(DVOA, dev_priv->saveDVOA);
+	I915_WRITE(DVOB, dev_priv->saveDVOB);
+	I915_WRITE(DVOC, dev_priv->saveDVOC);
+}
+
+static int intel_dvo_mode_valid(struct drm_connector *connector,
+				struct drm_display_mode *mode)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
+	/* XXX: Validate clock range */
+
+	if (dvo->panel_fixed_mode) {
+		if (mode->hdisplay > dvo->panel_fixed_mode->hdisplay)
+			return MODE_PANEL;
+		if (mode->vdisplay > dvo->panel_fixed_mode->vdisplay)
+			return MODE_PANEL;
+	}
+
+	return dvo->dev_ops->mode_valid(dvo, mode);
+}
+
+static bool intel_dvo_mode_fixup(struct drm_encoder *encoder,
+				 struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode)
+{
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	/* If we have timings from the BIOS for the panel, put them in
+	 * to the adjusted mode.  The CRTC will be set up for this mode,
+	 * with the panel scaling set up to source from the H/VDisplay
+	 * of the original mode.
+	 */
+	if (dvo->panel_fixed_mode != NULL) {
+#define C(x) adjusted_mode->x = dvo->panel_fixed_mode->x
+		C(hdisplay);
+		C(hsync_start);
+		C(hsync_end);
+		C(htotal);
+		C(vdisplay);
+		C(vsync_start);
+		C(vsync_end);
+		C(vtotal);
+		C(clock);
+		drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+#undef C
+	}
+
+	if (dvo->dev_ops->mode_fixup)
+		return dvo->dev_ops->mode_fixup(dvo, mode, adjusted_mode);
+
+	return true;
+}
+
+static void intel_dvo_mode_set(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+	int pipe = intel_crtc->pipe;
+	u32 dvo_val;
+	u32 dvo_reg = dvo->dvo_reg, dvo_srcdim_reg;
+	int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
+
+	switch (dvo_reg) {
+	case DVOA:
+	default:
+		dvo_srcdim_reg = DVOA_SRCDIM;
+		break;
+	case DVOB:
+		dvo_srcdim_reg = DVOB_SRCDIM;
+		break;
+	case DVOC:
+		dvo_srcdim_reg = DVOC_SRCDIM;
+		break;
+	}
+
+	dvo->dev_ops->mode_set(dvo, mode, adjusted_mode);
+
+	/* Save the data order, since I don't know what it should be set to. */
+	dvo_val = I915_READ(dvo_reg) &
+		  (DVO_PRESERVE_MASK | DVO_DATA_ORDER_GBRG);
+	dvo_val |= DVO_DATA_ORDER_FP | DVO_BORDER_ENABLE |
+		   DVO_BLANK_ACTIVE_HIGH;
+
+	if (pipe == 1)
+		dvo_val |= DVO_PIPE_B_SELECT;
+	dvo_val |= DVO_PIPE_STALL;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
+		dvo_val |= DVO_HSYNC_ACTIVE_HIGH;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
+		dvo_val |= DVO_VSYNC_ACTIVE_HIGH;
+
+	I915_WRITE(dpll_reg, I915_READ(dpll_reg) | DPLL_DVO_HIGH_SPEED);
+
+	/*I915_WRITE(DVOB_SRCDIM,
+	  (adjusted_mode->hdisplay << DVO_SRCDIM_HORIZONTAL_SHIFT) |
+	  (adjusted_mode->VDisplay << DVO_SRCDIM_VERTICAL_SHIFT));*/
+	I915_WRITE(dvo_srcdim_reg,
+		   (adjusted_mode->hdisplay << DVO_SRCDIM_HORIZONTAL_SHIFT) |
+		   (adjusted_mode->vdisplay << DVO_SRCDIM_VERTICAL_SHIFT));
+	/*I915_WRITE(DVOB, dvo_val);*/
+	I915_WRITE(dvo_reg, dvo_val);
+}
+
+/**
+ * Detect the output connection on our DVO device.
+ *
+ * Unimplemented.
+ */
+static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	return dvo->dev_ops->detect(dvo);
+}
+
+static int intel_dvo_get_modes(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	/* We should probably have an i2c driver get_modes function for those
+	 * devices which will have a fixed set of modes determined by the chip
+	 * (TV-out, for example), but for now with just TMDS and LVDS,
+	 * that's not the case.
+	 */
+	intel_ddc_get_modes(intel_output);
+	if (!list_empty(&connector->probed_modes))
+		return 1;
+
+
+	if (dvo->panel_fixed_mode != NULL) {
+		struct drm_display_mode *mode;
+		mode = drm_mode_duplicate(connector->dev, dvo->panel_fixed_mode);
+		if (mode) {
+			drm_mode_probed_add(connector, mode);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void intel_dvo_destroy (struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+
+	if (dvo) {
+		if (dvo->dev_ops->destroy)
+			dvo->dev_ops->destroy(dvo);
+		if (dvo->panel_fixed_mode)
+			kfree(dvo->panel_fixed_mode);
+		/* no need, in i830_dvoices[] now */
+		//kfree(dvo);
+	}
+	if (intel_output->i2c_bus)
+		intel_i2c_destroy(intel_output->i2c_bus);
+	if (intel_output->ddc_bus)
+		intel_i2c_destroy(intel_output->ddc_bus);
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(intel_output);
+}
+
+#ifdef RANDR_GET_CRTC_INTERFACE
+static struct drm_crtc *intel_dvo_get_crtc(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+	int pipe = !!(I915_READ(dvo->dvo_reg) & SDVO_PIPE_B_SELECT);
+
+	return intel_pipe_to_crtc(pScrn, pipe);
+}
+#endif
+
+static const struct drm_encoder_helper_funcs intel_dvo_helper_funcs = {
+	.dpms = intel_dvo_dpms,
+	.mode_fixup = intel_dvo_mode_fixup,
+	.prepare = intel_encoder_prepare,
+	.mode_set = intel_dvo_mode_set,
+	.commit = intel_encoder_commit,
+};
+
+static const struct drm_connector_funcs intel_dvo_connector_funcs = {
+	.save = intel_dvo_save,
+	.restore = intel_dvo_restore,
+	.detect = intel_dvo_detect,
+	.destroy = intel_dvo_destroy,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+};
+
+static const struct drm_connector_helper_funcs intel_dvo_connector_helper_funcs = {
+	.mode_valid = intel_dvo_mode_valid,
+	.get_modes = intel_dvo_get_modes,
+	.best_encoder = intel_best_encoder,
+};
+
+void intel_dvo_enc_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs intel_dvo_enc_funcs = {
+	.destroy = intel_dvo_enc_destroy,
+};
+
+
+/**
+ * Attempts to get a fixed panel timing for LVDS (currently only the i830).
+ *
+ * Other chips with DVO LVDS will need to extend this to deal with the LVDS
+ * chip being on DVOB/C and having multiple pipes.
+ */
+static struct drm_display_mode *
+intel_dvo_get_current_mode (struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_dvo_device *dvo = intel_output->dev_priv;
+	uint32_t dvo_reg = dvo->dvo_reg;
+	uint32_t dvo_val = I915_READ(dvo_reg);
+	struct drm_display_mode *mode = NULL;
+
+	/* If the DVO port is active, that'll be the LVDS, so we can pull out
+	 * its timings to get how the BIOS set up the panel.
+	 */
+	if (dvo_val & DVO_ENABLE) {
+		struct drm_crtc *crtc;
+		int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0;
+
+		crtc = intel_get_crtc_from_pipe(dev, pipe);
+		if (crtc) {
+			mode = intel_crtc_mode_get(dev, crtc);
+
+			if (mode) {
+				mode->type |= DRM_MODE_TYPE_PREFERRED;
+				if (dvo_val & DVO_HSYNC_ACTIVE_HIGH)
+					mode->flags |= DRM_MODE_FLAG_PHSYNC;
+				if (dvo_val & DVO_VSYNC_ACTIVE_HIGH)
+					mode->flags |= DRM_MODE_FLAG_PVSYNC;
+			}
+		}
+	}
+	return mode;
+}
+
+void intel_dvo_init(struct drm_device *dev)
+{
+	struct intel_output *intel_output;
+	struct intel_dvo_device *dvo;
+	struct intel_i2c_chan *i2cbus = NULL;
+	int ret = 0;
+	int i;
+	int gpio_inited = 0;
+	int encoder_type = DRM_MODE_ENCODER_NONE;
+	intel_output = kzalloc (sizeof(struct intel_output), GFP_KERNEL);
+	if (!intel_output)
+		return;
+
+	/* Set up the DDC bus */
+	intel_output->ddc_bus = intel_i2c_create(dev, GPIOD, "DVODDC_D");
+	if (!intel_output->ddc_bus)
+		goto free_intel;
+
+	/* Now, try to find a controller */
+	for (i = 0; i < ARRAY_SIZE(intel_dvo_devices); i++) {
+		struct drm_connector *connector = &intel_output->base;
+		int gpio;
+
+		dvo = &intel_dvo_devices[i];
+
+		/* Allow the I2C driver info to specify the GPIO to be used in
+		 * special cases, but otherwise default to what's defined
+		 * in the spec.
+		 */
+		if (dvo->gpio != 0)
+			gpio = dvo->gpio;
+		else if (dvo->type == INTEL_DVO_CHIP_LVDS)
+			gpio = GPIOB;
+		else
+			gpio = GPIOE;
+
+		/* Set up the I2C bus necessary for the chip we're probing.
+		 * It appears that everything is on GPIOE except for panels
+		 * on i830 laptops, which are on GPIOB (DVOA).
+		 */
+		if (gpio_inited != gpio) {
+			if (i2cbus != NULL)
+				intel_i2c_destroy(i2cbus);
+			if (!(i2cbus = intel_i2c_create(dev, gpio,
+				gpio == GPIOB ? "DVOI2C_B" : "DVOI2C_E"))) {
+				continue;
+			}
+			gpio_inited = gpio;
+		}
+
+		if (dvo->dev_ops!= NULL)
+			ret = dvo->dev_ops->init(dvo, i2cbus);
+		else
+			ret = false;
+
+		if (!ret)
+			continue;
+
+		intel_output->type = INTEL_OUTPUT_DVO;
+		switch (dvo->type) {
+		case INTEL_DVO_CHIP_TMDS:
+			drm_connector_init(dev, connector,
+					   &intel_dvo_connector_funcs,
+					   DRM_MODE_CONNECTOR_DVII);
+			encoder_type = DRM_MODE_ENCODER_TMDS;
+			break;
+		case INTEL_DVO_CHIP_LVDS:
+			drm_connector_init(dev, connector,
+					   &intel_dvo_connector_funcs,
+					   DRM_MODE_CONNECTOR_LVDS);
+			encoder_type = DRM_MODE_ENCODER_LVDS;
+			break;
+		}
+
+		drm_connector_helper_add(connector,
+					 &intel_dvo_connector_helper_funcs);
+		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+		connector->interlace_allowed = false;
+		connector->doublescan_allowed = false;
+
+		intel_output->dev_priv = dvo;
+		intel_output->i2c_bus = i2cbus;
+
+		drm_encoder_init(dev, &intel_output->enc,
+				 &intel_dvo_enc_funcs, encoder_type);
+		drm_encoder_helper_add(&intel_output->enc,
+				       &intel_dvo_helper_funcs);
+
+		drm_mode_connector_attach_encoder(&intel_output->base,
+						  &intel_output->enc);
+		if (dvo->type == INTEL_DVO_CHIP_LVDS) {
+			/* For our LVDS chipsets, we should hopefully be able
+			 * to dig the fixed panel mode out of the BIOS data.
+			 * However, it's in a different format from the BIOS
+			 * data on chipsets with integrated LVDS (stored in AIM
+			 * headers, likely), so for now, just get the current
+			 * mode being output through DVO.
+			 */
+			dvo->panel_fixed_mode =
+				intel_dvo_get_current_mode(connector);
+			dvo->panel_wants_dither = true;
+		}
+
+		drm_sysfs_connector_add(connector);
+		return;
+	}
+
+	intel_i2c_destroy(intel_output->ddc_bus);
+	/* Didn't find a chip, so tear down. */
+	if (i2cbus != NULL)
+		intel_i2c_destroy(i2cbus);
+free_intel:
+	kfree(intel_output);
+}
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
new file mode 100644
index 000000000000..a89ebea7b76d
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -0,0 +1,926 @@
+/*
+ * Copyright © 2007 David Airlie
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     David Airlie
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/sysrq.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+struct intelfb_par {
+	struct drm_device *dev;
+	struct drm_display_mode *our_mode;
+	struct intel_framebuffer *intel_fb;
+	int crtc_count;
+	/* crtc currently bound to this */
+	uint32_t crtc_ids[2];
+};
+
+static int intelfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			unsigned blue, unsigned transp,
+			struct fb_info *info)
+{
+	struct intelfb_par *par = info->par;
+	struct drm_device *dev = par->dev;
+	struct drm_crtc *crtc;
+	int i;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+		struct drm_mode_set *modeset = &intel_crtc->mode_set;
+		struct drm_framebuffer *fb = modeset->fb;
+
+		for (i = 0; i < par->crtc_count; i++)
+			if (crtc->base.id == par->crtc_ids[i])
+				break;
+
+		if (i == par->crtc_count)
+			continue;
+
+
+		if (regno > 255)
+			return 1;
+
+		if (fb->depth == 8) {
+			intel_crtc_fb_gamma_set(crtc, red, green, blue, regno);
+			return 0;
+		}
+
+		if (regno < 16) {
+			switch (fb->depth) {
+			case 15:
+				fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) |
+					((green & 0xf800) >>  6) |
+					((blue & 0xf800) >> 11);
+				break;
+			case 16:
+				fb->pseudo_palette[regno] = (red & 0xf800) |
+					((green & 0xfc00) >>  5) |
+					((blue  & 0xf800) >> 11);
+				break;
+			case 24:
+			case 32:
+				fb->pseudo_palette[regno] = ((red & 0xff00) << 8) |
+					(green & 0xff00) |
+					((blue  & 0xff00) >> 8);
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+static int intelfb_check_var(struct fb_var_screeninfo *var,
+			struct fb_info *info)
+{
+	struct intelfb_par *par = info->par;
+	struct intel_framebuffer *intel_fb = par->intel_fb;
+	struct drm_framebuffer *fb = &intel_fb->base;
+	int depth;
+
+	if (var->pixclock == -1 || !var->pixclock)
+		return -EINVAL;
+
+	/* Need to resize the fb object !!! */
+	if (var->xres > fb->width || var->yres > fb->height) {
+		DRM_ERROR("Requested width/height is greater than current fb object %dx%d > %dx%d\n",var->xres,var->yres,fb->width,fb->height);
+		DRM_ERROR("Need resizing code.\n");
+		return -EINVAL;
+	}
+
+	switch (var->bits_per_pixel) {
+	case 16:
+		depth = (var->green.length == 6) ? 16 : 15;
+		break;
+	case 32:
+		depth = (var->transp.length > 0) ? 32 : 24;
+		break;
+	default:
+		depth = var->bits_per_pixel;
+		break;
+	}
+
+	switch (depth) {
+	case 8:
+		var->red.offset = 0;
+		var->green.offset = 0;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 15:
+		var->red.offset = 10;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 5;
+		var->blue.length = 5;
+		var->transp.length = 1;
+		var->transp.offset = 15;
+		break;
+	case 16:
+		var->red.offset = 11;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 6;
+		var->blue.length = 5;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 24:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 32:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 8;
+		var->transp.offset = 24;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* this will let fbcon do the mode init */
+/* FIXME: take mode config lock? */
+static int intelfb_set_par(struct fb_info *info)
+{
+	struct intelfb_par *par = info->par;
+	struct drm_device *dev = par->dev;
+	struct fb_var_screeninfo *var = &info->var;
+	int i;
+
+	DRM_DEBUG("%d %d\n", var->xres, var->pixclock);
+
+	if (var->pixclock != -1) {
+
+		DRM_ERROR("PIXEL CLCOK SET\n");
+		return -EINVAL;
+	} else {
+		struct drm_crtc *crtc;
+		int ret;
+
+		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+			struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+			for (i = 0; i < par->crtc_count; i++)
+				if (crtc->base.id == par->crtc_ids[i])
+					break;
+
+			if (i == par->crtc_count)
+				continue;
+
+			if (crtc->fb == intel_crtc->mode_set.fb) {
+				mutex_lock(&dev->mode_config.mutex);
+				ret = crtc->funcs->set_config(&intel_crtc->mode_set);
+				mutex_unlock(&dev->mode_config.mutex);
+				if (ret)
+					return ret;
+			}
+		}
+		return 0;
+	}
+}
+
+static int intelfb_pan_display(struct fb_var_screeninfo *var,
+				struct fb_info *info)
+{
+	struct intelfb_par *par = info->par;
+	struct drm_device *dev = par->dev;
+	struct drm_mode_set *modeset;
+	struct drm_crtc *crtc;
+	struct intel_crtc *intel_crtc;
+	int ret = 0;
+	int i;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		for (i = 0; i < par->crtc_count; i++)
+			if (crtc->base.id == par->crtc_ids[i])
+				break;
+
+		if (i == par->crtc_count)
+			continue;
+
+		intel_crtc = to_intel_crtc(crtc);
+		modeset = &intel_crtc->mode_set;
+
+		modeset->x = var->xoffset;
+		modeset->y = var->yoffset;
+
+		if (modeset->num_connectors) {
+			mutex_lock(&dev->mode_config.mutex);
+			ret = crtc->funcs->set_config(modeset);
+			mutex_unlock(&dev->mode_config.mutex);
+			if (!ret) {
+				info->var.xoffset = var->xoffset;
+				info->var.yoffset = var->yoffset;
+			}
+		}
+	}
+
+	return ret;
+}
+
+static void intelfb_on(struct fb_info *info)
+{
+	struct intelfb_par *par = info->par;
+	struct drm_device *dev = par->dev;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int i;
+
+	/*
+	 * For each CRTC in this fb, find all associated encoders
+	 * and turn them off, then turn off the CRTC.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+		for (i = 0; i < par->crtc_count; i++)
+			if (crtc->base.id == par->crtc_ids[i])
+				break;
+
+		crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+
+		/* Found a CRTC on this fb, now find encoders */
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+			if (encoder->crtc == crtc) {
+				struct drm_encoder_helper_funcs *encoder_funcs;
+				encoder_funcs = encoder->helper_private;
+				encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
+			}
+		}
+	}
+}
+
+static void intelfb_off(struct fb_info *info, int dpms_mode)
+{
+	struct intelfb_par *par = info->par;
+	struct drm_device *dev = par->dev;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int i;
+
+	/*
+	 * For each CRTC in this fb, find all associated encoders
+	 * and turn them off, then turn off the CRTC.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+		for (i = 0; i < par->crtc_count; i++)
+			if (crtc->base.id == par->crtc_ids[i])
+				break;
+
+		/* Found a CRTC on this fb, now find encoders */
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+			if (encoder->crtc == crtc) {
+				struct drm_encoder_helper_funcs *encoder_funcs;
+				encoder_funcs = encoder->helper_private;
+				encoder_funcs->dpms(encoder, dpms_mode);
+			}
+		}
+		if (dpms_mode == DRM_MODE_DPMS_OFF)
+			crtc_funcs->dpms(crtc, dpms_mode);
+	}
+}
+
+int intelfb_blank(int blank, struct fb_info *info)
+{
+	switch (blank) {
+	case FB_BLANK_UNBLANK:
+		intelfb_on(info);
+		break;
+	case FB_BLANK_NORMAL:
+		intelfb_off(info, DRM_MODE_DPMS_STANDBY);
+		break;
+	case FB_BLANK_HSYNC_SUSPEND:
+		intelfb_off(info, DRM_MODE_DPMS_STANDBY);
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+		intelfb_off(info, DRM_MODE_DPMS_SUSPEND);
+		break;
+	case FB_BLANK_POWERDOWN:
+		intelfb_off(info, DRM_MODE_DPMS_OFF);
+		break;
+	}
+	return 0;
+}
+
+static struct fb_ops intelfb_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = intelfb_check_var,
+	.fb_set_par = intelfb_set_par,
+	.fb_setcolreg = intelfb_setcolreg,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+	.fb_pan_display = intelfb_pan_display,
+	.fb_blank = intelfb_blank,
+};
+
+/**
+ * Curretly it is assumed that the old framebuffer is reused.
+ *
+ * LOCKING
+ * caller should hold the mode config lock.
+ *
+ */
+int intelfb_resize(struct drm_device *dev, struct drm_crtc *crtc)
+{
+	struct fb_info *info;
+	struct drm_framebuffer *fb;
+	struct drm_display_mode *mode = crtc->desired_mode;
+
+	fb = crtc->fb;
+	if (!fb)
+		return 1;
+
+	info = fb->fbdev;
+	if (!info)
+		return 1;
+
+	if (!mode)
+		return 1;
+
+	info->var.xres = mode->hdisplay;
+	info->var.right_margin = mode->hsync_start - mode->hdisplay;
+	info->var.hsync_len = mode->hsync_end - mode->hsync_start;
+	info->var.left_margin = mode->htotal - mode->hsync_end;
+	info->var.yres = mode->vdisplay;
+	info->var.lower_margin = mode->vsync_start - mode->vdisplay;
+	info->var.vsync_len = mode->vsync_end - mode->vsync_start;
+	info->var.upper_margin = mode->vtotal - mode->vsync_end;
+	info->var.pixclock = 10000000 / mode->htotal * 1000 / mode->vtotal * 100;
+	/* avoid overflow */
+	info->var.pixclock = info->var.pixclock * 1000 / mode->vrefresh;
+
+	return 0;
+}
+EXPORT_SYMBOL(intelfb_resize);
+
+static struct drm_mode_set kernelfb_mode;
+
+int intelfb_panic(struct notifier_block *n, unsigned long ununsed,
+		  void *panic_str)
+{
+	DRM_ERROR("panic occurred, switching back to text console\n");
+
+	intelfb_restore();
+	return 0;
+}
+EXPORT_SYMBOL(intelfb_panic);
+
+static struct notifier_block paniced = {
+	.notifier_call = intelfb_panic,
+};
+
+int intelfb_create(struct drm_device *dev, uint32_t fb_width,
+		   uint32_t fb_height, uint32_t surface_width,
+		   uint32_t surface_height,
+		   struct intel_framebuffer **intel_fb_p)
+{
+	struct fb_info *info;
+	struct intelfb_par *par;
+	struct drm_framebuffer *fb;
+	struct intel_framebuffer *intel_fb;
+	struct drm_mode_fb_cmd mode_cmd;
+	struct drm_gem_object *fbo = NULL;
+	struct drm_i915_gem_object *obj_priv;
+	struct device *device = &dev->pdev->dev;
+	int size, ret, mmio_bar = IS_I9XX(dev) ? 0 : 1;
+
+	mode_cmd.width = surface_width;
+	mode_cmd.height = surface_height;
+
+	mode_cmd.bpp = 32;
+	mode_cmd.pitch = mode_cmd.width * ((mode_cmd.bpp + 1) / 8);
+	mode_cmd.depth = 24;
+
+	size = mode_cmd.pitch * mode_cmd.height;
+	size = ALIGN(size, PAGE_SIZE);
+	fbo = drm_gem_object_alloc(dev, size);
+	if (!fbo) {
+		printk(KERN_ERR "failed to allocate framebuffer\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+	obj_priv = fbo->driver_private;
+
+	mutex_lock(&dev->struct_mutex);
+
+	ret = i915_gem_object_pin(fbo, PAGE_SIZE);
+	if (ret) {
+		DRM_ERROR("failed to pin fb: %d\n", ret);
+		goto out_unref;
+	}
+
+	/* Flush everything out, we'll be doing GTT only from now on */
+	i915_gem_object_set_to_gtt_domain(fbo, 1);
+
+	ret = intel_framebuffer_create(dev, &mode_cmd, &fb, fbo);
+	if (ret) {
+		DRM_ERROR("failed to allocate fb.\n");
+		goto out_unref;
+	}
+
+	list_add(&fb->filp_head, &dev->mode_config.fb_kernel_list);
+
+	intel_fb = to_intel_framebuffer(fb);
+	*intel_fb_p = intel_fb;
+
+	info = framebuffer_alloc(sizeof(struct intelfb_par), device);
+	if (!info) {
+		ret = -ENOMEM;
+		goto out_unref;
+	}
+
+	par = info->par;
+
+	strcpy(info->fix.id, "inteldrmfb");
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
+	info->fix.visual = FB_VISUAL_TRUECOLOR;
+	info->fix.type_aux = 0;
+	info->fix.xpanstep = 1; /* doing it in hw */
+	info->fix.ypanstep = 1; /* doing it in hw */
+	info->fix.ywrapstep = 0;
+	info->fix.accel = FB_ACCEL_I830;
+	info->fix.type_aux = 0;
+
+	info->flags = FBINFO_DEFAULT;
+
+	info->fbops = &intelfb_ops;
+
+	info->fix.line_length = fb->pitch;
+	info->fix.smem_start = dev->mode_config.fb_base + obj_priv->gtt_offset;
+	info->fix.smem_len = size;
+
+	info->flags = FBINFO_DEFAULT;
+
+	info->screen_base = ioremap_wc(dev->agp->base + obj_priv->gtt_offset,
+				       size);
+	if (!info->screen_base) {
+		ret = -ENOSPC;
+		goto out_unref;
+	}
+	info->screen_size = size;
+
+//	memset(info->screen_base, 0, size);
+
+	info->pseudo_palette = fb->pseudo_palette;
+	info->var.xres_virtual = fb->width;
+	info->var.yres_virtual = fb->height;
+	info->var.bits_per_pixel = fb->bits_per_pixel;
+	info->var.xoffset = 0;
+	info->var.yoffset = 0;
+	info->var.activate = FB_ACTIVATE_NOW;
+	info->var.height = -1;
+	info->var.width = -1;
+
+	info->var.xres = fb_width;
+	info->var.yres = fb_height;
+
+	/* FIXME: we really shouldn't expose mmio space at all */
+	info->fix.mmio_start = pci_resource_start(dev->pdev, mmio_bar);
+	info->fix.mmio_len = pci_resource_len(dev->pdev, mmio_bar);
+
+	info->pixmap.size = 64*1024;
+	info->pixmap.buf_align = 8;
+	info->pixmap.access_align = 32;
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
+	info->pixmap.scan_align = 1;
+
+	switch(fb->depth) {
+	case 8:
+		info->var.red.offset = 0;
+		info->var.green.offset = 0;
+		info->var.blue.offset = 0;
+		info->var.red.length = 8; /* 8bit DAC */
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 0;
+		info->var.transp.length = 0;
+		break;
+	case 15:
+		info->var.red.offset = 10;
+		info->var.green.offset = 5;
+		info->var.blue.offset = 0;
+		info->var.red.length = 5;
+		info->var.green.length = 5;
+		info->var.blue.length = 5;
+		info->var.transp.offset = 15;
+		info->var.transp.length = 1;
+		break;
+	case 16:
+		info->var.red.offset = 11;
+		info->var.green.offset = 5;
+		info->var.blue.offset = 0;
+		info->var.red.length = 5;
+		info->var.green.length = 6;
+		info->var.blue.length = 5;
+		info->var.transp.offset = 0;
+		break;
+	case 24:
+		info->var.red.offset = 16;
+		info->var.green.offset = 8;
+		info->var.blue.offset = 0;
+		info->var.red.length = 8;
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 0;
+		info->var.transp.length = 0;
+		break;
+	case 32:
+		info->var.red.offset = 16;
+		info->var.green.offset = 8;
+		info->var.blue.offset = 0;
+		info->var.red.length = 8;
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 24;
+		info->var.transp.length = 8;
+		break;
+	default:
+		break;
+	}
+
+	fb->fbdev = info;
+
+	par->intel_fb = intel_fb;
+	par->dev = dev;
+
+	/* To allow resizeing without swapping buffers */
+	printk("allocated %dx%d fb: 0x%08x, bo %p\n", intel_fb->base.width,
+	       intel_fb->base.height, obj_priv->gtt_offset, fbo);
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+
+out_unref:
+	drm_gem_object_unreference(fbo);
+	mutex_unlock(&dev->struct_mutex);
+out:
+	return ret;
+}
+
+static int intelfb_multi_fb_probe_crtc(struct drm_device *dev, struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_framebuffer *intel_fb;
+	struct drm_framebuffer *fb;
+	struct drm_connector *connector;
+	struct fb_info *info;
+	struct intelfb_par *par;
+	struct drm_mode_set *modeset;
+	unsigned int width, height;
+	int new_fb = 0;
+	int ret, i, conn_count;
+
+	if (!drm_helper_crtc_in_use(crtc))
+		return 0;
+
+	if (!crtc->desired_mode)
+		return 0;
+
+	width = crtc->desired_mode->hdisplay;
+	height = crtc->desired_mode->vdisplay;
+
+	/* is there an fb bound to this crtc already */
+	if (!intel_crtc->mode_set.fb) {
+		ret = intelfb_create(dev, width, height, width, height, &intel_fb);
+		if (ret)
+			return -EINVAL;
+		new_fb = 1;
+	} else {
+		fb = intel_crtc->mode_set.fb;
+		intel_fb = to_intel_framebuffer(fb);
+		if ((intel_fb->base.width < width) || (intel_fb->base.height < height))
+			return -EINVAL;
+	}
+
+	info = intel_fb->base.fbdev;
+	par = info->par;
+
+	modeset = &intel_crtc->mode_set;
+	modeset->fb = &intel_fb->base;
+	conn_count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder)
+			if (connector->encoder->crtc == modeset->crtc) {
+				modeset->connectors[conn_count] = connector;
+				conn_count++;
+				if (conn_count > INTELFB_CONN_LIMIT)
+					BUG();
+			}
+	}
+
+	for (i = conn_count; i < INTELFB_CONN_LIMIT; i++)
+		modeset->connectors[i] = NULL;
+
+	par->crtc_ids[0] = crtc->base.id;
+
+	modeset->num_connectors = conn_count;
+	if (modeset->mode != modeset->crtc->desired_mode)
+		modeset->mode = modeset->crtc->desired_mode;
+
+	par->crtc_count = 1;
+
+	if (new_fb) {
+		info->var.pixclock = -1;
+		if (register_framebuffer(info) < 0)
+			return -EINVAL;
+	} else
+		intelfb_set_par(info);
+
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
+	       info->fix.id);
+
+	/* Switch back to kernel console on panic */
+	kernelfb_mode = *modeset;
+	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
+	printk(KERN_INFO "registered panic notifier\n");
+
+	return 0;
+}
+
+static int intelfb_multi_fb_probe(struct drm_device *dev)
+{
+
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		ret = intelfb_multi_fb_probe_crtc(dev, crtc);
+		if (ret)
+			return ret;
+	}
+	return ret;
+}
+
+static int intelfb_single_fb_probe(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
+	struct drm_connector *connector;
+	unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1;
+	unsigned int surface_width = 0, surface_height = 0;
+	int new_fb = 0;
+	int crtc_count = 0;
+	int ret, i, conn_count = 0;
+	struct intel_framebuffer *intel_fb;
+	struct fb_info *info;
+	struct intelfb_par *par;
+	struct drm_mode_set *modeset = NULL;
+
+	DRM_DEBUG("\n");
+
+	/* Get a count of crtcs now in use and new min/maxes width/heights */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (!drm_helper_crtc_in_use(crtc))
+			continue;
+
+		crtc_count++;
+		if (!crtc->desired_mode)
+			continue;
+
+		/* Smallest mode determines console size... */
+		if (crtc->desired_mode->hdisplay < fb_width)
+			fb_width = crtc->desired_mode->hdisplay;
+
+		if (crtc->desired_mode->vdisplay < fb_height)
+			fb_height = crtc->desired_mode->vdisplay;
+
+		/* ... but largest for memory allocation dimensions */
+		if (crtc->desired_mode->hdisplay > surface_width)
+			surface_width = crtc->desired_mode->hdisplay;
+
+		if (crtc->desired_mode->vdisplay > surface_height)
+			surface_height = crtc->desired_mode->vdisplay;
+	}
+
+	if (crtc_count == 0 || fb_width == -1 || fb_height == -1) {
+		/* hmm everyone went away - assume VGA cable just fell out
+		   and will come back later. */
+		DRM_DEBUG("no CRTCs available?\n");
+		return 0;
+	}
+
+//fail
+	/* Find the fb for our new config */
+	if (list_empty(&dev->mode_config.fb_kernel_list)) {
+		DRM_DEBUG("creating new fb (console size %dx%d, "
+			  "buffer size %dx%d)\n", fb_width, fb_height,
+			  surface_width, surface_height);
+		ret = intelfb_create(dev, fb_width, fb_height, surface_width,
+				     surface_height, &intel_fb);
+		if (ret)
+			return -EINVAL;
+		new_fb = 1;
+	} else {
+		struct drm_framebuffer *fb;
+
+		fb = list_first_entry(&dev->mode_config.fb_kernel_list,
+				      struct drm_framebuffer, filp_head);
+		intel_fb = to_intel_framebuffer(fb);
+
+		/* if someone hotplugs something bigger than we have already
+		 * allocated, we are pwned.  As really we can't resize an
+		 * fbdev that is in the wild currently due to fbdev not really
+		 * being designed for the lower layers moving stuff around
+		 * under it.
+		 * - so in the grand style of things - punt.
+		 */
+		if ((fb->width < surface_width) ||
+		    (fb->height < surface_height)) {
+			DRM_ERROR("fb not large enough for console\n");
+			return -EINVAL;
+		}
+	}
+// fail
+
+	info = intel_fb->base.fbdev;
+	par = info->par;
+
+	crtc_count = 0;
+	/*
+	 * For each CRTC, set up the connector list for the CRTC's mode
+	 * set configuration.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+		modeset = &intel_crtc->mode_set;
+		modeset->fb = &intel_fb->base;
+		conn_count = 0;
+		list_for_each_entry(connector, &dev->mode_config.connector_list,
+				    head) {
+			if (!connector->encoder)
+				continue;
+
+			if(connector->encoder->crtc == modeset->crtc) {
+				modeset->connectors[conn_count++] = connector;
+				if (conn_count > INTELFB_CONN_LIMIT)
+					BUG();
+			}
+		}
+
+		/* Zero out remaining connector pointers */
+		for (i = conn_count; i < INTELFB_CONN_LIMIT; i++)
+			modeset->connectors[i] = NULL;
+
+		par->crtc_ids[crtc_count++] = crtc->base.id;
+
+		modeset->num_connectors = conn_count;
+		if (modeset->mode != modeset->crtc->desired_mode)
+			modeset->mode = modeset->crtc->desired_mode;
+	}
+	par->crtc_count = crtc_count;
+
+	if (new_fb) {
+		info->var.pixclock = -1;
+		if (register_framebuffer(info) < 0)
+			return -EINVAL;
+	} else
+		intelfb_set_par(info);
+
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
+	       info->fix.id);
+
+	/* Switch back to kernel console on panic */
+	kernelfb_mode = *modeset;
+	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
+	printk(KERN_INFO "registered panic notifier\n");
+
+	return 0;
+}
+
+/**
+ * intelfb_restore - restore the framebuffer console (kernel) config
+ *
+ * Restore's the kernel's fbcon mode, used for lastclose & panic paths.
+ */
+void intelfb_restore(void)
+{
+	drm_crtc_helper_set_config(&kernelfb_mode);
+}
+
+static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3)
+{
+        intelfb_restore();
+}
+
+static struct sysrq_key_op sysrq_intelfb_restore_op = {
+        .handler = intelfb_sysrq,
+        .help_msg = "force fb",
+        .action_msg = "force restore of fb console",
+};
+
+int intelfb_probe(struct drm_device *dev)
+{
+	int ret;
+
+	DRM_DEBUG("\n");
+
+	/* something has changed in the lower levels of hell - deal with it
+	   here */
+
+	/* two modes : a) 1 fb to rule all crtcs.
+	               b) one fb per crtc.
+	   two actions 1) new connected device
+	               2) device removed.
+	   case a/1 : if the fb surface isn't big enough - resize the surface fb.
+	              if the fb size isn't big enough - resize fb into surface.
+		      if everything big enough configure the new crtc/etc.
+	   case a/2 : undo the configuration
+	              possibly resize down the fb to fit the new configuration.
+           case b/1 : see if it is on a new crtc - setup a new fb and add it.
+	   case b/2 : teardown the new fb.
+	*/
+
+	/* mode a first */
+	/* search for an fb */
+	if (i915_fbpercrtc == 1) {
+		ret = intelfb_multi_fb_probe(dev);
+	} else {
+		ret = intelfb_single_fb_probe(dev);
+	}
+
+	register_sysrq_key('g', &sysrq_intelfb_restore_op);
+
+	return ret;
+}
+EXPORT_SYMBOL(intelfb_probe);
+
+int intelfb_remove(struct drm_device *dev, struct drm_framebuffer *fb)
+{
+	struct fb_info *info;
+
+	if (!fb)
+		return -EINVAL;
+
+	info = fb->fbdev;
+
+	if (info) {
+		unregister_framebuffer(info);
+		iounmap(info->screen_base);
+		framebuffer_release(info);
+	}
+
+	atomic_notifier_chain_unregister(&panic_notifier_list, &paniced);
+	memset(&kernelfb_mode, 0, sizeof(struct drm_mode_set));
+	return 0;
+}
+EXPORT_SYMBOL(intelfb_remove);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
new file mode 100644
index 000000000000..a5a2f5339e9e
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2006 Dave Airlie <airlied@linux.ie>
+ * Copyright © 2006-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ */
+#include <linux/i2c.h>
+#include <linux/i2c-id.h>
+#include <linux/i2c-algo-bit.h>
+#include "drmP.h"
+#include "drm.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+/*
+ * Intel GPIO access functions
+ */
+
+#define I2C_RISEFALL_TIME 20
+
+static int get_clock(void *data)
+{
+	struct intel_i2c_chan *chan = data;
+	struct drm_i915_private *dev_priv = chan->drm_dev->dev_private;
+	u32 val;
+
+	val = I915_READ(chan->reg);
+	return ((val & GPIO_CLOCK_VAL_IN) != 0);
+}
+
+static int get_data(void *data)
+{
+	struct intel_i2c_chan *chan = data;
+	struct drm_i915_private *dev_priv = chan->drm_dev->dev_private;
+	u32 val;
+
+	val = I915_READ(chan->reg);
+	return ((val & GPIO_DATA_VAL_IN) != 0);
+}
+
+static void set_clock(void *data, int state_high)
+{
+	struct intel_i2c_chan *chan = data;
+	struct drm_device *dev = chan->drm_dev;
+	struct drm_i915_private *dev_priv = chan->drm_dev->dev_private;
+	u32 reserved = 0, clock_bits;
+
+	/* On most chips, these bits must be preserved in software. */
+	if (!IS_I830(dev) && !IS_845G(dev))
+		reserved = I915_READ(chan->reg) & (GPIO_DATA_PULLUP_DISABLE |
+						   GPIO_CLOCK_PULLUP_DISABLE);
+
+	if (state_high)
+		clock_bits = GPIO_CLOCK_DIR_IN | GPIO_CLOCK_DIR_MASK;
+	else
+		clock_bits = GPIO_CLOCK_DIR_OUT | GPIO_CLOCK_DIR_MASK |
+			GPIO_CLOCK_VAL_MASK;
+	I915_WRITE(chan->reg, reserved | clock_bits);
+	udelay(I2C_RISEFALL_TIME); /* wait for the line to change state */
+}
+
+static void set_data(void *data, int state_high)
+{
+	struct intel_i2c_chan *chan = data;
+	struct drm_device *dev = chan->drm_dev;
+	struct drm_i915_private *dev_priv = chan->drm_dev->dev_private;
+	u32 reserved = 0, data_bits;
+
+	/* On most chips, these bits must be preserved in software. */
+	if (!IS_I830(dev) && !IS_845G(dev))
+		reserved = I915_READ(chan->reg) & (GPIO_DATA_PULLUP_DISABLE |
+						   GPIO_CLOCK_PULLUP_DISABLE);
+
+	if (state_high)
+		data_bits = GPIO_DATA_DIR_IN | GPIO_DATA_DIR_MASK;
+	else
+		data_bits = GPIO_DATA_DIR_OUT | GPIO_DATA_DIR_MASK |
+			GPIO_DATA_VAL_MASK;
+
+	I915_WRITE(chan->reg, reserved | data_bits);
+	udelay(I2C_RISEFALL_TIME); /* wait for the line to change state */
+}
+
+/**
+ * intel_i2c_create - instantiate an Intel i2c bus using the specified GPIO reg
+ * @dev: DRM device
+ * @output: driver specific output device
+ * @reg: GPIO reg to use
+ * @name: name for this bus
+ *
+ * Creates and registers a new i2c bus with the Linux i2c layer, for use
+ * in output probing and control (e.g. DDC or SDVO control functions).
+ *
+ * Possible values for @reg include:
+ *   %GPIOA
+ *   %GPIOB
+ *   %GPIOC
+ *   %GPIOD
+ *   %GPIOE
+ *   %GPIOF
+ *   %GPIOG
+ *   %GPIOH
+ * see PRM for details on how these different busses are used.
+ */
+struct intel_i2c_chan *intel_i2c_create(struct drm_device *dev, const u32 reg,
+					const char *name)
+{
+	struct intel_i2c_chan *chan;
+
+	chan = kzalloc(sizeof(struct intel_i2c_chan), GFP_KERNEL);
+	if (!chan)
+		goto out_free;
+
+	chan->drm_dev = dev;
+	chan->reg = reg;
+	snprintf(chan->adapter.name, I2C_NAME_SIZE, "intel drm %s", name);
+	chan->adapter.owner = THIS_MODULE;
+#ifndef I2C_HW_B_INTELFB
+#define I2C_HW_B_INTELFB I2C_HW_B_I810
+#endif
+	chan->adapter.id = I2C_HW_B_INTELFB;
+	chan->adapter.algo_data	= &chan->algo;
+	chan->adapter.dev.parent = &dev->pdev->dev;
+	chan->algo.setsda = set_data;
+	chan->algo.setscl = set_clock;
+	chan->algo.getsda = get_data;
+	chan->algo.getscl = get_clock;
+	chan->algo.udelay = 20;
+	chan->algo.timeout = usecs_to_jiffies(2200);
+	chan->algo.data = chan;
+
+	i2c_set_adapdata(&chan->adapter, chan);
+
+	if(i2c_bit_add_bus(&chan->adapter))
+		goto out_free;
+
+	/* JJJ:  raise SCL and SDA? */
+	set_data(chan, 1);
+	set_clock(chan, 1);
+	udelay(20);
+
+	return chan;
+
+out_free:
+	kfree(chan);
+	return NULL;
+}
+
+/**
+ * intel_i2c_destroy - unregister and free i2c bus resources
+ * @output: channel to free
+ *
+ * Unregister the adapter from the i2c layer, then free the structure.
+ */
+void intel_i2c_destroy(struct intel_i2c_chan *chan)
+{
+	if (!chan)
+		return;
+
+	i2c_del_adapter(&chan->adapter);
+	kfree(chan);
+}
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
new file mode 100644
index 000000000000..ccecfaf6307b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright © 2006-2007 Intel Corporation
+ * Copyright (c) 2006 Dave Airlie <airlied@linux.ie>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ *      Dave Airlie <airlied@linux.ie>
+ *      Jesse Barnes <jesse.barnes@intel.com>
+ */
+
+#include <linux/i2c.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "drm_edid.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+/**
+ * Sets the backlight level.
+ *
+ * \param level backlight level, from 0 to intel_lvds_get_max_backlight().
+ */
+static void intel_lvds_set_backlight(struct drm_device *dev, int level)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 blc_pwm_ctl;
+
+	blc_pwm_ctl = I915_READ(BLC_PWM_CTL) & ~BACKLIGHT_DUTY_CYCLE_MASK;
+	I915_WRITE(BLC_PWM_CTL, (blc_pwm_ctl |
+				 (level << BACKLIGHT_DUTY_CYCLE_SHIFT)));
+}
+
+/**
+ * Returns the maximum level of the backlight duty cycle field.
+ */
+static u32 intel_lvds_get_max_backlight(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	return ((I915_READ(BLC_PWM_CTL) & BACKLIGHT_MODULATION_FREQ_MASK) >>
+		BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
+}
+
+/**
+ * Sets the power state for the panel.
+ */
+static void intel_lvds_set_power(struct drm_device *dev, bool on)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 pp_status;
+
+	if (on) {
+		I915_WRITE(PP_CONTROL, I915_READ(PP_CONTROL) |
+			   POWER_TARGET_ON);
+		do {
+			pp_status = I915_READ(PP_STATUS);
+		} while ((pp_status & PP_ON) == 0);
+
+		intel_lvds_set_backlight(dev, dev_priv->backlight_duty_cycle);
+	} else {
+		intel_lvds_set_backlight(dev, 0);
+
+		I915_WRITE(PP_CONTROL, I915_READ(PP_CONTROL) &
+			   ~POWER_TARGET_ON);
+		do {
+			pp_status = I915_READ(PP_STATUS);
+		} while (pp_status & PP_ON);
+	}
+}
+
+static void intel_lvds_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+
+	if (mode == DRM_MODE_DPMS_ON)
+		intel_lvds_set_power(dev, true);
+	else
+		intel_lvds_set_power(dev, false);
+
+	/* XXX: We never power down the LVDS pairs. */
+}
+
+static void intel_lvds_save(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->savePP_ON = I915_READ(PP_ON_DELAYS);
+	dev_priv->savePP_OFF = I915_READ(PP_OFF_DELAYS);
+	dev_priv->savePP_CONTROL = I915_READ(PP_CONTROL);
+	dev_priv->savePP_DIVISOR = I915_READ(PP_DIVISOR);
+	dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL);
+	dev_priv->backlight_duty_cycle = (dev_priv->saveBLC_PWM_CTL &
+				       BACKLIGHT_DUTY_CYCLE_MASK);
+
+	/*
+	 * If the light is off at server startup, just make it full brightness
+	 */
+	if (dev_priv->backlight_duty_cycle == 0)
+		dev_priv->backlight_duty_cycle =
+			intel_lvds_get_max_backlight(dev);
+}
+
+static void intel_lvds_restore(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL);
+	I915_WRITE(PP_ON_DELAYS, dev_priv->savePP_ON);
+	I915_WRITE(PP_OFF_DELAYS, dev_priv->savePP_OFF);
+	I915_WRITE(PP_DIVISOR, dev_priv->savePP_DIVISOR);
+	I915_WRITE(PP_CONTROL, dev_priv->savePP_CONTROL);
+	if (dev_priv->savePP_CONTROL & POWER_TARGET_ON)
+		intel_lvds_set_power(dev, true);
+	else
+		intel_lvds_set_power(dev, false);
+}
+
+static int intel_lvds_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_display_mode *fixed_mode = dev_priv->panel_fixed_mode;
+
+	if (fixed_mode)	{
+		if (mode->hdisplay > fixed_mode->hdisplay)
+			return MODE_PANEL;
+		if (mode->vdisplay > fixed_mode->vdisplay)
+			return MODE_PANEL;
+	}
+
+	return MODE_OK;
+}
+
+static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	struct drm_encoder *tmp_encoder;
+
+	/* Should never happen!! */
+	if (!IS_I965G(dev) && intel_crtc->pipe == 0) {
+		printk(KERN_ERR "Can't support LVDS on pipe A\n");
+		return false;
+	}
+
+	/* Should never happen!! */
+	list_for_each_entry(tmp_encoder, &dev->mode_config.encoder_list, head) {
+		if (tmp_encoder != encoder && tmp_encoder->crtc == encoder->crtc) {
+			printk(KERN_ERR "Can't enable LVDS and another "
+			       "encoder on the same pipe\n");
+			return false;
+		}
+	}
+
+	/*
+	 * If we have timings from the BIOS for the panel, put them in
+	 * to the adjusted mode.  The CRTC will be set up for this mode,
+	 * with the panel scaling set up to source from the H/VDisplay
+	 * of the original mode.
+	 */
+	if (dev_priv->panel_fixed_mode != NULL) {
+		adjusted_mode->hdisplay = dev_priv->panel_fixed_mode->hdisplay;
+		adjusted_mode->hsync_start =
+			dev_priv->panel_fixed_mode->hsync_start;
+		adjusted_mode->hsync_end =
+			dev_priv->panel_fixed_mode->hsync_end;
+		adjusted_mode->htotal = dev_priv->panel_fixed_mode->htotal;
+		adjusted_mode->vdisplay = dev_priv->panel_fixed_mode->vdisplay;
+		adjusted_mode->vsync_start =
+			dev_priv->panel_fixed_mode->vsync_start;
+		adjusted_mode->vsync_end =
+			dev_priv->panel_fixed_mode->vsync_end;
+		adjusted_mode->vtotal = dev_priv->panel_fixed_mode->vtotal;
+		adjusted_mode->clock = dev_priv->panel_fixed_mode->clock;
+		drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+	}
+
+	/*
+	 * XXX: It would be nice to support lower refresh rates on the
+	 * panels to reduce power consumption, and perhaps match the
+	 * user's requested refresh rate.
+	 */
+
+	return true;
+}
+
+static void intel_lvds_prepare(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->saveBLC_PWM_CTL = I915_READ(BLC_PWM_CTL);
+	dev_priv->backlight_duty_cycle = (dev_priv->saveBLC_PWM_CTL &
+				       BACKLIGHT_DUTY_CYCLE_MASK);
+
+	intel_lvds_set_power(dev, false);
+}
+
+static void intel_lvds_commit( struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->backlight_duty_cycle == 0)
+		dev_priv->backlight_duty_cycle =
+			intel_lvds_get_max_backlight(dev);
+
+	intel_lvds_set_power(dev, true);
+}
+
+static void intel_lvds_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	u32 pfit_control;
+
+	/*
+	 * The LVDS pin pair will already have been turned on in the
+	 * intel_crtc_mode_set since it has a large impact on the DPLL
+	 * settings.
+	 */
+
+	/*
+	 * Enable automatic panel scaling so that non-native modes fill the
+	 * screen.  Should be enabled before the pipe is enabled, according to
+	 * register description and PRM.
+	 */
+	if (mode->hdisplay != adjusted_mode->hdisplay ||
+	    mode->vdisplay != adjusted_mode->vdisplay)
+		pfit_control = (PFIT_ENABLE | VERT_AUTO_SCALE |
+				HORIZ_AUTO_SCALE | VERT_INTERP_BILINEAR |
+				HORIZ_INTERP_BILINEAR);
+	else
+		pfit_control = 0;
+
+	if (!IS_I965G(dev)) {
+		if (dev_priv->panel_wants_dither)
+			pfit_control |= PANEL_8TO6_DITHER_ENABLE;
+	}
+	else
+		pfit_control |= intel_crtc->pipe << PFIT_PIPE_SHIFT;
+
+	I915_WRITE(PFIT_CONTROL, pfit_control);
+}
+
+/**
+ * Detect the LVDS connection.
+ *
+ * This always returns CONNECTOR_STATUS_CONNECTED.  This connector should only have
+ * been set up if the LVDS was actually connected anyway.
+ */
+static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
+{
+	return connector_status_connected;
+}
+
+/**
+ * Return the list of DDC modes if available, or the BIOS fixed mode otherwise.
+ */
+static int intel_lvds_get_modes(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret = 0;
+
+	ret = intel_ddc_get_modes(intel_output);
+
+	if (ret)
+		return ret;
+
+	/* Didn't get an EDID, so
+	 * Set wide sync ranges so we get all modes
+	 * handed to valid_mode for checking
+	 */
+	connector->display_info.min_vfreq = 0;
+	connector->display_info.max_vfreq = 200;
+	connector->display_info.min_hfreq = 0;
+	connector->display_info.max_hfreq = 200;
+
+	if (dev_priv->panel_fixed_mode != NULL) {
+		struct drm_display_mode *mode;
+
+		mutex_unlock(&dev->mode_config.mutex);
+		mode = drm_mode_duplicate(dev, dev_priv->panel_fixed_mode);
+		drm_mode_probed_add(connector, mode);
+		mutex_unlock(&dev->mode_config.mutex);
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * intel_lvds_destroy - unregister and free LVDS structures
+ * @connector: connector to free
+ *
+ * Unregister the DDC bus for this connector then free the driver private
+ * structure.
+ */
+static void intel_lvds_destroy(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	if (intel_output->ddc_bus)
+		intel_i2c_destroy(intel_output->ddc_bus);
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
+
+static const struct drm_encoder_helper_funcs intel_lvds_helper_funcs = {
+	.dpms = intel_lvds_dpms,
+	.mode_fixup = intel_lvds_mode_fixup,
+	.prepare = intel_lvds_prepare,
+	.mode_set = intel_lvds_mode_set,
+	.commit = intel_lvds_commit,
+};
+
+static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs = {
+	.get_modes = intel_lvds_get_modes,
+	.mode_valid = intel_lvds_mode_valid,
+	.best_encoder = intel_best_encoder,
+};
+
+static const struct drm_connector_funcs intel_lvds_connector_funcs = {
+	.save = intel_lvds_save,
+	.restore = intel_lvds_restore,
+	.detect = intel_lvds_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = intel_lvds_destroy,
+};
+
+
+static void intel_lvds_enc_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs intel_lvds_enc_funcs = {
+	.destroy = intel_lvds_enc_destroy,
+};
+
+
+
+/**
+ * intel_lvds_init - setup LVDS connectors on this device
+ * @dev: drm device
+ *
+ * Create the connector, register the LVDS DDC bus, and try to figure out what
+ * modes we can display on the LVDS panel (if present).
+ */
+void intel_lvds_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output;
+	struct drm_connector *connector;
+	struct drm_encoder *encoder;
+	struct drm_display_mode *scan; /* *modes, *bios_mode; */
+	struct drm_crtc *crtc;
+	u32 lvds;
+	int pipe;
+
+	intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
+	if (!intel_output) {
+		return;
+	}
+
+	connector = &intel_output->base;
+	encoder = &intel_output->enc;
+	drm_connector_init(dev, &intel_output->base, &intel_lvds_connector_funcs,
+			   DRM_MODE_CONNECTOR_LVDS);
+
+	drm_encoder_init(dev, &intel_output->enc, &intel_lvds_enc_funcs,
+			 DRM_MODE_ENCODER_LVDS);
+
+	drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
+	intel_output->type = INTEL_OUTPUT_LVDS;
+
+	drm_encoder_helper_add(encoder, &intel_lvds_helper_funcs);
+	drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs);
+	connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+	connector->interlace_allowed = false;
+	connector->doublescan_allowed = false;
+
+
+	/*
+	 * LVDS discovery:
+	 * 1) check for EDID on DDC
+	 * 2) check for VBT data
+	 * 3) check to see if LVDS is already on
+	 *    if none of the above, no panel
+	 * 4) make sure lid is open
+	 *    if closed, act like it's not there for now
+	 */
+
+	/* Set up the DDC bus. */
+	intel_output->ddc_bus = intel_i2c_create(dev, GPIOC, "LVDSDDC_C");
+	if (!intel_output->ddc_bus) {
+		dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
+			   "failed.\n");
+		goto failed;
+	}
+
+	/*
+	 * Attempt to get the fixed panel mode from DDC.  Assume that the
+	 * preferred mode is the right one.
+	 */
+	intel_ddc_get_modes(intel_output);
+
+	list_for_each_entry(scan, &connector->probed_modes, head) {
+		mutex_lock(&dev->mode_config.mutex);
+		if (scan->type & DRM_MODE_TYPE_PREFERRED) {
+			dev_priv->panel_fixed_mode =
+				drm_mode_duplicate(dev, scan);
+			mutex_unlock(&dev->mode_config.mutex);
+			goto out; /* FIXME: check for quirks */
+		}
+		mutex_unlock(&dev->mode_config.mutex);
+	}
+
+	/* Failed to get EDID, what about VBT? */
+	if (dev_priv->vbt_mode) {
+		mutex_lock(&dev->mode_config.mutex);
+		dev_priv->panel_fixed_mode =
+			drm_mode_duplicate(dev, dev_priv->vbt_mode);
+		mutex_unlock(&dev->mode_config.mutex);
+	}
+
+	/*
+	 * If we didn't get EDID, try checking if the panel is already turned
+	 * on.  If so, assume that whatever is currently programmed is the
+	 * correct mode.
+	 */
+	lvds = I915_READ(LVDS);
+	pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0;
+	crtc = intel_get_crtc_from_pipe(dev, pipe);
+
+	if (crtc && (lvds & LVDS_PORT_EN)) {
+		dev_priv->panel_fixed_mode = intel_crtc_mode_get(dev, crtc);
+		if (dev_priv->panel_fixed_mode) {
+			dev_priv->panel_fixed_mode->type |=
+				DRM_MODE_TYPE_PREFERRED;
+			goto out; /* FIXME: check for quirks */
+		}
+	}
+
+	/* If we still don't have a mode after all that, give up. */
+	if (!dev_priv->panel_fixed_mode)
+		goto failed;
+
+	/* FIXME: detect aopen & mac mini type stuff automatically? */
+	/*
+	 * Blacklist machines with BIOSes that list an LVDS panel without
+	 * actually having one.
+	 */
+	if (IS_I945GM(dev)) {
+		/* aopen mini pc */
+		if (dev->pdev->subsystem_vendor == 0xa0a0)
+			goto failed;
+
+		if ((dev->pdev->subsystem_vendor == 0x8086) &&
+		    (dev->pdev->subsystem_device == 0x7270)) {
+			/* It's a Mac Mini or Macbook Pro.
+			 *
+			 * Apple hardware is out to get us.  The macbook pro
+			 * has a real LVDS panel, but the mac mini does not,
+			 * and they have the same device IDs.  We'll
+			 * distinguish by panel size, on the assumption
+			 * that Apple isn't about to make any machines with an
+			 * 800x600 display.
+			 */
+
+			if (dev_priv->panel_fixed_mode != NULL &&
+			    dev_priv->panel_fixed_mode->hdisplay == 800 &&
+			    dev_priv->panel_fixed_mode->vdisplay == 600) {
+				DRM_DEBUG("Suspected Mac Mini, ignoring the LVDS\n");
+				goto failed;
+			}
+		}
+	}
+
+
+out:
+	drm_sysfs_connector_add(connector);
+	return;
+
+failed:
+	DRM_DEBUG("No LVDS modes found, disabling.\n");
+	if (intel_output->ddc_bus)
+		intel_i2c_destroy(intel_output->ddc_bus);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c
new file mode 100644
index 000000000000..e42019e5d661
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_modes.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2007 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/i2c.h>
+#include <linux/fb.h>
+#include "drmP.h"
+#include "intel_drv.h"
+
+/**
+ * intel_ddc_probe
+ *
+ */
+bool intel_ddc_probe(struct intel_output *intel_output)
+{
+	u8 out_buf[] = { 0x0, 0x0};
+	u8 buf[2];
+	int ret;
+	struct i2c_msg msgs[] = {
+		{
+			.addr = 0x50,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = 0x50,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = buf,
+		}
+	};
+
+	ret = i2c_transfer(&intel_output->ddc_bus->adapter, msgs, 2);
+	if (ret == 2)
+		return true;
+
+	return false;
+}
+
+/**
+ * intel_ddc_get_modes - get modelist from monitor
+ * @connector: DRM connector device to use
+ *
+ * Fetch the EDID information from @connector using the DDC bus.
+ */
+int intel_ddc_get_modes(struct intel_output *intel_output)
+{
+	struct edid *edid;
+	int ret = 0;
+
+	edid = drm_get_edid(&intel_output->base,
+			    &intel_output->ddc_bus->adapter);
+	if (edid) {
+		drm_mode_connector_update_edid_property(&intel_output->base,
+							edid);
+		ret = drm_add_edid_modes(&intel_output->base, edid);
+		kfree(edid);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
new file mode 100644
index 000000000000..626258d72c90
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -0,0 +1,1127 @@
+/*
+ * Copyright 2006 Dave Airlie <airlied@linux.ie>
+ * Copyright © 2006-2007 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ */
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "intel_sdvo_regs.h"
+
+#undef SDVO_DEBUG
+
+struct intel_sdvo_priv {
+	struct intel_i2c_chan *i2c_bus;
+	int slaveaddr;
+	int output_device;
+
+	u16 active_outputs;
+
+	struct intel_sdvo_caps caps;
+	int pixel_clock_min, pixel_clock_max;
+
+	int save_sdvo_mult;
+	u16 save_active_outputs;
+	struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2;
+	struct intel_sdvo_dtd save_output_dtd[16];
+	u32 save_SDVOX;
+};
+
+/**
+ * Writes the SDVOB or SDVOC with the given value, but always writes both
+ * SDVOB and SDVOC to work around apparent hardware issues (according to
+ * comments in the BIOS).
+ */
+void intel_sdvo_write_sdvox(struct intel_output *intel_output, u32 val)
+{
+	struct drm_device *dev = intel_output->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_sdvo_priv   *sdvo_priv = intel_output->dev_priv;
+	u32 bval = val, cval = val;
+	int i;
+
+	if (sdvo_priv->output_device == SDVOB) {
+		cval = I915_READ(SDVOC);
+	} else {
+		bval = I915_READ(SDVOB);
+	}
+	/*
+	 * Write the registers twice for luck. Sometimes,
+	 * writing them only once doesn't appear to 'stick'.
+	 * The BIOS does this too. Yay, magic
+	 */
+	for (i = 0; i < 2; i++)
+	{
+		I915_WRITE(SDVOB, bval);
+		I915_READ(SDVOB);
+		I915_WRITE(SDVOC, cval);
+		I915_READ(SDVOC);
+	}
+}
+
+static bool intel_sdvo_read_byte(struct intel_output *intel_output, u8 addr,
+				 u8 *ch)
+{
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	u8 out_buf[2];
+	u8 buf[2];
+	int ret;
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr = sdvo_priv->i2c_bus->slave_addr,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = sdvo_priv->i2c_bus->slave_addr,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = buf,
+		}
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = 0;
+
+	if ((ret = i2c_transfer(&sdvo_priv->i2c_bus->adapter, msgs, 2)) == 2)
+	{
+		*ch = buf[0];
+		return true;
+	}
+
+	DRM_DEBUG("i2c transfer returned %d\n", ret);
+	return false;
+}
+
+static bool intel_sdvo_write_byte(struct intel_output *intel_output, int addr,
+				  u8 ch)
+{
+	u8 out_buf[2];
+	struct i2c_msg msgs[] = {
+		{
+			.addr = intel_output->i2c_bus->slave_addr,
+			.flags = 0,
+			.len = 2,
+			.buf = out_buf,
+		}
+	};
+
+	out_buf[0] = addr;
+	out_buf[1] = ch;
+
+	if (i2c_transfer(&intel_output->i2c_bus->adapter, msgs, 1) == 1)
+	{
+		return true;
+	}
+	return false;
+}
+
+#define SDVO_CMD_NAME_ENTRY(cmd) {cmd, #cmd}
+/** Mapping of command numbers to names, for debug output */
+const static struct _sdvo_cmd_name {
+    u8 cmd;
+    char *name;
+} sdvo_cmd_names[] = {
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FIRMWARE_REV),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TRAINED_INPUTS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_OUTPUTS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_OUTPUTS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_IN_OUT_MAP),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_IN_OUT_MAP),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ATTACHED_DISPLAYS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HOT_PLUG_SUPPORT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_HOT_PLUG),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_HOT_PLUG),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_INPUT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_OUTPUT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART1),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART2),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART2),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART1),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART2),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART1),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART2),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CLOCK_RATE_MULT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CLOCK_RATE_MULT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_RESOLUTION_SUPPORT),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH),
+};
+
+#define SDVO_NAME(dev_priv) ((dev_priv)->output_device == SDVOB ? "SDVOB" : "SDVOC")
+#define SDVO_PRIV(output)   ((struct intel_sdvo_priv *) (output)->dev_priv)
+
+#ifdef SDVO_DEBUG
+static void intel_sdvo_debug_write(struct intel_output *intel_output, u8 cmd,
+				   void *args, int args_len)
+{
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	int i;
+
+	DRM_DEBUG("%s: W: %02X ", SDVO_NAME(sdvo_priv), cmd);
+	for (i = 0; i < args_len; i++)
+		printk("%02X ", ((u8 *)args)[i]);
+	for (; i < 8; i++)
+		printk("   ");
+	for (i = 0; i < sizeof(sdvo_cmd_names) / sizeof(sdvo_cmd_names[0]); i++) {
+		if (cmd == sdvo_cmd_names[i].cmd) {
+			printk("(%s)", sdvo_cmd_names[i].name);
+			break;
+		}
+	}
+	if (i == sizeof(sdvo_cmd_names)/ sizeof(sdvo_cmd_names[0]))
+		printk("(%02X)",cmd);
+	printk("\n");
+}
+#else
+#define intel_sdvo_debug_write(o, c, a, l)
+#endif
+
+static void intel_sdvo_write_cmd(struct intel_output *intel_output, u8 cmd,
+				 void *args, int args_len)
+{
+	int i;
+
+	intel_sdvo_debug_write(intel_output, cmd, args, args_len);
+
+	for (i = 0; i < args_len; i++) {
+		intel_sdvo_write_byte(intel_output, SDVO_I2C_ARG_0 - i,
+				      ((u8*)args)[i]);
+	}
+
+	intel_sdvo_write_byte(intel_output, SDVO_I2C_OPCODE, cmd);
+}
+
+#ifdef SDVO_DEBUG
+static const char *cmd_status_names[] = {
+	"Power on",
+	"Success",
+	"Not supported",
+	"Invalid arg",
+	"Pending",
+	"Target not specified",
+	"Scaling not supported"
+};
+
+static void intel_sdvo_debug_response(struct intel_output *intel_output,
+				      void *response, int response_len,
+				      u8 status)
+{
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+
+	DRM_DEBUG("%s: R: ", SDVO_NAME(sdvo_priv));
+	for (i = 0; i < response_len; i++)
+		printk("%02X ", ((u8 *)response)[i]);
+	for (; i < 8; i++)
+		printk("   ");
+	if (status <= SDVO_CMD_STATUS_SCALING_NOT_SUPP)
+		printk("(%s)", cmd_status_names[status]);
+	else
+		printk("(??? %d)", status);
+	printk("\n");
+}
+#else
+#define intel_sdvo_debug_response(o, r, l, s)
+#endif
+
+static u8 intel_sdvo_read_response(struct intel_output *intel_output,
+				   void *response, int response_len)
+{
+	int i;
+	u8 status;
+	u8 retry = 50;
+
+	while (retry--) {
+		/* Read the command response */
+		for (i = 0; i < response_len; i++) {
+			intel_sdvo_read_byte(intel_output,
+					     SDVO_I2C_RETURN_0 + i,
+					     &((u8 *)response)[i]);
+		}
+
+		/* read the return status */
+		intel_sdvo_read_byte(intel_output, SDVO_I2C_CMD_STATUS,
+				     &status);
+
+		intel_sdvo_debug_response(intel_output, response, response_len,
+					  status);
+		if (status != SDVO_CMD_STATUS_PENDING)
+			return status;
+
+		mdelay(50);
+	}
+
+	return status;
+}
+
+int intel_sdvo_get_pixel_multiplier(struct drm_display_mode *mode)
+{
+	if (mode->clock >= 100000)
+		return 1;
+	else if (mode->clock >= 50000)
+		return 2;
+	else
+		return 4;
+}
+
+/**
+ * Don't check status code from this as it switches the bus back to the
+ * SDVO chips which defeats the purpose of doing a bus switch in the first
+ * place.
+ */
+void intel_sdvo_set_control_bus_switch(struct intel_output *intel_output, u8 target)
+{
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_CONTROL_BUS_SWITCH, &target, 1);
+}
+
+static bool intel_sdvo_set_target_input(struct intel_output *intel_output, bool target_0, bool target_1)
+{
+	struct intel_sdvo_set_target_input_args targets = {0};
+	u8 status;
+
+	if (target_0 && target_1)
+		return SDVO_CMD_STATUS_NOTSUPP;
+
+	if (target_1)
+		targets.target_1 = 1;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_TARGET_INPUT, &targets,
+			     sizeof(targets));
+
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+
+	return (status == SDVO_CMD_STATUS_SUCCESS);
+}
+
+/**
+ * Return whether each input is trained.
+ *
+ * This function is making an assumption about the layout of the response,
+ * which should be checked against the docs.
+ */
+static bool intel_sdvo_get_trained_inputs(struct intel_output *intel_output, bool *input_1, bool *input_2)
+{
+	struct intel_sdvo_get_trained_inputs_response response;
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_TRAINED_INPUTS, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, &response, sizeof(response));
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	*input_1 = response.input0_trained;
+	*input_2 = response.input1_trained;
+	return true;
+}
+
+static bool intel_sdvo_get_active_outputs(struct intel_output *intel_output,
+					  u16 *outputs)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ACTIVE_OUTPUTS, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, outputs, sizeof(*outputs));
+
+	return (status == SDVO_CMD_STATUS_SUCCESS);
+}
+
+static bool intel_sdvo_set_active_outputs(struct intel_output *intel_output,
+					  u16 outputs)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_ACTIVE_OUTPUTS, &outputs,
+			     sizeof(outputs));
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+	return (status == SDVO_CMD_STATUS_SUCCESS);
+}
+
+static bool intel_sdvo_set_encoder_power_state(struct intel_output *intel_output,
+					       int mode)
+{
+	u8 status, state = SDVO_ENCODER_STATE_ON;
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		state = SDVO_ENCODER_STATE_ON;
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+		state = SDVO_ENCODER_STATE_STANDBY;
+		break;
+	case DRM_MODE_DPMS_SUSPEND:
+		state = SDVO_ENCODER_STATE_SUSPEND;
+		break;
+	case DRM_MODE_DPMS_OFF:
+		state = SDVO_ENCODER_STATE_OFF;
+		break;
+	}
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_ENCODER_POWER_STATE, &state,
+			     sizeof(state));
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+
+	return (status == SDVO_CMD_STATUS_SUCCESS);
+}
+
+static bool intel_sdvo_get_input_pixel_clock_range(struct intel_output *intel_output,
+						   int *clock_min,
+						   int *clock_max)
+{
+	struct intel_sdvo_pixel_clock_range clocks;
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE,
+			     NULL, 0);
+
+	status = intel_sdvo_read_response(intel_output, &clocks, sizeof(clocks));
+
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	/* Convert the values from units of 10 kHz to kHz. */
+	*clock_min = clocks.min * 10;
+	*clock_max = clocks.max * 10;
+
+	return true;
+}
+
+static bool intel_sdvo_set_target_output(struct intel_output *intel_output,
+					 u16 outputs)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_TARGET_OUTPUT, &outputs,
+			     sizeof(outputs));
+
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+	return (status == SDVO_CMD_STATUS_SUCCESS);
+}
+
+static bool intel_sdvo_get_timing(struct intel_output *intel_output, u8 cmd,
+				  struct intel_sdvo_dtd *dtd)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, cmd, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, &dtd->part1,
+					  sizeof(dtd->part1));
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	intel_sdvo_write_cmd(intel_output, cmd + 1, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, &dtd->part2,
+					  sizeof(dtd->part2));
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	return true;
+}
+
+static bool intel_sdvo_get_input_timing(struct intel_output *intel_output,
+					 struct intel_sdvo_dtd *dtd)
+{
+	return intel_sdvo_get_timing(intel_output,
+				     SDVO_CMD_GET_INPUT_TIMINGS_PART1, dtd);
+}
+
+static bool intel_sdvo_get_output_timing(struct intel_output *intel_output,
+					 struct intel_sdvo_dtd *dtd)
+{
+	return intel_sdvo_get_timing(intel_output,
+				     SDVO_CMD_GET_OUTPUT_TIMINGS_PART1, dtd);
+}
+
+static bool intel_sdvo_set_timing(struct intel_output *intel_output, u8 cmd,
+				  struct intel_sdvo_dtd *dtd)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, cmd, &dtd->part1, sizeof(dtd->part1));
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	intel_sdvo_write_cmd(intel_output, cmd + 1, &dtd->part2, sizeof(dtd->part2));
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	return true;
+}
+
+static bool intel_sdvo_set_input_timing(struct intel_output *intel_output,
+					 struct intel_sdvo_dtd *dtd)
+{
+	return intel_sdvo_set_timing(intel_output,
+				     SDVO_CMD_SET_INPUT_TIMINGS_PART1, dtd);
+}
+
+static bool intel_sdvo_set_output_timing(struct intel_output *intel_output,
+					 struct intel_sdvo_dtd *dtd)
+{
+	return intel_sdvo_set_timing(intel_output,
+				     SDVO_CMD_SET_OUTPUT_TIMINGS_PART1, dtd);
+}
+
+
+static int intel_sdvo_get_clock_rate_mult(struct intel_output *intel_output)
+{
+	u8 response, status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_CLOCK_RATE_MULT, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, &response, 1);
+
+	if (status != SDVO_CMD_STATUS_SUCCESS) {
+		DRM_DEBUG("Couldn't get SDVO clock rate multiplier\n");
+		return SDVO_CLOCK_RATE_MULT_1X;
+	} else {
+		DRM_DEBUG("Current clock rate multiplier: %d\n", response);
+	}
+
+	return response;
+}
+
+static bool intel_sdvo_set_clock_rate_mult(struct intel_output *intel_output, u8 val)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_CLOCK_RATE_MULT, &val, 1);
+	status = intel_sdvo_read_response(intel_output, NULL, 0);
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	return true;
+}
+
+static bool intel_sdvo_mode_fixup(struct drm_encoder *encoder,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
+{
+	/* Make the CRTC code factor in the SDVO pixel multiplier.  The SDVO
+	 * device will be told of the multiplier during mode_set.
+	 */
+	adjusted_mode->clock *= intel_sdvo_get_pixel_multiplier(mode);
+	return true;
+}
+
+static void intel_sdvo_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc = encoder->crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	u16 width, height;
+	u16 h_blank_len, h_sync_len, v_blank_len, v_sync_len;
+	u16 h_sync_offset, v_sync_offset;
+	u32 sdvox;
+	struct intel_sdvo_dtd output_dtd;
+	int sdvo_pixel_multiply;
+
+	if (!mode)
+		return;
+
+	width = mode->crtc_hdisplay;
+	height = mode->crtc_vdisplay;
+
+	/* do some mode translations */
+	h_blank_len = mode->crtc_hblank_end - mode->crtc_hblank_start;
+	h_sync_len = mode->crtc_hsync_end - mode->crtc_hsync_start;
+
+	v_blank_len = mode->crtc_vblank_end - mode->crtc_vblank_start;
+	v_sync_len = mode->crtc_vsync_end - mode->crtc_vsync_start;
+
+	h_sync_offset = mode->crtc_hsync_start - mode->crtc_hblank_start;
+	v_sync_offset = mode->crtc_vsync_start - mode->crtc_vblank_start;
+
+	output_dtd.part1.clock = mode->clock / 10;
+	output_dtd.part1.h_active = width & 0xff;
+	output_dtd.part1.h_blank = h_blank_len & 0xff;
+	output_dtd.part1.h_high = (((width >> 8) & 0xf) << 4) |
+		((h_blank_len >> 8) & 0xf);
+	output_dtd.part1.v_active = height & 0xff;
+	output_dtd.part1.v_blank = v_blank_len & 0xff;
+	output_dtd.part1.v_high = (((height >> 8) & 0xf) << 4) |
+		((v_blank_len >> 8) & 0xf);
+
+	output_dtd.part2.h_sync_off = h_sync_offset;
+	output_dtd.part2.h_sync_width = h_sync_len & 0xff;
+	output_dtd.part2.v_sync_off_width = (v_sync_offset & 0xf) << 4 |
+		(v_sync_len & 0xf);
+	output_dtd.part2.sync_off_width_high = ((h_sync_offset & 0x300) >> 2) |
+		((h_sync_len & 0x300) >> 4) | ((v_sync_offset & 0x30) >> 2) |
+		((v_sync_len & 0x30) >> 4);
+
+	output_dtd.part2.dtd_flags = 0x18;
+	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+		output_dtd.part2.dtd_flags |= 0x2;
+	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+		output_dtd.part2.dtd_flags |= 0x4;
+
+	output_dtd.part2.sdvo_flags = 0;
+	output_dtd.part2.v_sync_off_high = v_sync_offset & 0xc0;
+	output_dtd.part2.reserved = 0;
+
+	/* Set the output timing to the screen */
+	intel_sdvo_set_target_output(intel_output, sdvo_priv->active_outputs);
+	intel_sdvo_set_output_timing(intel_output, &output_dtd);
+
+	/* Set the input timing to the screen. Assume always input 0. */
+	intel_sdvo_set_target_input(intel_output, true, false);
+
+	/* We would like to use i830_sdvo_create_preferred_input_timing() to
+	 * provide the device with a timing it can support, if it supports that
+	 * feature.  However, presumably we would need to adjust the CRTC to
+	 * output the preferred timing, and we don't support that currently.
+	 */
+	intel_sdvo_set_input_timing(intel_output, &output_dtd);
+
+	switch (intel_sdvo_get_pixel_multiplier(mode)) {
+	case 1:
+		intel_sdvo_set_clock_rate_mult(intel_output,
+					       SDVO_CLOCK_RATE_MULT_1X);
+		break;
+	case 2:
+		intel_sdvo_set_clock_rate_mult(intel_output,
+					       SDVO_CLOCK_RATE_MULT_2X);
+		break;
+	case 4:
+		intel_sdvo_set_clock_rate_mult(intel_output,
+					       SDVO_CLOCK_RATE_MULT_4X);
+		break;
+	}
+
+	/* Set the SDVO control regs. */
+        if (0/*IS_I965GM(dev)*/) {
+                sdvox = SDVO_BORDER_ENABLE;
+        } else {
+                sdvox = I915_READ(sdvo_priv->output_device);
+                switch (sdvo_priv->output_device) {
+                case SDVOB:
+                        sdvox &= SDVOB_PRESERVE_MASK;
+                        break;
+                case SDVOC:
+                        sdvox &= SDVOC_PRESERVE_MASK;
+                        break;
+                }
+                sdvox |= (9 << 19) | SDVO_BORDER_ENABLE;
+        }
+	if (intel_crtc->pipe == 1)
+		sdvox |= SDVO_PIPE_B_SELECT;
+
+	sdvo_pixel_multiply = intel_sdvo_get_pixel_multiplier(mode);
+	if (IS_I965G(dev)) {
+		/* done in crtc_mode_set as the dpll_md reg must be written
+		   early */
+	} else if (IS_I945G(dev) || IS_I945GM(dev)) {
+		/* done in crtc_mode_set as it lives inside the
+		   dpll register */
+	} else {
+		sdvox |= (sdvo_pixel_multiply - 1) << SDVO_PORT_MULTIPLY_SHIFT;
+	}
+
+	intel_sdvo_write_sdvox(intel_output, sdvox);
+}
+
+static void intel_sdvo_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	u32 temp;
+
+	if (mode != DRM_MODE_DPMS_ON) {
+		intel_sdvo_set_active_outputs(intel_output, 0);
+		if (0)
+			intel_sdvo_set_encoder_power_state(intel_output, mode);
+
+		if (mode == DRM_MODE_DPMS_OFF) {
+			temp = I915_READ(sdvo_priv->output_device);
+			if ((temp & SDVO_ENABLE) != 0) {
+				intel_sdvo_write_sdvox(intel_output, temp & ~SDVO_ENABLE);
+			}
+		}
+	} else {
+		bool input1, input2;
+		int i;
+		u8 status;
+
+		temp = I915_READ(sdvo_priv->output_device);
+		if ((temp & SDVO_ENABLE) == 0)
+			intel_sdvo_write_sdvox(intel_output, temp | SDVO_ENABLE);
+		for (i = 0; i < 2; i++)
+		  intel_wait_for_vblank(dev);
+
+		status = intel_sdvo_get_trained_inputs(intel_output, &input1,
+						       &input2);
+
+
+		/* Warn if the device reported failure to sync.
+		 * A lot of SDVO devices fail to notify of sync, but it's
+		 * a given it the status is a success, we succeeded.
+		 */
+		if (status == SDVO_CMD_STATUS_SUCCESS && !input1) {
+			DRM_DEBUG("First %s output reported failure to sync\n",
+				   SDVO_NAME(sdvo_priv));
+		}
+
+		if (0)
+			intel_sdvo_set_encoder_power_state(intel_output, mode);
+		intel_sdvo_set_active_outputs(intel_output, sdvo_priv->active_outputs);
+	}
+	return;
+}
+
+static void intel_sdvo_save(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	int o;
+
+	sdvo_priv->save_sdvo_mult = intel_sdvo_get_clock_rate_mult(intel_output);
+	intel_sdvo_get_active_outputs(intel_output, &sdvo_priv->save_active_outputs);
+
+	if (sdvo_priv->caps.sdvo_inputs_mask & 0x1) {
+		intel_sdvo_set_target_input(intel_output, true, false);
+		intel_sdvo_get_input_timing(intel_output,
+					    &sdvo_priv->save_input_dtd_1);
+	}
+
+	if (sdvo_priv->caps.sdvo_inputs_mask & 0x2) {
+		intel_sdvo_set_target_input(intel_output, false, true);
+		intel_sdvo_get_input_timing(intel_output,
+					    &sdvo_priv->save_input_dtd_2);
+	}
+
+	for (o = SDVO_OUTPUT_FIRST; o <= SDVO_OUTPUT_LAST; o++)
+	{
+	        u16  this_output = (1 << o);
+		if (sdvo_priv->caps.output_flags & this_output)
+		{
+			intel_sdvo_set_target_output(intel_output, this_output);
+			intel_sdvo_get_output_timing(intel_output,
+						     &sdvo_priv->save_output_dtd[o]);
+		}
+	}
+
+	sdvo_priv->save_SDVOX = I915_READ(sdvo_priv->output_device);
+}
+
+static void intel_sdvo_restore(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+	int o;
+	int i;
+	bool input1, input2;
+	u8 status;
+
+	intel_sdvo_set_active_outputs(intel_output, 0);
+
+	for (o = SDVO_OUTPUT_FIRST; o <= SDVO_OUTPUT_LAST; o++)
+	{
+		u16  this_output = (1 << o);
+		if (sdvo_priv->caps.output_flags & this_output) {
+			intel_sdvo_set_target_output(intel_output, this_output);
+			intel_sdvo_set_output_timing(intel_output, &sdvo_priv->save_output_dtd[o]);
+		}
+	}
+
+	if (sdvo_priv->caps.sdvo_inputs_mask & 0x1) {
+		intel_sdvo_set_target_input(intel_output, true, false);
+		intel_sdvo_set_input_timing(intel_output, &sdvo_priv->save_input_dtd_1);
+	}
+
+	if (sdvo_priv->caps.sdvo_inputs_mask & 0x2) {
+		intel_sdvo_set_target_input(intel_output, false, true);
+		intel_sdvo_set_input_timing(intel_output, &sdvo_priv->save_input_dtd_2);
+	}
+
+	intel_sdvo_set_clock_rate_mult(intel_output, sdvo_priv->save_sdvo_mult);
+
+	I915_WRITE(sdvo_priv->output_device, sdvo_priv->save_SDVOX);
+
+	if (sdvo_priv->save_SDVOX & SDVO_ENABLE)
+	{
+		for (i = 0; i < 2; i++)
+			intel_wait_for_vblank(dev);
+		status = intel_sdvo_get_trained_inputs(intel_output, &input1, &input2);
+		if (status == SDVO_CMD_STATUS_SUCCESS && !input1)
+			DRM_DEBUG("First %s output reported failure to sync\n",
+				   SDVO_NAME(sdvo_priv));
+	}
+
+	intel_sdvo_set_active_outputs(intel_output, sdvo_priv->save_active_outputs);
+}
+
+static int intel_sdvo_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
+	if (sdvo_priv->pixel_clock_min > mode->clock)
+		return MODE_CLOCK_LOW;
+
+	if (sdvo_priv->pixel_clock_max < mode->clock)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+static bool intel_sdvo_get_capabilities(struct intel_output *intel_output, struct intel_sdvo_caps *caps)
+{
+	u8 status;
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_DEVICE_CAPS, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, caps, sizeof(*caps));
+	if (status != SDVO_CMD_STATUS_SUCCESS)
+		return false;
+
+	return true;
+}
+
+struct drm_connector* intel_sdvo_find(struct drm_device *dev, int sdvoB)
+{
+	struct drm_connector *connector = NULL;
+	struct intel_output *iout = NULL;
+	struct intel_sdvo_priv *sdvo;
+
+	/* find the sdvo connector */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		iout = to_intel_output(connector);
+
+		if (iout->type != INTEL_OUTPUT_SDVO)
+			continue;
+
+		sdvo = iout->dev_priv;
+
+		if (sdvo->output_device == SDVOB && sdvoB)
+			return connector;
+
+		if (sdvo->output_device == SDVOC && !sdvoB)
+			return connector;
+
+	}
+
+	return NULL;
+}
+
+int intel_sdvo_supports_hotplug(struct drm_connector *connector)
+{
+	u8 response[2];
+	u8 status;
+	struct intel_output *intel_output;
+	DRM_DEBUG("\n");
+
+	if (!connector)
+		return 0;
+
+	intel_output = to_intel_output(connector);
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_HOT_PLUG_SUPPORT, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, &response, 2);
+
+	if (response[0] !=0)
+		return 1;
+
+	return 0;
+}
+
+void intel_sdvo_set_hotplug(struct drm_connector *connector, int on)
+{
+	u8 response[2];
+	u8 status;
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ACTIVE_HOT_PLUG, NULL, 0);
+	intel_sdvo_read_response(intel_output, &response, 2);
+
+	if (on) {
+		intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_HOT_PLUG_SUPPORT, NULL, 0);
+		status = intel_sdvo_read_response(intel_output, &response, 2);
+
+		intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &response, 2);
+	} else {
+		response[0] = 0;
+		response[1] = 0;
+		intel_sdvo_write_cmd(intel_output, SDVO_CMD_SET_ACTIVE_HOT_PLUG, &response, 2);
+	}
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ACTIVE_HOT_PLUG, NULL, 0);
+	intel_sdvo_read_response(intel_output, &response, 2);
+}
+
+static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector)
+{
+	u8 response[2];
+	u8 status;
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ATTACHED_DISPLAYS, NULL, 0);
+	status = intel_sdvo_read_response(intel_output, &response, 2);
+
+	DRM_DEBUG("SDVO response %d %d\n", response[0], response[1]);
+	if ((response[0] != 0) || (response[1] != 0))
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+}
+
+static int intel_sdvo_get_modes(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	/* set the bus switch and get the modes */
+	intel_sdvo_set_control_bus_switch(intel_output, SDVO_CONTROL_BUS_DDC2);
+	intel_ddc_get_modes(intel_output);
+
+	if (list_empty(&connector->probed_modes))
+		return 0;
+	return 1;
+}
+
+static void intel_sdvo_destroy(struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	if (intel_output->i2c_bus)
+		intel_i2c_destroy(intel_output->i2c_bus);
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(intel_output);
+}
+
+static const struct drm_encoder_helper_funcs intel_sdvo_helper_funcs = {
+	.dpms = intel_sdvo_dpms,
+	.mode_fixup = intel_sdvo_mode_fixup,
+	.prepare = intel_encoder_prepare,
+	.mode_set = intel_sdvo_mode_set,
+	.commit = intel_encoder_commit,
+};
+
+static const struct drm_connector_funcs intel_sdvo_connector_funcs = {
+	.save = intel_sdvo_save,
+	.restore = intel_sdvo_restore,
+	.detect = intel_sdvo_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = intel_sdvo_destroy,
+};
+
+static const struct drm_connector_helper_funcs intel_sdvo_connector_helper_funcs = {
+	.get_modes = intel_sdvo_get_modes,
+	.mode_valid = intel_sdvo_mode_valid,
+	.best_encoder = intel_best_encoder,
+};
+
+void intel_sdvo_enc_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs intel_sdvo_enc_funcs = {
+	.destroy = intel_sdvo_enc_destroy,
+};
+
+
+void intel_sdvo_init(struct drm_device *dev, int output_device)
+{
+	struct drm_connector *connector;
+	struct intel_output *intel_output;
+	struct intel_sdvo_priv *sdvo_priv;
+	struct intel_i2c_chan *i2cbus = NULL;
+	int connector_type;
+	u8 ch[0x40];
+	int i;
+	int encoder_type, output_id;
+
+	intel_output = kcalloc(sizeof(struct intel_output)+sizeof(struct intel_sdvo_priv), 1, GFP_KERNEL);
+	if (!intel_output) {
+		return;
+	}
+
+	connector = &intel_output->base;
+
+	drm_connector_init(dev, connector, &intel_sdvo_connector_funcs,
+			   DRM_MODE_CONNECTOR_Unknown);
+	drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs);
+	sdvo_priv = (struct intel_sdvo_priv *)(intel_output + 1);
+	intel_output->type = INTEL_OUTPUT_SDVO;
+
+	connector->interlace_allowed = 0;
+	connector->doublescan_allowed = 0;
+
+	/* setup the DDC bus. */
+	if (output_device == SDVOB)
+		i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOB");
+	else
+		i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOC");
+
+	if (!i2cbus)
+		goto err_connector;
+
+	sdvo_priv->i2c_bus = i2cbus;
+
+	if (output_device == SDVOB) {
+		output_id = 1;
+		sdvo_priv->i2c_bus->slave_addr = 0x38;
+	} else {
+		output_id = 2;
+		sdvo_priv->i2c_bus->slave_addr = 0x39;
+	}
+
+	sdvo_priv->output_device = output_device;
+	intel_output->i2c_bus = i2cbus;
+	intel_output->dev_priv = sdvo_priv;
+
+
+	/* Read the regs to test if we can talk to the device */
+	for (i = 0; i < 0x40; i++) {
+		if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) {
+			DRM_DEBUG("No SDVO device found on SDVO%c\n",
+				  output_device == SDVOB ? 'B' : 'C');
+			goto err_i2c;
+		}
+	}
+
+	intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps);
+
+	memset(&sdvo_priv->active_outputs, 0, sizeof(sdvo_priv->active_outputs));
+
+	/* TODO, CVBS, SVID, YPRPB & SCART outputs. */
+	if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0)
+	{
+		sdvo_priv->active_outputs = SDVO_OUTPUT_RGB0;
+		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+		encoder_type = DRM_MODE_ENCODER_DAC;
+		connector_type = DRM_MODE_CONNECTOR_VGA;
+	}
+	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1)
+	{
+		sdvo_priv->active_outputs = SDVO_OUTPUT_RGB1;
+		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+		encoder_type = DRM_MODE_ENCODER_DAC;
+		connector_type = DRM_MODE_CONNECTOR_VGA;
+	}
+	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS0)
+	{
+		sdvo_priv->active_outputs = SDVO_OUTPUT_TMDS0;
+		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+		encoder_type = DRM_MODE_ENCODER_TMDS;
+		connector_type = DRM_MODE_CONNECTOR_DVID;
+	}
+	else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_TMDS1)
+	{
+		sdvo_priv->active_outputs = SDVO_OUTPUT_TMDS1;
+		connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+		encoder_type = DRM_MODE_ENCODER_TMDS;
+		connector_type = DRM_MODE_CONNECTOR_DVID;
+	}
+	else
+	{
+		unsigned char bytes[2];
+
+		memcpy (bytes, &sdvo_priv->caps.output_flags, 2);
+		DRM_DEBUG("%s: No active RGB or TMDS outputs (0x%02x%02x)\n",
+			  SDVO_NAME(sdvo_priv),
+			  bytes[0], bytes[1]);
+		goto err_i2c;
+	}
+
+	drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type);
+	drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs);
+	connector->connector_type = connector_type;
+
+	drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
+	drm_sysfs_connector_add(connector);
+
+	/* Set the input timing to the screen. Assume always input 0. */
+	intel_sdvo_set_target_input(intel_output, true, false);
+
+	intel_sdvo_get_input_pixel_clock_range(intel_output,
+					       &sdvo_priv->pixel_clock_min,
+					       &sdvo_priv->pixel_clock_max);
+
+
+	DRM_DEBUG("%s device VID/DID: %02X:%02X.%02X, "
+		  "clock range %dMHz - %dMHz, "
+		  "input 1: %c, input 2: %c, "
+		  "output 1: %c, output 2: %c\n",
+		  SDVO_NAME(sdvo_priv),
+		  sdvo_priv->caps.vendor_id, sdvo_priv->caps.device_id,
+		  sdvo_priv->caps.device_rev_id,
+		  sdvo_priv->pixel_clock_min / 1000,
+		  sdvo_priv->pixel_clock_max / 1000,
+		  (sdvo_priv->caps.sdvo_inputs_mask & 0x1) ? 'Y' : 'N',
+		  (sdvo_priv->caps.sdvo_inputs_mask & 0x2) ? 'Y' : 'N',
+		  /* check currently supported outputs */
+		  sdvo_priv->caps.output_flags &
+			(SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_RGB0) ? 'Y' : 'N',
+		  sdvo_priv->caps.output_flags &
+			(SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N');
+
+	intel_output->ddc_bus = i2cbus;
+
+	return;
+
+err_i2c:
+	intel_i2c_destroy(intel_output->i2c_bus);
+err_connector:
+	drm_connector_cleanup(connector);
+	kfree(intel_output);
+
+	return;
+}
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h
new file mode 100644
index 000000000000..861a43f8693c
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h
@@ -0,0 +1,327 @@
+/*
+ * Copyright � 2006-2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ */
+
+/**
+ * @file SDVO command definitions and structures.
+ */
+
+#define SDVO_OUTPUT_FIRST   (0)
+#define SDVO_OUTPUT_TMDS0   (1 << 0)
+#define SDVO_OUTPUT_RGB0    (1 << 1)
+#define SDVO_OUTPUT_CVBS0   (1 << 2)
+#define SDVO_OUTPUT_SVID0   (1 << 3)
+#define SDVO_OUTPUT_YPRPB0  (1 << 4)
+#define SDVO_OUTPUT_SCART0  (1 << 5)
+#define SDVO_OUTPUT_LVDS0   (1 << 6)
+#define SDVO_OUTPUT_TMDS1   (1 << 8)
+#define SDVO_OUTPUT_RGB1    (1 << 9)
+#define SDVO_OUTPUT_CVBS1   (1 << 10)
+#define SDVO_OUTPUT_SVID1   (1 << 11)
+#define SDVO_OUTPUT_YPRPB1  (1 << 12)
+#define SDVO_OUTPUT_SCART1  (1 << 13)
+#define SDVO_OUTPUT_LVDS1   (1 << 14)
+#define SDVO_OUTPUT_LAST    (14)
+
+struct intel_sdvo_caps {
+    u8 vendor_id;
+    u8 device_id;
+    u8 device_rev_id;
+    u8 sdvo_version_major;
+    u8 sdvo_version_minor;
+    unsigned int sdvo_inputs_mask:2;
+    unsigned int smooth_scaling:1;
+    unsigned int sharp_scaling:1;
+    unsigned int up_scaling:1;
+    unsigned int down_scaling:1;
+    unsigned int stall_support:1;
+    unsigned int pad:1;
+    u16 output_flags;
+} __attribute__((packed));
+
+/** This matches the EDID DTD structure, more or less */
+struct intel_sdvo_dtd {
+    struct {
+	u16 clock;		/**< pixel clock, in 10kHz units */
+	u8 h_active;		/**< lower 8 bits (pixels) */
+	u8 h_blank;		/**< lower 8 bits (pixels) */
+	u8 h_high;		/**< upper 4 bits each h_active, h_blank */
+	u8 v_active;		/**< lower 8 bits (lines) */
+	u8 v_blank;		/**< lower 8 bits (lines) */
+	u8 v_high;		/**< upper 4 bits each v_active, v_blank */
+    } part1;
+
+    struct {
+	u8 h_sync_off;	/**< lower 8 bits, from hblank start */
+	u8 h_sync_width;	/**< lower 8 bits (pixels) */
+	/** lower 4 bits each vsync offset, vsync width */
+	u8 v_sync_off_width;
+	/**
+	 * 2 high bits of hsync offset, 2 high bits of hsync width,
+	 * bits 4-5 of vsync offset, and 2 high bits of vsync width.
+	 */
+	u8 sync_off_width_high;
+	u8 dtd_flags;
+	u8 sdvo_flags;
+	/** bits 6-7 of vsync offset at bits 6-7 */
+	u8 v_sync_off_high;
+	u8 reserved;
+    } part2;
+} __attribute__((packed));
+
+struct intel_sdvo_pixel_clock_range {
+    u16 min;			/**< pixel clock, in 10kHz units */
+    u16 max;			/**< pixel clock, in 10kHz units */
+} __attribute__((packed));
+
+struct intel_sdvo_preferred_input_timing_args {
+    u16 clock;
+    u16 width;
+    u16 height;
+} __attribute__((packed));
+
+/* I2C registers for SDVO */
+#define SDVO_I2C_ARG_0				0x07
+#define SDVO_I2C_ARG_1				0x06
+#define SDVO_I2C_ARG_2				0x05
+#define SDVO_I2C_ARG_3				0x04
+#define SDVO_I2C_ARG_4				0x03
+#define SDVO_I2C_ARG_5				0x02
+#define SDVO_I2C_ARG_6				0x01
+#define SDVO_I2C_ARG_7				0x00
+#define SDVO_I2C_OPCODE				0x08
+#define SDVO_I2C_CMD_STATUS			0x09
+#define SDVO_I2C_RETURN_0			0x0a
+#define SDVO_I2C_RETURN_1			0x0b
+#define SDVO_I2C_RETURN_2			0x0c
+#define SDVO_I2C_RETURN_3			0x0d
+#define SDVO_I2C_RETURN_4			0x0e
+#define SDVO_I2C_RETURN_5			0x0f
+#define SDVO_I2C_RETURN_6			0x10
+#define SDVO_I2C_RETURN_7			0x11
+#define SDVO_I2C_VENDOR_BEGIN			0x20
+
+/* Status results */
+#define SDVO_CMD_STATUS_POWER_ON		0x0
+#define SDVO_CMD_STATUS_SUCCESS			0x1
+#define SDVO_CMD_STATUS_NOTSUPP			0x2
+#define SDVO_CMD_STATUS_INVALID_ARG		0x3
+#define SDVO_CMD_STATUS_PENDING			0x4
+#define SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED	0x5
+#define SDVO_CMD_STATUS_SCALING_NOT_SUPP	0x6
+
+/* SDVO commands, argument/result registers */
+
+#define SDVO_CMD_RESET					0x01
+
+/** Returns a struct intel_sdvo_caps */
+#define SDVO_CMD_GET_DEVICE_CAPS			0x02
+
+#define SDVO_CMD_GET_FIRMWARE_REV			0x86
+# define SDVO_DEVICE_FIRMWARE_MINOR			SDVO_I2C_RETURN_0
+# define SDVO_DEVICE_FIRMWARE_MAJOR			SDVO_I2C_RETURN_1
+# define SDVO_DEVICE_FIRMWARE_PATCH			SDVO_I2C_RETURN_2
+
+/**
+ * Reports which inputs are trained (managed to sync).
+ *
+ * Devices must have trained within 2 vsyncs of a mode change.
+ */
+#define SDVO_CMD_GET_TRAINED_INPUTS			0x03
+struct intel_sdvo_get_trained_inputs_response {
+    unsigned int input0_trained:1;
+    unsigned int input1_trained:1;
+    unsigned int pad:6;
+} __attribute__((packed));
+
+/** Returns a struct intel_sdvo_output_flags of active outputs. */
+#define SDVO_CMD_GET_ACTIVE_OUTPUTS			0x04
+
+/**
+ * Sets the current set of active outputs.
+ *
+ * Takes a struct intel_sdvo_output_flags.  Must be preceded by a SET_IN_OUT_MAP
+ * on multi-output devices.
+ */
+#define SDVO_CMD_SET_ACTIVE_OUTPUTS			0x05
+
+/**
+ * Returns the current mapping of SDVO inputs to outputs on the device.
+ *
+ * Returns two struct intel_sdvo_output_flags structures.
+ */
+#define SDVO_CMD_GET_IN_OUT_MAP				0x06
+
+/**
+ * Sets the current mapping of SDVO inputs to outputs on the device.
+ *
+ * Takes two struct i380_sdvo_output_flags structures.
+ */
+#define SDVO_CMD_SET_IN_OUT_MAP				0x07
+
+/**
+ * Returns a struct intel_sdvo_output_flags of attached displays.
+ */
+#define SDVO_CMD_GET_ATTACHED_DISPLAYS			0x0b
+
+/**
+ * Returns a struct intel_sdvo_ouptut_flags of displays supporting hot plugging.
+ */
+#define SDVO_CMD_GET_HOT_PLUG_SUPPORT			0x0c
+
+/**
+ * Takes a struct intel_sdvo_output_flags.
+ */
+#define SDVO_CMD_SET_ACTIVE_HOT_PLUG			0x0d
+
+/**
+ * Returns a struct intel_sdvo_output_flags of displays with hot plug
+ * interrupts enabled.
+ */
+#define SDVO_CMD_GET_ACTIVE_HOT_PLUG			0x0e
+
+#define SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE		0x0f
+struct intel_sdvo_get_interrupt_event_source_response {
+    u16 interrupt_status;
+    unsigned int ambient_light_interrupt:1;
+    unsigned int pad:7;
+} __attribute__((packed));
+
+/**
+ * Selects which input is affected by future input commands.
+ *
+ * Commands affected include SET_INPUT_TIMINGS_PART[12],
+ * GET_INPUT_TIMINGS_PART[12], GET_PREFERRED_INPUT_TIMINGS_PART[12],
+ * GET_INPUT_PIXEL_CLOCK_RANGE, and CREATE_PREFERRED_INPUT_TIMINGS.
+ */
+#define SDVO_CMD_SET_TARGET_INPUT			0x10
+struct intel_sdvo_set_target_input_args {
+    unsigned int target_1:1;
+    unsigned int pad:7;
+} __attribute__((packed));
+
+/**
+ * Takes a struct intel_sdvo_output_flags of which outputs are targetted by
+ * future output commands.
+ *
+ * Affected commands inclue SET_OUTPUT_TIMINGS_PART[12],
+ * GET_OUTPUT_TIMINGS_PART[12], and GET_OUTPUT_PIXEL_CLOCK_RANGE.
+ */
+#define SDVO_CMD_SET_TARGET_OUTPUT			0x11
+
+#define SDVO_CMD_GET_INPUT_TIMINGS_PART1		0x12
+#define SDVO_CMD_GET_INPUT_TIMINGS_PART2		0x13
+#define SDVO_CMD_SET_INPUT_TIMINGS_PART1		0x14
+#define SDVO_CMD_SET_INPUT_TIMINGS_PART2		0x15
+#define SDVO_CMD_SET_OUTPUT_TIMINGS_PART1		0x16
+#define SDVO_CMD_SET_OUTPUT_TIMINGS_PART2		0x17
+#define SDVO_CMD_GET_OUTPUT_TIMINGS_PART1		0x18
+#define SDVO_CMD_GET_OUTPUT_TIMINGS_PART2		0x19
+/* Part 1 */
+# define SDVO_DTD_CLOCK_LOW				SDVO_I2C_ARG_0
+# define SDVO_DTD_CLOCK_HIGH				SDVO_I2C_ARG_1
+# define SDVO_DTD_H_ACTIVE				SDVO_I2C_ARG_2
+# define SDVO_DTD_H_BLANK				SDVO_I2C_ARG_3
+# define SDVO_DTD_H_HIGH				SDVO_I2C_ARG_4
+# define SDVO_DTD_V_ACTIVE				SDVO_I2C_ARG_5
+# define SDVO_DTD_V_BLANK				SDVO_I2C_ARG_6
+# define SDVO_DTD_V_HIGH				SDVO_I2C_ARG_7
+/* Part 2 */
+# define SDVO_DTD_HSYNC_OFF				SDVO_I2C_ARG_0
+# define SDVO_DTD_HSYNC_WIDTH				SDVO_I2C_ARG_1
+# define SDVO_DTD_VSYNC_OFF_WIDTH			SDVO_I2C_ARG_2
+# define SDVO_DTD_SYNC_OFF_WIDTH_HIGH			SDVO_I2C_ARG_3
+# define SDVO_DTD_DTD_FLAGS				SDVO_I2C_ARG_4
+# define SDVO_DTD_DTD_FLAG_INTERLACED				(1 << 7)
+# define SDVO_DTD_DTD_FLAG_STEREO_MASK				(3 << 5)
+# define SDVO_DTD_DTD_FLAG_INPUT_MASK				(3 << 3)
+# define SDVO_DTD_DTD_FLAG_SYNC_MASK				(3 << 1)
+# define SDVO_DTD_SDVO_FLAS				SDVO_I2C_ARG_5
+# define SDVO_DTD_SDVO_FLAG_STALL				(1 << 7)
+# define SDVO_DTD_SDVO_FLAG_CENTERED				(0 << 6)
+# define SDVO_DTD_SDVO_FLAG_UPPER_LEFT				(1 << 6)
+# define SDVO_DTD_SDVO_FLAG_SCALING_MASK			(3 << 4)
+# define SDVO_DTD_SDVO_FLAG_SCALING_NONE			(0 << 4)
+# define SDVO_DTD_SDVO_FLAG_SCALING_SHARP			(1 << 4)
+# define SDVO_DTD_SDVO_FLAG_SCALING_SMOOTH			(2 << 4)
+# define SDVO_DTD_VSYNC_OFF_HIGH			SDVO_I2C_ARG_6
+
+/**
+ * Generates a DTD based on the given width, height, and flags.
+ *
+ * This will be supported by any device supporting scaling or interlaced
+ * modes.
+ */
+#define SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING		0x1a
+# define SDVO_PREFERRED_INPUT_TIMING_CLOCK_LOW		SDVO_I2C_ARG_0
+# define SDVO_PREFERRED_INPUT_TIMING_CLOCK_HIGH		SDVO_I2C_ARG_1
+# define SDVO_PREFERRED_INPUT_TIMING_WIDTH_LOW		SDVO_I2C_ARG_2
+# define SDVO_PREFERRED_INPUT_TIMING_WIDTH_HIGH		SDVO_I2C_ARG_3
+# define SDVO_PREFERRED_INPUT_TIMING_HEIGHT_LOW		SDVO_I2C_ARG_4
+# define SDVO_PREFERRED_INPUT_TIMING_HEIGHT_HIGH	SDVO_I2C_ARG_5
+# define SDVO_PREFERRED_INPUT_TIMING_FLAGS		SDVO_I2C_ARG_6
+# define SDVO_PREFERRED_INPUT_TIMING_FLAGS_INTERLACED		(1 << 0)
+# define SDVO_PREFERRED_INPUT_TIMING_FLAGS_SCALED		(1 << 1)
+
+#define SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1	0x1b
+#define SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2	0x1c
+
+/** Returns a struct intel_sdvo_pixel_clock_range */
+#define SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE		0x1d
+/** Returns a struct intel_sdvo_pixel_clock_range */
+#define SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE		0x1e
+
+/** Returns a byte bitfield containing SDVO_CLOCK_RATE_MULT_* flags */
+#define SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS		0x1f
+
+/** Returns a byte containing a SDVO_CLOCK_RATE_MULT_* flag */
+#define SDVO_CMD_GET_CLOCK_RATE_MULT			0x20
+/** Takes a byte containing a SDVO_CLOCK_RATE_MULT_* flag */
+#define SDVO_CMD_SET_CLOCK_RATE_MULT			0x21
+# define SDVO_CLOCK_RATE_MULT_1X				(1 << 0)
+# define SDVO_CLOCK_RATE_MULT_2X				(1 << 1)
+# define SDVO_CLOCK_RATE_MULT_4X				(1 << 3)
+
+#define SDVO_CMD_GET_SUPPORTED_TV_FORMATS		0x27
+
+#define SDVO_CMD_GET_TV_FORMAT				0x28
+
+#define SDVO_CMD_SET_TV_FORMAT				0x29
+
+#define SDVO_CMD_GET_SUPPORTED_POWER_STATES		0x2a
+#define SDVO_CMD_GET_ENCODER_POWER_STATE		0x2b
+#define SDVO_CMD_SET_ENCODER_POWER_STATE		0x2c
+# define SDVO_ENCODER_STATE_ON					(1 << 0)
+# define SDVO_ENCODER_STATE_STANDBY				(1 << 1)
+# define SDVO_ENCODER_STATE_SUSPEND				(1 << 2)
+# define SDVO_ENCODER_STATE_OFF					(1 << 3)
+
+#define SDVO_CMD_SET_TV_RESOLUTION_SUPPORT		0x93
+
+#define SDVO_CMD_SET_CONTROL_BUS_SWITCH			0x7a
+# define SDVO_CONTROL_BUS_PROM				0x0
+# define SDVO_CONTROL_BUS_DDC1				0x1
+# define SDVO_CONTROL_BUS_DDC2				0x2
+# define SDVO_CONTROL_BUS_DDC3				0x3
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
new file mode 100644
index 000000000000..d409b8637883
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -0,0 +1,1725 @@
+/*
+ * Copyright © 2006-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file
+ * Integrated TV-out support for the 915GM and 945GM.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "drm_edid.h"
+#include "intel_drv.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+
+enum tv_margin {
+	TV_MARGIN_LEFT, TV_MARGIN_TOP,
+	TV_MARGIN_RIGHT, TV_MARGIN_BOTTOM
+};
+
+/** Private structure for the integrated TV support */
+struct intel_tv_priv {
+	int type;
+	char *tv_format;
+	int margin[4];
+	u32 save_TV_H_CTL_1;
+	u32 save_TV_H_CTL_2;
+	u32 save_TV_H_CTL_3;
+	u32 save_TV_V_CTL_1;
+	u32 save_TV_V_CTL_2;
+	u32 save_TV_V_CTL_3;
+	u32 save_TV_V_CTL_4;
+	u32 save_TV_V_CTL_5;
+	u32 save_TV_V_CTL_6;
+	u32 save_TV_V_CTL_7;
+	u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3;
+
+	u32 save_TV_CSC_Y;
+	u32 save_TV_CSC_Y2;
+	u32 save_TV_CSC_U;
+	u32 save_TV_CSC_U2;
+	u32 save_TV_CSC_V;
+	u32 save_TV_CSC_V2;
+	u32 save_TV_CLR_KNOBS;
+	u32 save_TV_CLR_LEVEL;
+	u32 save_TV_WIN_POS;
+	u32 save_TV_WIN_SIZE;
+	u32 save_TV_FILTER_CTL_1;
+	u32 save_TV_FILTER_CTL_2;
+	u32 save_TV_FILTER_CTL_3;
+
+	u32 save_TV_H_LUMA[60];
+	u32 save_TV_H_CHROMA[60];
+	u32 save_TV_V_LUMA[43];
+	u32 save_TV_V_CHROMA[43];
+
+	u32 save_TV_DAC;
+	u32 save_TV_CTL;
+};
+
+struct video_levels {
+	int blank, black, burst;
+};
+
+struct color_conversion {
+	u16 ry, gy, by, ay;
+	u16 ru, gu, bu, au;
+	u16 rv, gv, bv, av;
+};
+
+static const u32 filter_table[] = {
+	0xB1403000, 0x2E203500, 0x35002E20, 0x3000B140,
+	0x35A0B160, 0x2DC02E80, 0xB1403480, 0xB1603000,
+	0x2EA03640, 0x34002D80, 0x3000B120, 0x36E0B160,
+	0x2D202EF0, 0xB1203380, 0xB1603000, 0x2F303780,
+	0x33002CC0, 0x3000B100, 0x3820B160, 0x2C802F50,
+	0xB10032A0, 0xB1603000, 0x2F9038C0, 0x32202C20,
+	0x3000B0E0, 0x3980B160, 0x2BC02FC0, 0xB0E031C0,
+	0xB1603000, 0x2FF03A20, 0x31602B60, 0xB020B0C0,
+	0x3AE0B160, 0x2B001810, 0xB0C03120, 0xB140B020,
+	0x18283BA0, 0x30C02A80, 0xB020B0A0, 0x3C60B140,
+	0x2A201838, 0xB0A03080, 0xB120B020, 0x18383D20,
+	0x304029C0, 0xB040B080, 0x3DE0B100, 0x29601848,
+	0xB0803000, 0xB100B040, 0x18483EC0, 0xB0402900,
+	0xB040B060, 0x3F80B0C0, 0x28801858, 0xB060B080,
+	0xB0A0B060, 0x18602820, 0xB0A02820, 0x0000B060,
+	0xB1403000, 0x2E203500, 0x35002E20, 0x3000B140,
+	0x35A0B160, 0x2DC02E80, 0xB1403480, 0xB1603000,
+	0x2EA03640, 0x34002D80, 0x3000B120, 0x36E0B160,
+	0x2D202EF0, 0xB1203380, 0xB1603000, 0x2F303780,
+	0x33002CC0, 0x3000B100, 0x3820B160, 0x2C802F50,
+	0xB10032A0, 0xB1603000, 0x2F9038C0, 0x32202C20,
+	0x3000B0E0, 0x3980B160, 0x2BC02FC0, 0xB0E031C0,
+	0xB1603000, 0x2FF03A20, 0x31602B60, 0xB020B0C0,
+	0x3AE0B160, 0x2B001810, 0xB0C03120, 0xB140B020,
+	0x18283BA0, 0x30C02A80, 0xB020B0A0, 0x3C60B140,
+	0x2A201838, 0xB0A03080, 0xB120B020, 0x18383D20,
+	0x304029C0, 0xB040B080, 0x3DE0B100, 0x29601848,
+	0xB0803000, 0xB100B040, 0x18483EC0, 0xB0402900,
+	0xB040B060, 0x3F80B0C0, 0x28801858, 0xB060B080,
+	0xB0A0B060, 0x18602820, 0xB0A02820, 0x0000B060,
+	0x36403000, 0x2D002CC0, 0x30003640, 0x2D0036C0,
+	0x35C02CC0, 0x37403000, 0x2C802D40, 0x30003540,
+	0x2D8037C0, 0x34C02C40, 0x38403000, 0x2BC02E00,
+	0x30003440, 0x2E2038C0, 0x34002B80, 0x39803000,
+	0x2B402E40, 0x30003380, 0x2E603A00, 0x33402B00,
+	0x3A803040, 0x2A802EA0, 0x30403300, 0x2EC03B40,
+	0x32802A40, 0x3C003040, 0x2A002EC0, 0x30803240,
+	0x2EC03C80, 0x320029C0, 0x3D403080, 0x29402F00,
+	0x308031C0, 0x2F203DC0, 0x31802900, 0x3E8030C0,
+	0x28802F40, 0x30C03140, 0x2F203F40, 0x31402840,
+	0x28003100, 0x28002F00, 0x00003100, 0x36403000,
+	0x2D002CC0, 0x30003640, 0x2D0036C0,
+	0x35C02CC0, 0x37403000, 0x2C802D40, 0x30003540,
+	0x2D8037C0, 0x34C02C40, 0x38403000, 0x2BC02E00,
+	0x30003440, 0x2E2038C0, 0x34002B80, 0x39803000,
+	0x2B402E40, 0x30003380, 0x2E603A00, 0x33402B00,
+	0x3A803040, 0x2A802EA0, 0x30403300, 0x2EC03B40,
+	0x32802A40, 0x3C003040, 0x2A002EC0, 0x30803240,
+	0x2EC03C80, 0x320029C0, 0x3D403080, 0x29402F00,
+	0x308031C0, 0x2F203DC0, 0x31802900, 0x3E8030C0,
+	0x28802F40, 0x30C03140, 0x2F203F40, 0x31402840,
+	0x28003100, 0x28002F00, 0x00003100,
+};
+
+/*
+ * Color conversion values have 3 separate fixed point formats:
+ *
+ * 10 bit fields (ay, au)
+ *   1.9 fixed point (b.bbbbbbbbb)
+ * 11 bit fields (ry, by, ru, gu, gv)
+ *   exp.mantissa (ee.mmmmmmmmm)
+ *   ee = 00 = 10^-1 (0.mmmmmmmmm)
+ *   ee = 01 = 10^-2 (0.0mmmmmmmmm)
+ *   ee = 10 = 10^-3 (0.00mmmmmmmmm)
+ *   ee = 11 = 10^-4 (0.000mmmmmmmmm)
+ * 12 bit fields (gy, rv, bu)
+ *   exp.mantissa (eee.mmmmmmmmm)
+ *   eee = 000 = 10^-1 (0.mmmmmmmmm)
+ *   eee = 001 = 10^-2 (0.0mmmmmmmmm)
+ *   eee = 010 = 10^-3 (0.00mmmmmmmmm)
+ *   eee = 011 = 10^-4 (0.000mmmmmmmmm)
+ *   eee = 100 = reserved
+ *   eee = 101 = reserved
+ *   eee = 110 = reserved
+ *   eee = 111 = 10^0 (m.mmmmmmmm) (only usable for 1.0 representation)
+ *
+ * Saturation and contrast are 8 bits, with their own representation:
+ * 8 bit field (saturation, contrast)
+ *   exp.mantissa (ee.mmmmmm)
+ *   ee = 00 = 10^-1 (0.mmmmmm)
+ *   ee = 01 = 10^0 (m.mmmmm)
+ *   ee = 10 = 10^1 (mm.mmmm)
+ *   ee = 11 = 10^2 (mmm.mmm)
+ *
+ * Simple conversion function:
+ *
+ * static u32
+ * float_to_csc_11(float f)
+ * {
+ *     u32 exp;
+ *     u32 mant;
+ *     u32 ret;
+ *
+ *     if (f < 0)
+ *         f = -f;
+ *
+ *     if (f >= 1) {
+ *         exp = 0x7;
+ * 	   mant = 1 << 8;
+ *     } else {
+ *         for (exp = 0; exp < 3 && f < 0.5; exp++)
+ * 	       f *= 2.0;
+ *         mant = (f * (1 << 9) + 0.5);
+ *         if (mant >= (1 << 9))
+ *             mant = (1 << 9) - 1;
+ *     }
+ *     ret = (exp << 9) | mant;
+ *     return ret;
+ * }
+ */
+
+/*
+ * Behold, magic numbers!  If we plant them they might grow a big
+ * s-video cable to the sky... or something.
+ *
+ * Pre-converted to appropriate hex value.
+ */
+
+/*
+ * PAL & NTSC values for composite & s-video connections
+ */
+static const struct color_conversion ntsc_m_csc_composite = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0104,
+	.ru = 0x0733, .gu = 0x052d, .bu = 0x05c7, .au = 0x0f00,
+	.rv = 0x0340, .gv = 0x030c, .bv = 0x06d0, .av = 0x0f00,
+};
+
+static const struct video_levels ntsc_m_levels_composite = {
+	.blank = 225, .black = 267, .burst = 113,
+};
+
+static const struct color_conversion ntsc_m_csc_svideo = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0134,
+	.ru = 0x076a, .gu = 0x0564, .bu = 0x030d, .au = 0x0f00,
+	.rv = 0x037a, .gv = 0x033d, .bv = 0x06f6, .av = 0x0f00,
+};
+
+static const struct video_levels ntsc_m_levels_svideo = {
+	.blank = 266, .black = 316, .burst = 133,
+};
+
+static const struct color_conversion ntsc_j_csc_composite = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0119,
+	.ru = 0x074c, .gu = 0x0546, .bu = 0x05ec, .au = 0x0f00,
+	.rv = 0x035a, .gv = 0x0322, .bv = 0x06e1, .av = 0x0f00,
+};
+
+static const struct video_levels ntsc_j_levels_composite = {
+	.blank = 225, .black = 225, .burst = 113,
+};
+
+static const struct color_conversion ntsc_j_csc_svideo = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x014c,
+	.ru = 0x0788, .gu = 0x0581, .bu = 0x0322, .au = 0x0f00,
+	.rv = 0x0399, .gv = 0x0356, .bv = 0x070a, .av = 0x0f00,
+};
+
+static const struct video_levels ntsc_j_levels_svideo = {
+	.blank = 266, .black = 266, .burst = 133,
+};
+
+static const struct color_conversion pal_csc_composite = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0113,
+	.ru = 0x0745, .gu = 0x053f, .bu = 0x05e1, .au = 0x0f00,
+	.rv = 0x0353, .gv = 0x031c, .bv = 0x06dc, .av = 0x0f00,
+};
+
+static const struct video_levels pal_levels_composite = {
+	.blank = 237, .black = 237, .burst = 118,
+};
+
+static const struct color_conversion pal_csc_svideo = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0145,
+	.ru = 0x0780, .gu = 0x0579, .bu = 0x031c, .au = 0x0f00,
+	.rv = 0x0390, .gv = 0x034f, .bv = 0x0705, .av = 0x0f00,
+};
+
+static const struct video_levels pal_levels_svideo = {
+	.blank = 280, .black = 280, .burst = 139,
+};
+
+static const struct color_conversion pal_m_csc_composite = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0104,
+	.ru = 0x0733, .gu = 0x052d, .bu = 0x05c7, .au = 0x0f00,
+	.rv = 0x0340, .gv = 0x030c, .bv = 0x06d0, .av = 0x0f00,
+};
+
+static const struct video_levels pal_m_levels_composite = {
+	.blank = 225, .black = 267, .burst = 113,
+};
+
+static const struct color_conversion pal_m_csc_svideo = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0134,
+	.ru = 0x076a, .gu = 0x0564, .bu = 0x030d, .au = 0x0f00,
+	.rv = 0x037a, .gv = 0x033d, .bv = 0x06f6, .av = 0x0f00,
+};
+
+static const struct video_levels pal_m_levels_svideo = {
+	.blank = 266, .black = 316, .burst = 133,
+};
+
+static const struct color_conversion pal_n_csc_composite = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0104,
+	.ru = 0x0733, .gu = 0x052d, .bu = 0x05c7, .au = 0x0f00,
+	.rv = 0x0340, .gv = 0x030c, .bv = 0x06d0, .av = 0x0f00,
+};
+
+static const struct video_levels pal_n_levels_composite = {
+	.blank = 225, .black = 267, .burst = 118,
+};
+
+static const struct color_conversion pal_n_csc_svideo = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0134,
+	.ru = 0x076a, .gu = 0x0564, .bu = 0x030d, .au = 0x0f00,
+	.rv = 0x037a, .gv = 0x033d, .bv = 0x06f6, .av = 0x0f00,
+};
+
+static const struct video_levels pal_n_levels_svideo = {
+	.blank = 266, .black = 316, .burst = 139,
+};
+
+/*
+ * Component connections
+ */
+static const struct color_conversion sdtv_csc_yprpb = {
+	.ry = 0x0332, .gy = 0x012d, .by = 0x07d3, .ay = 0x0146,
+	.ru = 0x0559, .gu = 0x0353, .bu = 0x0100, .au = 0x0f00,
+	.rv = 0x0100, .gv = 0x03ad, .bv = 0x074d, .av = 0x0f00,
+};
+
+static const struct color_conversion sdtv_csc_rgb = {
+	.ry = 0x0000, .gy = 0x0f00, .by = 0x0000, .ay = 0x0166,
+	.ru = 0x0000, .gu = 0x0000, .bu = 0x0f00, .au = 0x0166,
+	.rv = 0x0f00, .gv = 0x0000, .bv = 0x0000, .av = 0x0166,
+};
+
+static const struct color_conversion hdtv_csc_yprpb = {
+	.ry = 0x05b3, .gy = 0x016e, .by = 0x0728, .ay = 0x0146,
+	.ru = 0x07d5, .gu = 0x038b, .bu = 0x0100, .au = 0x0f00,
+	.rv = 0x0100, .gv = 0x03d1, .bv = 0x06bc, .av = 0x0f00,
+};
+
+static const struct color_conversion hdtv_csc_rgb = {
+	.ry = 0x0000, .gy = 0x0f00, .by = 0x0000, .ay = 0x0166,
+	.ru = 0x0000, .gu = 0x0000, .bu = 0x0f00, .au = 0x0166,
+	.rv = 0x0f00, .gv = 0x0000, .bv = 0x0000, .av = 0x0166,
+};
+
+static const struct video_levels component_levels = {
+	.blank = 279, .black = 279, .burst = 0,
+};
+
+
+struct tv_mode {
+	char *name;
+	int clock;
+	int refresh; /* in millihertz (for precision) */
+	u32 oversample;
+	int hsync_end, hblank_start, hblank_end, htotal;
+	bool progressive, trilevel_sync, component_only;
+	int vsync_start_f1, vsync_start_f2, vsync_len;
+	bool veq_ena;
+	int veq_start_f1, veq_start_f2, veq_len;
+	int vi_end_f1, vi_end_f2, nbr_end;
+	bool burst_ena;
+	int hburst_start, hburst_len;
+	int vburst_start_f1, vburst_end_f1;
+	int vburst_start_f2, vburst_end_f2;
+	int vburst_start_f3, vburst_end_f3;
+	int vburst_start_f4, vburst_end_f4;
+	/*
+	 * subcarrier programming
+	 */
+	int dda2_size, dda3_size, dda1_inc, dda2_inc, dda3_inc;
+	u32 sc_reset;
+	bool pal_burst;
+	/*
+	 * blank/black levels
+	 */
+	const struct video_levels *composite_levels, *svideo_levels;
+	const struct color_conversion *composite_color, *svideo_color;
+	const u32 *filter_table;
+	int max_srcw;
+};
+
+
+/*
+ * Sub carrier DDA
+ *
+ *  I think this works as follows:
+ *
+ *  subcarrier freq = pixel_clock * (dda1_inc + dda2_inc / dda2_size) / 4096
+ *
+ * Presumably, when dda3 is added in, it gets to adjust the dda2_inc value
+ *
+ * So,
+ *  dda1_ideal = subcarrier/pixel * 4096
+ *  dda1_inc = floor (dda1_ideal)
+ *  dda2 = dda1_ideal - dda1_inc
+ *
+ *  then pick a ratio for dda2 that gives the closest approximation. If
+ *  you can't get close enough, you can play with dda3 as well. This
+ *  seems likely to happen when dda2 is small as the jumps would be larger
+ *
+ * To invert this,
+ *
+ *  pixel_clock = subcarrier * 4096 / (dda1_inc + dda2_inc / dda2_size)
+ *
+ * The constants below were all computed using a 107.520MHz clock
+ */
+
+/**
+ * Register programming values for TV modes.
+ *
+ * These values account for -1s required.
+ */
+
+const static struct tv_mode tv_modes[] = {
+	{
+		.name		= "NTSC-M",
+		.clock		= 107520,
+		.refresh	= 29970,
+		.oversample	= TV_OVERSAMPLE_8X,
+		.component_only = 0,
+		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */
+
+		.hsync_end	= 64,		    .hblank_end		= 124,
+		.hblank_start	= 836,		    .htotal		= 857,
+
+		.progressive	= false,	    .trilevel_sync = false,
+
+		.vsync_start_f1	= 6,		    .vsync_start_f2	= 7,
+		.vsync_len	= 6,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_start_f2	= 1,		    .veq_len		= 18,
+
+		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
+		.nbr_end	= 240,
+
+		.burst_ena	= true,
+		.hburst_start	= 72,		    .hburst_len		= 34,
+		.vburst_start_f1 = 9,		    .vburst_end_f1	= 240,
+		.vburst_start_f2 = 10,		    .vburst_end_f2	= 240,
+		.vburst_start_f3 = 9,		    .vburst_end_f3	= 240,
+		.vburst_start_f4 = 10,		    .vburst_end_f4	= 240,
+
+		/* desired 3.5800000 actual 3.5800000 clock 107.52 */
+		.dda1_inc	=    136,
+		.dda2_inc	=   7624,	    .dda2_size		=  20013,
+		.dda3_inc	=      0,	    .dda3_size		=      0,
+		.sc_reset	= TV_SC_RESET_EVERY_4,
+		.pal_burst	= false,
+
+		.composite_levels = &ntsc_m_levels_composite,
+		.composite_color = &ntsc_m_csc_composite,
+		.svideo_levels  = &ntsc_m_levels_svideo,
+		.svideo_color = &ntsc_m_csc_svideo,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name		= "NTSC-443",
+		.clock		= 107520,
+		.refresh	= 29970,
+		.oversample	= TV_OVERSAMPLE_8X,
+		.component_only = 0,
+		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 4.43MHz */
+		.hsync_end	= 64,		    .hblank_end		= 124,
+		.hblank_start	= 836,		    .htotal		= 857,
+
+		.progressive	= false,	    .trilevel_sync = false,
+
+		.vsync_start_f1 = 6,		    .vsync_start_f2	= 7,
+		.vsync_len	= 6,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_start_f2	= 1,		    .veq_len		= 18,
+
+		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
+		.nbr_end	= 240,
+
+		.burst_ena	= 8,
+		.hburst_start	= 72,		    .hburst_len		= 34,
+		.vburst_start_f1 = 9,		    .vburst_end_f1	= 240,
+		.vburst_start_f2 = 10,		    .vburst_end_f2	= 240,
+		.vburst_start_f3 = 9,		    .vburst_end_f3	= 240,
+		.vburst_start_f4 = 10,		    .vburst_end_f4	= 240,
+
+		/* desired 4.4336180 actual 4.4336180 clock 107.52 */
+		.dda1_inc       =    168,
+		.dda2_inc       =  18557,       .dda2_size      =  20625,
+		.dda3_inc       =      0,       .dda3_size      =      0,
+		.sc_reset   = TV_SC_RESET_EVERY_8,
+		.pal_burst  = true,
+
+		.composite_levels = &ntsc_m_levels_composite,
+		.composite_color = &ntsc_m_csc_composite,
+		.svideo_levels  = &ntsc_m_levels_svideo,
+		.svideo_color = &ntsc_m_csc_svideo,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name		= "NTSC-J",
+		.clock		= 107520,
+		.refresh	= 29970,
+		.oversample	= TV_OVERSAMPLE_8X,
+		.component_only = 0,
+
+		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */
+		.hsync_end	= 64,		    .hblank_end		= 124,
+		.hblank_start = 836,	    .htotal		= 857,
+
+		.progressive	= false,    .trilevel_sync = false,
+
+		.vsync_start_f1	= 6,	    .vsync_start_f2	= 7,
+		.vsync_len	= 6,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_start_f2 = 1,	    .veq_len		= 18,
+
+		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
+		.nbr_end	= 240,
+
+		.burst_ena	= true,
+		.hburst_start	= 72,		    .hburst_len		= 34,
+		.vburst_start_f1 = 9,		    .vburst_end_f1	= 240,
+		.vburst_start_f2 = 10,		    .vburst_end_f2	= 240,
+		.vburst_start_f3 = 9,		    .vburst_end_f3	= 240,
+		.vburst_start_f4 = 10,		    .vburst_end_f4	= 240,
+
+		/* desired 3.5800000 actual 3.5800000 clock 107.52 */
+		.dda1_inc	=    136,
+		.dda2_inc	=   7624,	    .dda2_size		=  20013,
+		.dda3_inc	=      0,	    .dda3_size		=      0,
+		.sc_reset	= TV_SC_RESET_EVERY_4,
+		.pal_burst	= false,
+
+		.composite_levels = &ntsc_j_levels_composite,
+		.composite_color = &ntsc_j_csc_composite,
+		.svideo_levels  = &ntsc_j_levels_svideo,
+		.svideo_color = &ntsc_j_csc_svideo,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name		= "PAL-M",
+		.clock		= 107520,
+		.refresh	= 29970,
+		.oversample	= TV_OVERSAMPLE_8X,
+		.component_only = 0,
+
+		/* 525 Lines, 60 Fields, 15.734KHz line, Sub-Carrier 3.580MHz */
+		.hsync_end	= 64,		  .hblank_end		= 124,
+		.hblank_start = 836,	  .htotal		= 857,
+
+		.progressive	= false,	    .trilevel_sync = false,
+
+		.vsync_start_f1	= 6,		    .vsync_start_f2	= 7,
+		.vsync_len	= 6,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_start_f2	= 1,		    .veq_len		= 18,
+
+		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
+		.nbr_end	= 240,
+
+		.burst_ena	= true,
+		.hburst_start	= 72,		    .hburst_len		= 34,
+		.vburst_start_f1 = 9,		    .vburst_end_f1	= 240,
+		.vburst_start_f2 = 10,		    .vburst_end_f2	= 240,
+		.vburst_start_f3 = 9,		    .vburst_end_f3	= 240,
+		.vburst_start_f4 = 10,		    .vburst_end_f4	= 240,
+
+		/* desired 3.5800000 actual 3.5800000 clock 107.52 */
+		.dda1_inc	=    136,
+		.dda2_inc	=    7624,	    .dda2_size		=  20013,
+		.dda3_inc	=      0,	    .dda3_size		=      0,
+		.sc_reset	= TV_SC_RESET_EVERY_4,
+		.pal_burst  = false,
+
+		.composite_levels = &pal_m_levels_composite,
+		.composite_color = &pal_m_csc_composite,
+		.svideo_levels  = &pal_m_levels_svideo,
+		.svideo_color = &pal_m_csc_svideo,
+
+		.filter_table = filter_table,
+	},
+	{
+		/* 625 Lines, 50 Fields, 15.625KHz line, Sub-Carrier 4.434MHz */
+		.name	    = "PAL-N",
+		.clock		= 107520,
+		.refresh	= 25000,
+		.oversample	= TV_OVERSAMPLE_8X,
+		.component_only = 0,
+
+		.hsync_end	= 64,		    .hblank_end		= 128,
+		.hblank_start = 844,	    .htotal		= 863,
+
+		.progressive  = false,    .trilevel_sync = false,
+
+
+		.vsync_start_f1	= 6,	   .vsync_start_f2	= 7,
+		.vsync_len	= 6,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_start_f2	= 1,		    .veq_len		= 18,
+
+		.vi_end_f1	= 24,		    .vi_end_f2		= 25,
+		.nbr_end	= 286,
+
+		.burst_ena	= true,
+		.hburst_start = 73,	    	    .hburst_len		= 34,
+		.vburst_start_f1 = 8,	    .vburst_end_f1	= 285,
+		.vburst_start_f2 = 8,	    .vburst_end_f2	= 286,
+		.vburst_start_f3 = 9,	    .vburst_end_f3	= 286,
+		.vburst_start_f4 = 9,	    .vburst_end_f4	= 285,
+
+
+		/* desired 4.4336180 actual 4.4336180 clock 107.52 */
+		.dda1_inc       =    168,
+		.dda2_inc       =  18557,       .dda2_size      =  20625,
+		.dda3_inc       =      0,       .dda3_size      =      0,
+		.sc_reset   = TV_SC_RESET_EVERY_8,
+		.pal_burst  = true,
+
+		.composite_levels = &pal_n_levels_composite,
+		.composite_color = &pal_n_csc_composite,
+		.svideo_levels  = &pal_n_levels_svideo,
+		.svideo_color = &pal_n_csc_svideo,
+
+		.filter_table = filter_table,
+	},
+	{
+		/* 625 Lines, 50 Fields, 15.625KHz line, Sub-Carrier 4.434MHz */
+		.name	    = "PAL",
+		.clock		= 107520,
+		.refresh	= 25000,
+		.oversample	= TV_OVERSAMPLE_8X,
+		.component_only = 0,
+
+		.hsync_end	= 64,		    .hblank_end		= 128,
+		.hblank_start	= 844,	    .htotal		= 863,
+
+		.progressive	= false,    .trilevel_sync = false,
+
+		.vsync_start_f1	= 5,	    .vsync_start_f2	= 6,
+		.vsync_len	= 5,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_start_f2	= 1,	    .veq_len		= 15,
+
+		.vi_end_f1	= 24,		    .vi_end_f2		= 25,
+		.nbr_end	= 286,
+
+		.burst_ena	= true,
+		.hburst_start	= 73,		    .hburst_len		= 32,
+		.vburst_start_f1 = 8,		    .vburst_end_f1	= 285,
+		.vburst_start_f2 = 8,		    .vburst_end_f2	= 286,
+		.vburst_start_f3 = 9,		    .vburst_end_f3	= 286,
+		.vburst_start_f4 = 9,		    .vburst_end_f4	= 285,
+
+		/* desired 4.4336180 actual 4.4336180 clock 107.52 */
+		.dda1_inc       =    168,
+		.dda2_inc       =  18557,       .dda2_size      =  20625,
+		.dda3_inc       =      0,       .dda3_size      =      0,
+		.sc_reset   = TV_SC_RESET_EVERY_8,
+		.pal_burst  = true,
+
+		.composite_levels = &pal_levels_composite,
+		.composite_color = &pal_csc_composite,
+		.svideo_levels  = &pal_levels_svideo,
+		.svideo_color = &pal_csc_svideo,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "480p@59.94Hz",
+		.clock 	= 107520,
+		.refresh	= 59940,
+		.oversample     = TV_OVERSAMPLE_4X,
+		.component_only = 1,
+
+		.hsync_end      = 64,               .hblank_end         = 122,
+		.hblank_start   = 842,              .htotal             = 857,
+
+		.progressive    = true,.trilevel_sync = false,
+
+		.vsync_start_f1 = 12,               .vsync_start_f2     = 12,
+		.vsync_len      = 12,
+
+		.veq_ena        = false,
+
+		.vi_end_f1      = 44,               .vi_end_f2          = 44,
+		.nbr_end        = 496,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "480p@60Hz",
+		.clock 	= 107520,
+		.refresh	= 60000,
+		.oversample     = TV_OVERSAMPLE_4X,
+		.component_only = 1,
+
+		.hsync_end      = 64,               .hblank_end         = 122,
+		.hblank_start   = 842,              .htotal             = 856,
+
+		.progressive    = true,.trilevel_sync = false,
+
+		.vsync_start_f1 = 12,               .vsync_start_f2     = 12,
+		.vsync_len      = 12,
+
+		.veq_ena        = false,
+
+		.vi_end_f1      = 44,               .vi_end_f2          = 44,
+		.nbr_end        = 496,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "576p",
+		.clock 	= 107520,
+		.refresh	= 50000,
+		.oversample     = TV_OVERSAMPLE_4X,
+		.component_only = 1,
+
+		.hsync_end      = 64,               .hblank_end         = 139,
+		.hblank_start   = 859,              .htotal             = 863,
+
+		.progressive    = true,		.trilevel_sync = false,
+
+		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
+		.vsync_len      = 10,
+
+		.veq_ena        = false,
+
+		.vi_end_f1      = 48,               .vi_end_f2          = 48,
+		.nbr_end        = 575,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "720p@60Hz",
+		.clock		= 148800,
+		.refresh	= 60000,
+		.oversample     = TV_OVERSAMPLE_2X,
+		.component_only = 1,
+
+		.hsync_end      = 80,               .hblank_end         = 300,
+		.hblank_start   = 1580,             .htotal             = 1649,
+
+		.progressive    = true, 	    .trilevel_sync = true,
+
+		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
+		.vsync_len      = 10,
+
+		.veq_ena        = false,
+
+		.vi_end_f1      = 29,               .vi_end_f2          = 29,
+		.nbr_end        = 719,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "720p@59.94Hz",
+		.clock		= 148800,
+		.refresh	= 59940,
+		.oversample     = TV_OVERSAMPLE_2X,
+		.component_only = 1,
+
+		.hsync_end      = 80,               .hblank_end         = 300,
+		.hblank_start   = 1580,             .htotal             = 1651,
+
+		.progressive    = true, 	    .trilevel_sync = true,
+
+		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
+		.vsync_len      = 10,
+
+		.veq_ena        = false,
+
+		.vi_end_f1      = 29,               .vi_end_f2          = 29,
+		.nbr_end        = 719,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "720p@50Hz",
+		.clock		= 148800,
+		.refresh	= 50000,
+		.oversample     = TV_OVERSAMPLE_2X,
+		.component_only = 1,
+
+		.hsync_end      = 80,               .hblank_end         = 300,
+		.hblank_start   = 1580,             .htotal             = 1979,
+
+		.progressive    = true, 	        .trilevel_sync = true,
+
+		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
+		.vsync_len      = 10,
+
+		.veq_ena        = false,
+
+		.vi_end_f1      = 29,               .vi_end_f2          = 29,
+		.nbr_end        = 719,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+		.max_srcw = 800
+	},
+	{
+		.name       = "1080i@50Hz",
+		.clock		= 148800,
+		.refresh	= 25000,
+		.oversample     = TV_OVERSAMPLE_2X,
+		.component_only = 1,
+
+		.hsync_end      = 88,               .hblank_end         = 235,
+		.hblank_start   = 2155,             .htotal             = 2639,
+
+		.progressive    = false, 	    .trilevel_sync = true,
+
+		.vsync_start_f1 = 4,              .vsync_start_f2     = 5,
+		.vsync_len      = 10,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 4,
+		.veq_start_f2   = 4,	    .veq_len		= 10,
+
+
+		.vi_end_f1      = 21,           .vi_end_f2          = 22,
+		.nbr_end        = 539,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "1080i@60Hz",
+		.clock		= 148800,
+		.refresh	= 30000,
+		.oversample     = TV_OVERSAMPLE_2X,
+		.component_only = 1,
+
+		.hsync_end      = 88,               .hblank_end         = 235,
+		.hblank_start   = 2155,             .htotal             = 2199,
+
+		.progressive    = false, 	    .trilevel_sync = true,
+
+		.vsync_start_f1 = 4,               .vsync_start_f2     = 5,
+		.vsync_len      = 10,
+
+		.veq_ena	= true,		    .veq_start_f1    	= 4,
+		.veq_start_f2	= 4,		    .veq_len		= 10,
+
+
+		.vi_end_f1      = 21,               .vi_end_f2          = 22,
+		.nbr_end        = 539,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+	{
+		.name       = "1080i@59.94Hz",
+		.clock		= 148800,
+		.refresh	= 29970,
+		.oversample     = TV_OVERSAMPLE_2X,
+		.component_only = 1,
+
+		.hsync_end      = 88,               .hblank_end         = 235,
+		.hblank_start   = 2155,             .htotal             = 2200,
+
+		.progressive    = false, 	    .trilevel_sync = true,
+
+		.vsync_start_f1 = 4,            .vsync_start_f2    = 5,
+		.vsync_len      = 10,
+
+		.veq_ena	= true,		    .veq_start_f1	= 4,
+		.veq_start_f2 = 4,	    	    .veq_len = 10,
+
+
+		.vi_end_f1      = 21,           .vi_end_f2         	= 22,
+		.nbr_end        = 539,
+
+		.burst_ena      = false,
+
+		.filter_table = filter_table,
+	},
+};
+
+#define NUM_TV_MODES sizeof(tv_modes) / sizeof (tv_modes[0])
+
+static void
+intel_tv_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	switch(mode) {
+	case DRM_MODE_DPMS_ON:
+		I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE);
+		break;
+	}
+}
+
+static void
+intel_tv_save(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_tv_priv *tv_priv = intel_output->dev_priv;
+	int i;
+
+	tv_priv->save_TV_H_CTL_1 = I915_READ(TV_H_CTL_1);
+	tv_priv->save_TV_H_CTL_2 = I915_READ(TV_H_CTL_2);
+	tv_priv->save_TV_H_CTL_3 = I915_READ(TV_H_CTL_3);
+	tv_priv->save_TV_V_CTL_1 = I915_READ(TV_V_CTL_1);
+	tv_priv->save_TV_V_CTL_2 = I915_READ(TV_V_CTL_2);
+	tv_priv->save_TV_V_CTL_3 = I915_READ(TV_V_CTL_3);
+	tv_priv->save_TV_V_CTL_4 = I915_READ(TV_V_CTL_4);
+	tv_priv->save_TV_V_CTL_5 = I915_READ(TV_V_CTL_5);
+	tv_priv->save_TV_V_CTL_6 = I915_READ(TV_V_CTL_6);
+	tv_priv->save_TV_V_CTL_7 = I915_READ(TV_V_CTL_7);
+	tv_priv->save_TV_SC_CTL_1 = I915_READ(TV_SC_CTL_1);
+	tv_priv->save_TV_SC_CTL_2 = I915_READ(TV_SC_CTL_2);
+	tv_priv->save_TV_SC_CTL_3 = I915_READ(TV_SC_CTL_3);
+
+	tv_priv->save_TV_CSC_Y = I915_READ(TV_CSC_Y);
+	tv_priv->save_TV_CSC_Y2 = I915_READ(TV_CSC_Y2);
+	tv_priv->save_TV_CSC_U = I915_READ(TV_CSC_U);
+	tv_priv->save_TV_CSC_U2 = I915_READ(TV_CSC_U2);
+	tv_priv->save_TV_CSC_V = I915_READ(TV_CSC_V);
+	tv_priv->save_TV_CSC_V2 = I915_READ(TV_CSC_V2);
+	tv_priv->save_TV_CLR_KNOBS = I915_READ(TV_CLR_KNOBS);
+	tv_priv->save_TV_CLR_LEVEL = I915_READ(TV_CLR_LEVEL);
+	tv_priv->save_TV_WIN_POS = I915_READ(TV_WIN_POS);
+	tv_priv->save_TV_WIN_SIZE = I915_READ(TV_WIN_SIZE);
+	tv_priv->save_TV_FILTER_CTL_1 = I915_READ(TV_FILTER_CTL_1);
+	tv_priv->save_TV_FILTER_CTL_2 = I915_READ(TV_FILTER_CTL_2);
+	tv_priv->save_TV_FILTER_CTL_3 = I915_READ(TV_FILTER_CTL_3);
+
+	for (i = 0; i < 60; i++)
+		tv_priv->save_TV_H_LUMA[i] = I915_READ(TV_H_LUMA_0 + (i <<2));
+	for (i = 0; i < 60; i++)
+		tv_priv->save_TV_H_CHROMA[i] = I915_READ(TV_H_CHROMA_0 + (i <<2));
+	for (i = 0; i < 43; i++)
+		tv_priv->save_TV_V_LUMA[i] = I915_READ(TV_V_LUMA_0 + (i <<2));
+	for (i = 0; i < 43; i++)
+		tv_priv->save_TV_V_CHROMA[i] = I915_READ(TV_V_CHROMA_0 + (i <<2));
+
+	tv_priv->save_TV_DAC = I915_READ(TV_DAC);
+	tv_priv->save_TV_CTL = I915_READ(TV_CTL);
+}
+
+static void
+intel_tv_restore(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_tv_priv *tv_priv = intel_output->dev_priv;
+	struct drm_crtc *crtc = connector->encoder->crtc;
+	struct intel_crtc *intel_crtc;
+	int i;
+
+	/* FIXME: No CRTC? */
+	if (!crtc)
+		return;
+
+	intel_crtc = to_intel_crtc(crtc);
+	I915_WRITE(TV_H_CTL_1, tv_priv->save_TV_H_CTL_1);
+	I915_WRITE(TV_H_CTL_2, tv_priv->save_TV_H_CTL_2);
+	I915_WRITE(TV_H_CTL_3, tv_priv->save_TV_H_CTL_3);
+	I915_WRITE(TV_V_CTL_1, tv_priv->save_TV_V_CTL_1);
+	I915_WRITE(TV_V_CTL_2, tv_priv->save_TV_V_CTL_2);
+	I915_WRITE(TV_V_CTL_3, tv_priv->save_TV_V_CTL_3);
+	I915_WRITE(TV_V_CTL_4, tv_priv->save_TV_V_CTL_4);
+	I915_WRITE(TV_V_CTL_5, tv_priv->save_TV_V_CTL_5);
+	I915_WRITE(TV_V_CTL_6, tv_priv->save_TV_V_CTL_6);
+	I915_WRITE(TV_V_CTL_7, tv_priv->save_TV_V_CTL_7);
+	I915_WRITE(TV_SC_CTL_1, tv_priv->save_TV_SC_CTL_1);
+	I915_WRITE(TV_SC_CTL_2, tv_priv->save_TV_SC_CTL_2);
+	I915_WRITE(TV_SC_CTL_3, tv_priv->save_TV_SC_CTL_3);
+
+	I915_WRITE(TV_CSC_Y, tv_priv->save_TV_CSC_Y);
+	I915_WRITE(TV_CSC_Y2, tv_priv->save_TV_CSC_Y2);
+	I915_WRITE(TV_CSC_U, tv_priv->save_TV_CSC_U);
+	I915_WRITE(TV_CSC_U2, tv_priv->save_TV_CSC_U2);
+	I915_WRITE(TV_CSC_V, tv_priv->save_TV_CSC_V);
+	I915_WRITE(TV_CSC_V2, tv_priv->save_TV_CSC_V2);
+	I915_WRITE(TV_CLR_KNOBS, tv_priv->save_TV_CLR_KNOBS);
+	I915_WRITE(TV_CLR_LEVEL, tv_priv->save_TV_CLR_LEVEL);
+
+	{
+		int pipeconf_reg = (intel_crtc->pipe == 0) ?
+			PIPEACONF : PIPEBCONF;
+		int dspcntr_reg = (intel_crtc->plane == 0) ?
+			DSPACNTR : DSPBCNTR;
+		int pipeconf = I915_READ(pipeconf_reg);
+		int dspcntr = I915_READ(dspcntr_reg);
+		int dspbase_reg = (intel_crtc->plane == 0) ?
+			DSPAADDR : DSPBADDR;
+		/* Pipe must be off here */
+		I915_WRITE(dspcntr_reg, dspcntr & ~DISPLAY_PLANE_ENABLE);
+		/* Flush the plane changes */
+		I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+
+		if (!IS_I9XX(dev)) {
+			/* Wait for vblank for the disable to take effect */
+			intel_wait_for_vblank(dev);
+		}
+
+		I915_WRITE(pipeconf_reg, pipeconf & ~PIPEACONF_ENABLE);
+		/* Wait for vblank for the disable to take effect. */
+		intel_wait_for_vblank(dev);
+
+		/* Filter ctl must be set before TV_WIN_SIZE */
+		I915_WRITE(TV_FILTER_CTL_1, tv_priv->save_TV_FILTER_CTL_1);
+		I915_WRITE(TV_FILTER_CTL_2, tv_priv->save_TV_FILTER_CTL_2);
+		I915_WRITE(TV_FILTER_CTL_3, tv_priv->save_TV_FILTER_CTL_3);
+		I915_WRITE(TV_WIN_POS, tv_priv->save_TV_WIN_POS);
+		I915_WRITE(TV_WIN_SIZE, tv_priv->save_TV_WIN_SIZE);
+		I915_WRITE(pipeconf_reg, pipeconf);
+		I915_WRITE(dspcntr_reg, dspcntr);
+		/* Flush the plane changes */
+		I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+	}
+
+	for (i = 0; i < 60; i++)
+		I915_WRITE(TV_H_LUMA_0 + (i <<2), tv_priv->save_TV_H_LUMA[i]);
+	for (i = 0; i < 60; i++)
+		I915_WRITE(TV_H_CHROMA_0 + (i <<2), tv_priv->save_TV_H_CHROMA[i]);
+	for (i = 0; i < 43; i++)
+		I915_WRITE(TV_V_LUMA_0 + (i <<2), tv_priv->save_TV_V_LUMA[i]);
+	for (i = 0; i < 43; i++)
+		I915_WRITE(TV_V_CHROMA_0 + (i <<2), tv_priv->save_TV_V_CHROMA[i]);
+
+	I915_WRITE(TV_DAC, tv_priv->save_TV_DAC);
+	I915_WRITE(TV_CTL, tv_priv->save_TV_CTL);
+}
+
+static const struct tv_mode *
+intel_tv_mode_lookup (char *tv_format)
+{
+	int i;
+
+	for (i = 0; i < sizeof(tv_modes) / sizeof (tv_modes[0]); i++) {
+		const struct tv_mode *tv_mode = &tv_modes[i];
+
+		if (!strcmp(tv_format, tv_mode->name))
+			return tv_mode;
+	}
+	return NULL;
+}
+
+static const struct tv_mode *
+intel_tv_mode_find (struct intel_output *intel_output)
+{
+	struct intel_tv_priv *tv_priv = intel_output->dev_priv;
+
+	return intel_tv_mode_lookup(tv_priv->tv_format);
+}
+
+static enum drm_mode_status
+intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_output);
+
+	/* Ensure TV refresh is close to desired refresh */
+	if (tv_mode && abs(tv_mode->refresh - drm_mode_vrefresh(mode)) < 1)
+		return MODE_OK;
+	return MODE_CLOCK_RANGE;
+}
+
+
+static bool
+intel_tv_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_mode_config *drm_config = &dev->mode_config;
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	const struct tv_mode *tv_mode = intel_tv_mode_find (intel_output);
+	struct drm_encoder *other_encoder;
+
+	if (!tv_mode)
+		return false;
+
+	/* FIXME: lock encoder list */
+	list_for_each_entry(other_encoder, &drm_config->encoder_list, head) {
+		if (other_encoder != encoder &&
+		    other_encoder->crtc == encoder->crtc)
+			return false;
+	}
+
+	adjusted_mode->clock = tv_mode->clock;
+	return true;
+}
+
+static void
+intel_tv_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		  struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc = encoder->crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_output *intel_output = enc_to_intel_output(encoder);
+	struct intel_tv_priv *tv_priv = intel_output->dev_priv;
+	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_output);
+	u32 tv_ctl;
+	u32 hctl1, hctl2, hctl3;
+	u32 vctl1, vctl2, vctl3, vctl4, vctl5, vctl6, vctl7;
+	u32 scctl1, scctl2, scctl3;
+	int i, j;
+	const struct video_levels *video_levels;
+	const struct color_conversion *color_conversion;
+	bool burst_ena;
+
+	if (!tv_mode)
+		return;	/* can't happen (mode_prepare prevents this) */
+
+	tv_ctl = 0;
+
+	switch (tv_priv->type) {
+	default:
+	case DRM_MODE_CONNECTOR_Unknown:
+	case DRM_MODE_CONNECTOR_Composite:
+		tv_ctl |= TV_ENC_OUTPUT_COMPOSITE;
+		video_levels = tv_mode->composite_levels;
+		color_conversion = tv_mode->composite_color;
+		burst_ena = tv_mode->burst_ena;
+		break;
+	case DRM_MODE_CONNECTOR_Component:
+		tv_ctl |= TV_ENC_OUTPUT_COMPONENT;
+		video_levels = &component_levels;
+		if (tv_mode->burst_ena)
+			color_conversion = &sdtv_csc_yprpb;
+		else
+			color_conversion = &hdtv_csc_yprpb;
+		burst_ena = false;
+		break;
+	case DRM_MODE_CONNECTOR_SVIDEO:
+		tv_ctl |= TV_ENC_OUTPUT_SVIDEO;
+		video_levels = tv_mode->svideo_levels;
+		color_conversion = tv_mode->svideo_color;
+		burst_ena = tv_mode->burst_ena;
+		break;
+	}
+	hctl1 = (tv_mode->hsync_end << TV_HSYNC_END_SHIFT) |
+		(tv_mode->htotal << TV_HTOTAL_SHIFT);
+
+	hctl2 = (tv_mode->hburst_start << 16) |
+		(tv_mode->hburst_len << TV_HBURST_LEN_SHIFT);
+
+	if (burst_ena)
+		hctl2 |= TV_BURST_ENA;
+
+	hctl3 = (tv_mode->hblank_start << TV_HBLANK_START_SHIFT) |
+		(tv_mode->hblank_end << TV_HBLANK_END_SHIFT);
+
+	vctl1 = (tv_mode->nbr_end << TV_NBR_END_SHIFT) |
+		(tv_mode->vi_end_f1 << TV_VI_END_F1_SHIFT) |
+		(tv_mode->vi_end_f2 << TV_VI_END_F2_SHIFT);
+
+	vctl2 = (tv_mode->vsync_len << TV_VSYNC_LEN_SHIFT) |
+		(tv_mode->vsync_start_f1 << TV_VSYNC_START_F1_SHIFT) |
+		(tv_mode->vsync_start_f2 << TV_VSYNC_START_F2_SHIFT);
+
+	vctl3 = (tv_mode->veq_len << TV_VEQ_LEN_SHIFT) |
+		(tv_mode->veq_start_f1 << TV_VEQ_START_F1_SHIFT) |
+		(tv_mode->veq_start_f2 << TV_VEQ_START_F2_SHIFT);
+
+	if (tv_mode->veq_ena)
+		vctl3 |= TV_EQUAL_ENA;
+
+	vctl4 = (tv_mode->vburst_start_f1 << TV_VBURST_START_F1_SHIFT) |
+		(tv_mode->vburst_end_f1 << TV_VBURST_END_F1_SHIFT);
+
+	vctl5 = (tv_mode->vburst_start_f2 << TV_VBURST_START_F2_SHIFT) |
+		(tv_mode->vburst_end_f2 << TV_VBURST_END_F2_SHIFT);
+
+	vctl6 = (tv_mode->vburst_start_f3 << TV_VBURST_START_F3_SHIFT) |
+		(tv_mode->vburst_end_f3 << TV_VBURST_END_F3_SHIFT);
+
+	vctl7 = (tv_mode->vburst_start_f4 << TV_VBURST_START_F4_SHIFT) |
+		(tv_mode->vburst_end_f4 << TV_VBURST_END_F4_SHIFT);
+
+	if (intel_crtc->pipe == 1)
+		tv_ctl |= TV_ENC_PIPEB_SELECT;
+	tv_ctl |= tv_mode->oversample;
+
+	if (tv_mode->progressive)
+		tv_ctl |= TV_PROGRESSIVE;
+	if (tv_mode->trilevel_sync)
+		tv_ctl |= TV_TRILEVEL_SYNC;
+	if (tv_mode->pal_burst)
+		tv_ctl |= TV_PAL_BURST;
+	scctl1 = 0;
+	/* dda1 implies valid video levels */
+	if (tv_mode->dda1_inc) {
+		scctl1 |= TV_SC_DDA1_EN;
+		scctl1 |= video_levels->burst << TV_BURST_LEVEL_SHIFT;
+	}
+
+	if (tv_mode->dda2_inc)
+		scctl1 |= TV_SC_DDA2_EN;
+
+	if (tv_mode->dda3_inc)
+		scctl1 |= TV_SC_DDA3_EN;
+
+	scctl1 |= tv_mode->sc_reset;
+	scctl1 |= tv_mode->dda1_inc << TV_SCDDA1_INC_SHIFT;
+
+	scctl2 = tv_mode->dda2_size << TV_SCDDA2_SIZE_SHIFT |
+		tv_mode->dda2_inc << TV_SCDDA2_INC_SHIFT;
+
+	scctl3 = tv_mode->dda3_size << TV_SCDDA3_SIZE_SHIFT |
+		tv_mode->dda3_inc << TV_SCDDA3_INC_SHIFT;
+
+	/* Enable two fixes for the chips that need them. */
+	if (dev->pci_device < 0x2772)
+		tv_ctl |= TV_ENC_C0_FIX | TV_ENC_SDP_FIX;
+
+	I915_WRITE(TV_H_CTL_1, hctl1);
+	I915_WRITE(TV_H_CTL_2, hctl2);
+	I915_WRITE(TV_H_CTL_3, hctl3);
+	I915_WRITE(TV_V_CTL_1, vctl1);
+	I915_WRITE(TV_V_CTL_2, vctl2);
+	I915_WRITE(TV_V_CTL_3, vctl3);
+	I915_WRITE(TV_V_CTL_4, vctl4);
+	I915_WRITE(TV_V_CTL_5, vctl5);
+	I915_WRITE(TV_V_CTL_6, vctl6);
+	I915_WRITE(TV_V_CTL_7, vctl7);
+	I915_WRITE(TV_SC_CTL_1, scctl1);
+	I915_WRITE(TV_SC_CTL_2, scctl2);
+	I915_WRITE(TV_SC_CTL_3, scctl3);
+
+	if (color_conversion) {
+		I915_WRITE(TV_CSC_Y, (color_conversion->ry << 16) |
+			   color_conversion->gy);
+		I915_WRITE(TV_CSC_Y2,(color_conversion->by << 16) |
+			   color_conversion->ay);
+		I915_WRITE(TV_CSC_U, (color_conversion->ru << 16) |
+			   color_conversion->gu);
+		I915_WRITE(TV_CSC_U2, (color_conversion->bu << 16) |
+			   color_conversion->au);
+		I915_WRITE(TV_CSC_V, (color_conversion->rv << 16) |
+			   color_conversion->gv);
+		I915_WRITE(TV_CSC_V2, (color_conversion->bv << 16) |
+			   color_conversion->av);
+	}
+
+	I915_WRITE(TV_CLR_KNOBS, 0x00606000);
+	if (video_levels)
+		I915_WRITE(TV_CLR_LEVEL,
+			   ((video_levels->black << TV_BLACK_LEVEL_SHIFT) |
+			    (video_levels->blank << TV_BLANK_LEVEL_SHIFT)));
+	{
+		int pipeconf_reg = (intel_crtc->pipe == 0) ?
+			PIPEACONF : PIPEBCONF;
+		int dspcntr_reg = (intel_crtc->plane == 0) ?
+			DSPACNTR : DSPBCNTR;
+		int pipeconf = I915_READ(pipeconf_reg);
+		int dspcntr = I915_READ(dspcntr_reg);
+		int dspbase_reg = (intel_crtc->plane == 0) ?
+			DSPAADDR : DSPBADDR;
+		int xpos = 0x0, ypos = 0x0;
+		unsigned int xsize, ysize;
+		/* Pipe must be off here */
+		I915_WRITE(dspcntr_reg, dspcntr & ~DISPLAY_PLANE_ENABLE);
+		/* Flush the plane changes */
+		I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+
+		/* Wait for vblank for the disable to take effect */
+		if (!IS_I9XX(dev))
+			intel_wait_for_vblank(dev);
+
+		I915_WRITE(pipeconf_reg, pipeconf & ~PIPEACONF_ENABLE);
+		/* Wait for vblank for the disable to take effect. */
+		intel_wait_for_vblank(dev);
+
+		/* Filter ctl must be set before TV_WIN_SIZE */
+		I915_WRITE(TV_FILTER_CTL_1, TV_AUTO_SCALE);
+		xsize = tv_mode->hblank_start - tv_mode->hblank_end;
+		if (tv_mode->progressive)
+			ysize = tv_mode->nbr_end + 1;
+		else
+			ysize = 2*tv_mode->nbr_end + 1;
+
+		xpos += tv_priv->margin[TV_MARGIN_LEFT];
+		ypos += tv_priv->margin[TV_MARGIN_TOP];
+		xsize -= (tv_priv->margin[TV_MARGIN_LEFT] +
+			  tv_priv->margin[TV_MARGIN_RIGHT]);
+		ysize -= (tv_priv->margin[TV_MARGIN_TOP] +
+			  tv_priv->margin[TV_MARGIN_BOTTOM]);
+		I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos);
+		I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize);
+
+		I915_WRITE(pipeconf_reg, pipeconf);
+		I915_WRITE(dspcntr_reg, dspcntr);
+		/* Flush the plane changes */
+		I915_WRITE(dspbase_reg, I915_READ(dspbase_reg));
+	}
+
+	j = 0;
+	for (i = 0; i < 60; i++)
+		I915_WRITE(TV_H_LUMA_0 + (i<<2), tv_mode->filter_table[j++]);
+	for (i = 0; i < 60; i++)
+		I915_WRITE(TV_H_CHROMA_0 + (i<<2), tv_mode->filter_table[j++]);
+	for (i = 0; i < 43; i++)
+		I915_WRITE(TV_V_LUMA_0 + (i<<2), tv_mode->filter_table[j++]);
+	for (i = 0; i < 43; i++)
+		I915_WRITE(TV_V_CHROMA_0 + (i<<2), tv_mode->filter_table[j++]);
+	I915_WRITE(TV_DAC, 0);
+	I915_WRITE(TV_CTL, tv_ctl);
+}
+
+static const struct drm_display_mode reported_modes[] = {
+	{
+		.name = "NTSC 480i",
+		.clock = 107520,
+		.hdisplay = 1280,
+		.hsync_start = 1368,
+		.hsync_end = 1496,
+		.htotal = 1712,
+
+		.vdisplay = 1024,
+		.vsync_start = 1027,
+		.vsync_end = 1034,
+		.vtotal = 1104,
+		.type = DRM_MODE_TYPE_DRIVER,
+	},
+};
+
+/**
+ * Detects TV presence by checking for load.
+ *
+ * Requires that the current pipe's DPLL is active.
+
+ * \return true if TV is connected.
+ * \return false if TV is disconnected.
+ */
+static int
+intel_tv_detect_type (struct drm_crtc *crtc, struct intel_output *intel_output)
+{
+	struct drm_encoder *encoder = &intel_output->enc;
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long irqflags;
+	u32 tv_ctl, save_tv_ctl;
+	u32 tv_dac, save_tv_dac;
+	int type = DRM_MODE_CONNECTOR_Unknown;
+
+	tv_dac = I915_READ(TV_DAC);
+
+	/* Disable TV interrupts around load detect or we'll recurse */
+	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
+	i915_disable_pipestat(dev_priv, 0, PIPE_HOTPLUG_INTERRUPT_ENABLE |
+			      PIPE_HOTPLUG_TV_INTERRUPT_ENABLE);
+	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
+
+	/*
+	 * Detect TV by polling)
+	 */
+	if (intel_output->load_detect_temp) {
+		/* TV not currently running, prod it with destructive detect */
+		save_tv_dac = tv_dac;
+		tv_ctl = I915_READ(TV_CTL);
+		save_tv_ctl = tv_ctl;
+		tv_ctl &= ~TV_ENC_ENABLE;
+		tv_ctl &= ~TV_TEST_MODE_MASK;
+		tv_ctl |= TV_TEST_MODE_MONITOR_DETECT;
+		tv_dac &= ~TVDAC_SENSE_MASK;
+		tv_dac |= (TVDAC_STATE_CHG_EN |
+			   TVDAC_A_SENSE_CTL |
+			   TVDAC_B_SENSE_CTL |
+			   TVDAC_C_SENSE_CTL |
+			   DAC_CTL_OVERRIDE |
+			   DAC_A_0_7_V |
+			   DAC_B_0_7_V |
+			   DAC_C_0_7_V);
+		I915_WRITE(TV_CTL, tv_ctl);
+		I915_WRITE(TV_DAC, tv_dac);
+		intel_wait_for_vblank(dev);
+		tv_dac = I915_READ(TV_DAC);
+		I915_WRITE(TV_DAC, save_tv_dac);
+		I915_WRITE(TV_CTL, save_tv_ctl);
+	}
+	/*
+	 *  A B C
+	 *  0 1 1 Composite
+	 *  1 0 X svideo
+	 *  0 0 0 Component
+	 */
+	if ((tv_dac & TVDAC_SENSE_MASK) == (TVDAC_B_SENSE | TVDAC_C_SENSE)) {
+		DRM_DEBUG("Detected Composite TV connection\n");
+		type = DRM_MODE_CONNECTOR_Composite;
+	} else if ((tv_dac & (TVDAC_A_SENSE|TVDAC_B_SENSE)) == TVDAC_A_SENSE) {
+		DRM_DEBUG("Detected S-Video TV connection\n");
+		type = DRM_MODE_CONNECTOR_SVIDEO;
+	} else if ((tv_dac & TVDAC_SENSE_MASK) == 0) {
+		DRM_DEBUG("Detected Component TV connection\n");
+		type = DRM_MODE_CONNECTOR_Component;
+	} else {
+		DRM_DEBUG("No TV connection detected\n");
+		type = -1;
+	}
+
+	/* Restore interrupt config */
+	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
+	i915_enable_pipestat(dev_priv, 0, PIPE_HOTPLUG_INTERRUPT_ENABLE |
+			     PIPE_HOTPLUG_TV_INTERRUPT_ENABLE);
+	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
+
+	return type;
+}
+
+/**
+ * Detect the TV connection.
+ *
+ * Currently this always returns CONNECTOR_STATUS_UNKNOWN, as we need to be sure
+ * we have a pipe programmed in order to probe the TV.
+ */
+static enum drm_connector_status
+intel_tv_detect(struct drm_connector *connector)
+{
+	struct drm_crtc *crtc;
+	struct drm_display_mode mode;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_tv_priv *tv_priv = intel_output->dev_priv;
+	struct drm_encoder *encoder = &intel_output->enc;
+	int dpms_mode;
+	int type = tv_priv->type;
+
+	mode = reported_modes[0];
+	drm_mode_set_crtcinfo(&mode, CRTC_INTERLACE_HALVE_V);
+
+	if (encoder->crtc) {
+		type = intel_tv_detect_type(encoder->crtc, intel_output);
+	} else {
+		crtc = intel_get_load_detect_pipe(intel_output, &mode, &dpms_mode);
+		if (crtc) {
+			type = intel_tv_detect_type(crtc, intel_output);
+			intel_release_load_detect_pipe(intel_output, dpms_mode);
+		} else
+			type = -1;
+	}
+
+	if (type < 0)
+		return connector_status_disconnected;
+
+	return connector_status_connected;
+}
+
+static struct input_res {
+	char *name;
+	int w, h;
+} input_res_table[] =
+{
+	{"640x480", 640, 480},
+	{"800x600", 800, 600},
+	{"1024x768", 1024, 768},
+	{"1280x1024", 1280, 1024},
+	{"848x480", 848, 480},
+	{"1280x720", 1280, 720},
+	{"1920x1080", 1920, 1080},
+};
+
+/**
+ * Stub get_modes function.
+ *
+ * This should probably return a set of fixed modes, unless we can figure out
+ * how to probe modes off of TV connections.
+ */
+
+static int
+intel_tv_get_modes(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode_ptr;
+	struct intel_output *intel_output = to_intel_output(connector);
+	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_output);
+	int j;
+
+	for (j = 0; j < sizeof(input_res_table) / sizeof(input_res_table[0]);
+	     j++) {
+		struct input_res *input = &input_res_table[j];
+		unsigned int hactive_s = input->w;
+		unsigned int vactive_s = input->h;
+
+		if (tv_mode->max_srcw && input->w > tv_mode->max_srcw)
+			continue;
+
+		if (input->w > 1024 && (!tv_mode->progressive
+					&& !tv_mode->component_only))
+			continue;
+
+		mode_ptr = drm_calloc(1, sizeof(struct drm_display_mode),
+				      DRM_MEM_DRIVER);
+		strncpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN);
+
+		mode_ptr->hdisplay = hactive_s;
+		mode_ptr->hsync_start = hactive_s + 1;
+		mode_ptr->hsync_end = hactive_s + 64;
+		if (mode_ptr->hsync_end <= mode_ptr->hsync_start)
+			mode_ptr->hsync_end = mode_ptr->hsync_start + 1;
+		mode_ptr->htotal = hactive_s + 96;
+
+		mode_ptr->vdisplay = vactive_s;
+		mode_ptr->vsync_start = vactive_s + 1;
+		mode_ptr->vsync_end = vactive_s + 32;
+		if (mode_ptr->vsync_end <= mode_ptr->vsync_start)
+			mode_ptr->vsync_end = mode_ptr->vsync_start  + 1;
+		mode_ptr->vtotal = vactive_s + 33;
+
+		mode_ptr->clock = (int) (tv_mode->refresh *
+					 mode_ptr->vtotal *
+					 mode_ptr->htotal / 1000) / 1000;
+
+		mode_ptr->type = DRM_MODE_TYPE_DRIVER;
+		drm_mode_probed_add(connector, mode_ptr);
+	}
+
+	return 0;
+}
+
+static void
+intel_tv_destroy (struct drm_connector *connector)
+{
+	struct intel_output *intel_output = to_intel_output(connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	drm_free(intel_output, sizeof(struct intel_output) + sizeof(struct intel_tv_priv),
+		 DRM_MEM_DRIVER);
+}
+
+
+static int
+intel_tv_set_property(struct drm_connector *connector, struct drm_property *property,
+		      uint64_t val)
+{
+	struct drm_device *dev = connector->dev;
+	struct intel_output *intel_output = to_intel_output(connector);
+	struct intel_tv_priv *tv_priv = intel_output->dev_priv;
+	int ret = 0;
+
+	ret = drm_connector_property_set_value(connector, property, val);
+	if (ret < 0)
+		goto out;
+
+	if (property == dev->mode_config.tv_left_margin_property)
+		tv_priv->margin[TV_MARGIN_LEFT] = val;
+	else if (property == dev->mode_config.tv_right_margin_property)
+		tv_priv->margin[TV_MARGIN_RIGHT] = val;
+	else if (property == dev->mode_config.tv_top_margin_property)
+		tv_priv->margin[TV_MARGIN_TOP] = val;
+	else if (property == dev->mode_config.tv_bottom_margin_property)
+		tv_priv->margin[TV_MARGIN_BOTTOM] = val;
+	else if (property == dev->mode_config.tv_mode_property) {
+		if (val >= NUM_TV_MODES) {
+			ret = -EINVAL;
+			goto out;
+		}
+		tv_priv->tv_format = tv_modes[val].name;
+		intel_tv_mode_set(&intel_output->enc, NULL, NULL);
+	} else {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	intel_tv_mode_set(&intel_output->enc, NULL, NULL);
+out:
+	return ret;
+}
+
+static const struct drm_encoder_helper_funcs intel_tv_helper_funcs = {
+	.dpms = intel_tv_dpms,
+	.mode_fixup = intel_tv_mode_fixup,
+	.prepare = intel_encoder_prepare,
+	.mode_set = intel_tv_mode_set,
+	.commit = intel_encoder_commit,
+};
+
+static const struct drm_connector_funcs intel_tv_connector_funcs = {
+	.save = intel_tv_save,
+	.restore = intel_tv_restore,
+	.detect = intel_tv_detect,
+	.destroy = intel_tv_destroy,
+	.set_property = intel_tv_set_property,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+};
+
+static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = {
+	.mode_valid = intel_tv_mode_valid,
+	.get_modes = intel_tv_get_modes,
+	.best_encoder = intel_best_encoder,
+};
+
+void intel_tv_enc_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs intel_tv_enc_funcs = {
+	.destroy = intel_tv_enc_destroy,
+};
+
+
+void
+intel_tv_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_connector *connector;
+	struct intel_output *intel_output;
+	struct intel_tv_priv *tv_priv;
+	u32 tv_dac_on, tv_dac_off, save_tv_dac;
+	char **tv_format_names;
+	int i, initial_mode = 0;
+
+	if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED)
+		return;
+
+	/* Even if we have an encoder we may not have a connector */
+	if (!dev_priv->int_tv_support)
+		return;
+
+	/*
+	 * Sanity check the TV output by checking to see if the
+	 * DAC register holds a value
+	 */
+	save_tv_dac = I915_READ(TV_DAC);
+
+	I915_WRITE(TV_DAC, save_tv_dac | TVDAC_STATE_CHG_EN);
+	tv_dac_on = I915_READ(TV_DAC);
+
+	I915_WRITE(TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN);
+	tv_dac_off = I915_READ(TV_DAC);
+
+	I915_WRITE(TV_DAC, save_tv_dac);
+
+	/*
+	 * If the register does not hold the state change enable
+	 * bit, (either as a 0 or a 1), assume it doesn't really
+	 * exist
+	 */
+	if ((tv_dac_on & TVDAC_STATE_CHG_EN) == 0 ||
+	    (tv_dac_off & TVDAC_STATE_CHG_EN) != 0)
+		return;
+
+	intel_output = drm_calloc(1, sizeof(struct intel_output) +
+				  sizeof(struct intel_tv_priv), DRM_MEM_DRIVER);
+	if (!intel_output) {
+		return;
+	}
+	connector = &intel_output->base;
+
+	drm_connector_init(dev, connector, &intel_tv_connector_funcs,
+			   DRM_MODE_CONNECTOR_SVIDEO);
+
+	drm_encoder_init(dev, &intel_output->enc, &intel_tv_enc_funcs,
+			 DRM_MODE_ENCODER_TVDAC);
+
+	drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
+	tv_priv = (struct intel_tv_priv *)(intel_output + 1);
+	intel_output->type = INTEL_OUTPUT_TVOUT;
+	intel_output->enc.possible_crtcs = ((1 << 0) | (1 << 1));
+	intel_output->enc.possible_clones = (1 << INTEL_OUTPUT_TVOUT);
+	intel_output->dev_priv = tv_priv;
+	tv_priv->type = DRM_MODE_CONNECTOR_Unknown;
+
+	/* BIOS margin values */
+	tv_priv->margin[TV_MARGIN_LEFT] = 54;
+	tv_priv->margin[TV_MARGIN_TOP] = 36;
+	tv_priv->margin[TV_MARGIN_RIGHT] = 46;
+	tv_priv->margin[TV_MARGIN_BOTTOM] = 37;
+
+	tv_priv->tv_format = kstrdup(tv_modes[initial_mode].name, GFP_KERNEL);
+
+	drm_encoder_helper_add(&intel_output->enc, &intel_tv_helper_funcs);
+	drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs);
+	connector->interlace_allowed = false;
+	connector->doublescan_allowed = false;
+
+	/* Create TV properties then attach current values */
+	tv_format_names = drm_alloc(sizeof(char *) * NUM_TV_MODES,
+				    DRM_MEM_DRIVER);
+	if (!tv_format_names)
+		goto out;
+	for (i = 0; i < NUM_TV_MODES; i++)
+		tv_format_names[i] = tv_modes[i].name;
+	drm_mode_create_tv_properties(dev, NUM_TV_MODES, tv_format_names);
+
+	drm_connector_attach_property(connector, dev->mode_config.tv_mode_property,
+				   initial_mode);
+	drm_connector_attach_property(connector,
+				   dev->mode_config.tv_left_margin_property,
+				   tv_priv->margin[TV_MARGIN_LEFT]);
+	drm_connector_attach_property(connector,
+				   dev->mode_config.tv_top_margin_property,
+				   tv_priv->margin[TV_MARGIN_TOP]);
+	drm_connector_attach_property(connector,
+				   dev->mode_config.tv_right_margin_property,
+				   tv_priv->margin[TV_MARGIN_RIGHT]);
+	drm_connector_attach_property(connector,
+				   dev->mode_config.tv_bottom_margin_property,
+				   tv_priv->margin[TV_MARGIN_BOTTOM]);
+out:
+	drm_sysfs_connector_add(connector);
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 3f663ecc3dbb..0e506f438f48 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -198,7 +198,7 @@ typedef struct _drm_i915_sarea {
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
  */
-typedef struct _drm_i915_batchbuffer {
+typedef struct drm_i915_batchbuffer {
 	int start;		/* agp offset */
 	int used;		/* nr bytes in use */
 	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
-- 
cgit v1.2.3


From e0c8463a8b00b467611607df0ff369d062528875 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@tungstengraphics.com>
Date: Fri, 19 Dec 2008 14:50:50 +1000
Subject: drm: sanitise drm modesetting API + remove unused hotplug

The initially merged modesetting API has some uglies in it, this
cleans up the struct members and ioctl ordering for initial submission.

It also removes the unneeded hotplug infrastructure.

airlied:- I've pulled this patch in from git modesetting-gem tree.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c        |  21 +---
 drivers/gpu/drm/drm_crtc_helper.c |   2 -
 drivers/gpu/drm/drm_drv.c         |  20 ++--
 include/drm/drm_crtc.h            |   3 -
 include/drm/drm_mode.h            | 205 ++++++++++++++++++--------------------
 5 files changed, 113 insertions(+), 138 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 2e880240477e..ece947d3b25b 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -804,7 +804,6 @@ void drm_mode_config_init(struct drm_device *dev)
 	dev->mode_config.num_connector = 0;
 	dev->mode_config.num_crtc = 0;
 	dev->mode_config.num_encoder = 0;
-	dev->mode_config.hotplug_counter = 0;
 }
 EXPORT_SYMBOL(drm_mode_config_init);
 
@@ -900,16 +899,6 @@ void drm_mode_config_cleanup(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_mode_config_cleanup);
 
-int drm_mode_hotplug_ioctl(struct drm_device *dev,
-			   void *data, struct drm_file *file_priv)
-{
-	struct drm_mode_hotplug *arg = data;
-
-	arg->counter = dev->mode_config.hotplug_counter;
-
-	return 0;
-}
-
 /**
  * drm_crtc_convert_to_umode - convert a drm_display_mode into a modeinfo
  * @out: drm_mode_modeinfo struct to return to the user
@@ -1543,9 +1532,9 @@ int drm_mode_cursor_ioctl(struct drm_device *dev,
 	}
 
 	mutex_lock(&dev->mode_config.mutex);
-	obj = drm_mode_object_find(dev, req->crtc, DRM_MODE_OBJECT_CRTC);
+	obj = drm_mode_object_find(dev, req->crtc_id, DRM_MODE_OBJECT_CRTC);
 	if (!obj) {
-		DRM_DEBUG("Unknown CRTC ID %d\n", req->crtc);
+		DRM_DEBUG("Unknown CRTC ID %d\n", req->crtc_id);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1622,7 +1611,7 @@ int drm_mode_addfb(struct drm_device *dev,
 		goto out;
 	}
 
-	r->buffer_id = fb->base.id;
+	r->fb_id = fb->base.id;
 	list_add(&fb->filp_head, &file_priv->fbs);
 
 out:
@@ -1714,7 +1703,7 @@ int drm_mode_getfb(struct drm_device *dev,
 	int ret = 0;
 
 	mutex_lock(&dev->mode_config.mutex);
-	obj = drm_mode_object_find(dev, r->buffer_id, DRM_MODE_OBJECT_FB);
+	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
 	if (!obj) {
 		DRM_ERROR("invalid framebuffer id\n");
 		ret = -EINVAL;
@@ -2326,7 +2315,7 @@ int drm_mode_replacefb(struct drm_device *dev,
 
 	/* right replace the current bo attached to this fb with a new bo */
 	mutex_lock(&dev->mode_config.mutex);
-	obj = drm_mode_object_find(dev, r->buffer_id, DRM_MODE_OBJECT_FB);
+	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
 	if (!obj) {
 		ret = -EINVAL;
 		goto out;
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index f914044ad971..58e335967617 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -780,8 +780,6 @@ EXPORT_SYMBOL(drm_helper_initial_config);
  */
 int drm_helper_hotplug_stage_two(struct drm_device *dev)
 {
-	dev->mode_config.hotplug_counter++;
-
 	drm_helper_plugged_event(dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index c6e338282588..7eb0e09fc73b 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -129,23 +129,21 @@ static struct drm_ioctl_desc drm_ioctls[] = {
 
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_mode_attachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_mode_detachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW),
-
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_REPLACEFB, drm_mode_replacefb, DRM_MASTER|DRM_ROOT_ONLY|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_MASTER),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 08a884bea446..395c6139c893 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -573,9 +573,6 @@ struct drm_mode_config {
 	/* Optional properties */
 	struct drm_property *scaling_mode_property;
 	struct drm_property *dithering_mode_property;
-
-	/* hotplug */
-	uint32_t hotplug_counter;
 };
 
 #define obj_to_crtc(x) container_of(x, struct drm_crtc, base)
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
index d2e791920aba..601d2bd839f6 100644
--- a/include/drm/drm_mode.h
+++ b/include/drm/drm_mode.h
@@ -33,10 +33,10 @@
 #include <linux/kernel.h>
 #endif
 
-#define DRM_DISPLAY_INFO_LEN 32
-#define DRM_CONNECTOR_NAME_LEN 32
-#define DRM_DISPLAY_MODE_LEN 32
-#define DRM_PROP_NAME_LEN 32
+#define DRM_DISPLAY_INFO_LEN	32
+#define DRM_CONNECTOR_NAME_LEN	32
+#define DRM_DISPLAY_MODE_LEN	32
+#define DRM_PROP_NAME_LEN	32
 
 #define DRM_MODE_TYPE_BUILTIN	(1<<0)
 #define DRM_MODE_TYPE_CLOCK_C	((1<<1) | DRM_MODE_TYPE_BUILTIN)
@@ -65,30 +65,30 @@
 
 /* DPMS flags */
 /* bit compatible with the xorg definitions. */
-#define DRM_MODE_DPMS_ON 0
-#define DRM_MODE_DPMS_STANDBY 1
-#define DRM_MODE_DPMS_SUSPEND 2
-#define DRM_MODE_DPMS_OFF 3
+#define DRM_MODE_DPMS_ON	0
+#define DRM_MODE_DPMS_STANDBY	1
+#define DRM_MODE_DPMS_SUSPEND	2
+#define DRM_MODE_DPMS_OFF	3
 
 /* Scaling mode options */
-#define DRM_MODE_SCALE_NON_GPU 0
-#define DRM_MODE_SCALE_FULLSCREEN 1
-#define DRM_MODE_SCALE_NO_SCALE 2
-#define DRM_MODE_SCALE_ASPECT 3
+#define DRM_MODE_SCALE_NON_GPU		0
+#define DRM_MODE_SCALE_FULLSCREEN	1
+#define DRM_MODE_SCALE_NO_SCALE		2
+#define DRM_MODE_SCALE_ASPECT		3
 
 /* Dithering mode options */
-#define DRM_MODE_DITHERING_OFF 0
-#define DRM_MODE_DITHERING_ON 1
+#define DRM_MODE_DITHERING_OFF	0
+#define DRM_MODE_DITHERING_ON	1
 
 struct drm_mode_modeinfo {
-	unsigned int clock;
-	unsigned short hdisplay, hsync_start, hsync_end, htotal, hskew;
-	unsigned short vdisplay, vsync_start, vsync_end, vtotal, vscan;
+	uint32_t clock;
+	uint16_t hdisplay, hsync_start, hsync_end, htotal, hskew;
+	uint16_t vdisplay, vsync_start, vsync_end, vtotal, vscan;
 
-	unsigned int vrefresh; /* vertical refresh * 1000 */
+	uint32_t vrefresh; /* vertical refresh * 1000 */
 
-	unsigned int flags;
-	unsigned int type;
+	uint32_t flags;
+	uint32_t type;
 	char name[DRM_DISPLAY_MODE_LEN];
 };
 
@@ -97,39 +97,39 @@ struct drm_mode_card_res {
 	uint64_t crtc_id_ptr;
 	uint64_t connector_id_ptr;
 	uint64_t encoder_id_ptr;
-	int count_fbs;
-	int count_crtcs;
-	int count_connectors;
-	int count_encoders;
-	int min_width, max_width;
-	int min_height, max_height;
+	uint32_t count_fbs;
+	uint32_t count_crtcs;
+	uint32_t count_connectors;
+	uint32_t count_encoders;
+	uint32_t min_width, max_width;
+	uint32_t min_height, max_height;
 };
 
 struct drm_mode_crtc {
 	uint64_t set_connectors_ptr;
-	int count_connectors;
+	uint32_t count_connectors;
 
-	unsigned int crtc_id; /**< Id */
-	unsigned int fb_id; /**< Id of framebuffer */
+	uint32_t crtc_id; /**< Id */
+	uint32_t fb_id; /**< Id of framebuffer */
 
-	int x, y; /**< Position on the frameuffer */
+	uint32_t x, y; /**< Position on the frameuffer */
 
 	uint32_t gamma_size;
-	int mode_valid;
+	uint32_t mode_valid;
 	struct drm_mode_modeinfo mode;
 };
 
-#define DRM_MODE_ENCODER_NONE 0
-#define DRM_MODE_ENCODER_DAC  1
-#define DRM_MODE_ENCODER_TMDS 2
-#define DRM_MODE_ENCODER_LVDS 3
-#define DRM_MODE_ENCODER_TVDAC 4
+#define DRM_MODE_ENCODER_NONE	0
+#define DRM_MODE_ENCODER_DAC	1
+#define DRM_MODE_ENCODER_TMDS	2
+#define DRM_MODE_ENCODER_LVDS	3
+#define DRM_MODE_ENCODER_TVDAC	4
 
 struct drm_mode_get_encoder {
-	unsigned int encoder_id;
-	unsigned int encoder_type;
+	uint32_t encoder_id;
+	uint32_t encoder_type;
 
-	unsigned int crtc_id; /**< Id of crtc */
+	uint32_t crtc_id; /**< Id of crtc */
 
 	uint32_t possible_crtcs;
 	uint32_t possible_clones;
@@ -137,27 +137,27 @@ struct drm_mode_get_encoder {
 
 /* This is for connectors with multiple signal types. */
 /* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */
-#define DRM_MODE_SUBCONNECTOR_Automatic 0
-#define DRM_MODE_SUBCONNECTOR_Unknown 0
-#define DRM_MODE_SUBCONNECTOR_DVID 3
-#define DRM_MODE_SUBCONNECTOR_DVIA 4
-#define DRM_MODE_SUBCONNECTOR_Composite 5
-#define DRM_MODE_SUBCONNECTOR_SVIDEO 6
-#define DRM_MODE_SUBCONNECTOR_Component 8
-
-#define DRM_MODE_CONNECTOR_Unknown 0
-#define DRM_MODE_CONNECTOR_VGA 1
-#define DRM_MODE_CONNECTOR_DVII 2
-#define DRM_MODE_CONNECTOR_DVID 3
-#define DRM_MODE_CONNECTOR_DVIA 4
-#define DRM_MODE_CONNECTOR_Composite 5
-#define DRM_MODE_CONNECTOR_SVIDEO 6
-#define DRM_MODE_CONNECTOR_LVDS 7
-#define DRM_MODE_CONNECTOR_Component 8
-#define DRM_MODE_CONNECTOR_9PinDIN 9
-#define DRM_MODE_CONNECTOR_DisplayPort 10
-#define DRM_MODE_CONNECTOR_HDMIA 11
-#define DRM_MODE_CONNECTOR_HDMIB 12
+#define DRM_MODE_SUBCONNECTOR_Automatic	0
+#define DRM_MODE_SUBCONNECTOR_Unknown	0
+#define DRM_MODE_SUBCONNECTOR_DVID	3
+#define DRM_MODE_SUBCONNECTOR_DVIA	4
+#define DRM_MODE_SUBCONNECTOR_Composite	5
+#define DRM_MODE_SUBCONNECTOR_SVIDEO	6
+#define DRM_MODE_SUBCONNECTOR_Component	8
+
+#define DRM_MODE_CONNECTOR_Unknown	0
+#define DRM_MODE_CONNECTOR_VGA		1
+#define DRM_MODE_CONNECTOR_DVII		2
+#define DRM_MODE_CONNECTOR_DVID		3
+#define DRM_MODE_CONNECTOR_DVIA		4
+#define DRM_MODE_CONNECTOR_Composite	5
+#define DRM_MODE_CONNECTOR_SVIDEO	6
+#define DRM_MODE_CONNECTOR_LVDS		7
+#define DRM_MODE_CONNECTOR_Component	8
+#define DRM_MODE_CONNECTOR_9PinDIN	9
+#define DRM_MODE_CONNECTOR_DisplayPort	10
+#define DRM_MODE_CONNECTOR_HDMIA	11
+#define DRM_MODE_CONNECTOR_HDMIB	12
 
 struct drm_mode_get_connector {
 
@@ -166,47 +166,47 @@ struct drm_mode_get_connector {
 	uint64_t props_ptr;
 	uint64_t prop_values_ptr;
 
-	int count_modes;
-	int count_props;
-	int count_encoders;
+	uint32_t count_modes;
+	uint32_t count_props;
+	uint32_t count_encoders;
 
-	unsigned int encoder_id; /**< Current Encoder */
-	unsigned int connector_id; /**< Id */
-	unsigned int connector_type;
-	unsigned int connector_type_id;
+	uint32_t encoder_id; /**< Current Encoder */
+	uint32_t connector_id; /**< Id */
+	uint32_t connector_type;
+	uint32_t connector_type_id;
 
-	unsigned int connection;
-	unsigned int mm_width, mm_height; /**< HxW in millimeters */
-	unsigned int subpixel;
+	uint32_t connection;
+	uint32_t mm_width, mm_height; /**< HxW in millimeters */
+	uint32_t subpixel;
 };
 
-#define DRM_MODE_PROP_PENDING (1<<0)
-#define DRM_MODE_PROP_RANGE (1<<1)
-#define DRM_MODE_PROP_IMMUTABLE (1<<2)
-#define DRM_MODE_PROP_ENUM (1<<3) /* enumerated type with text strings */
-#define DRM_MODE_PROP_BLOB (1<<4)
+#define DRM_MODE_PROP_PENDING	(1<<0)
+#define DRM_MODE_PROP_RANGE	(1<<1)
+#define DRM_MODE_PROP_IMMUTABLE	(1<<2)
+#define DRM_MODE_PROP_ENUM	(1<<3) /* enumerated type with text strings */
+#define DRM_MODE_PROP_BLOB	(1<<4)
 
 struct drm_mode_property_enum {
 	uint64_t value;
-	unsigned char name[DRM_PROP_NAME_LEN];
+	char name[DRM_PROP_NAME_LEN];
 };
 
 struct drm_mode_get_property {
 	uint64_t values_ptr; /* values and blob lengths */
 	uint64_t enum_blob_ptr; /* enum and blob id ptrs */
 
-	unsigned int prop_id;
-	unsigned int flags;
-	unsigned char name[DRM_PROP_NAME_LEN];
+	uint32_t prop_id;
+	uint32_t flags;
+	char name[DRM_PROP_NAME_LEN];
 
-	int count_values;
-	int count_enum_blobs;
+	uint32_t count_values;
+	uint32_t count_enum_blobs;
 };
 
 struct drm_mode_connector_set_property {
 	uint64_t value;
-	unsigned int prop_id;
-	unsigned int connector_id;
+	uint32_t prop_id;
+	uint32_t connector_id;
 };
 
 struct drm_mode_get_blob {
@@ -216,22 +216,22 @@ struct drm_mode_get_blob {
 };
 
 struct drm_mode_fb_cmd {
-	unsigned int buffer_id;
-	unsigned int width, height;
-	unsigned int pitch;
-	unsigned int bpp;
-	unsigned int depth;
-
-	unsigned int handle;
+	uint32_t fb_id;
+	uint32_t width, height;
+	uint32_t pitch;
+	uint32_t bpp;
+	uint32_t depth;
+	/* driver specific handle */
+	uint32_t handle;
 };
 
 struct drm_mode_mode_cmd {
-	unsigned int connector_id;
+	uint32_t connector_id;
 	struct drm_mode_modeinfo mode;
 };
 
-#define DRM_MODE_CURSOR_BO   0x01
-#define DRM_MODE_CURSOR_MOVE 0x02
+#define DRM_MODE_CURSOR_BO	(1<<0)
+#define DRM_MODE_CURSOR_MOVE	(1<<1)
 
 /*
  * depending on the value in flags diffrent members are used.
@@ -248,24 +248,17 @@ struct drm_mode_mode_cmd {
  *    y
  */
 struct drm_mode_cursor {
-	unsigned int flags;
-	unsigned int crtc;
-	int x;
-	int y;
+	uint32_t flags;
+	uint32_t crtc_id;
+	int32_t x;
+	int32_t y;
 	uint32_t width;
 	uint32_t height;
-	unsigned int handle;
-};
-
-/*
- * oh so ugly hotplug
- */
-struct drm_mode_hotplug {
-	uint32_t counter;
+	/* driver specific handle */
+	uint32_t handle;
 };
 
 struct drm_mode_crtc_lut {
-
 	uint32_t crtc_id;
 	uint32_t gamma_size;
 
-- 
cgit v1.2.3


From 0c7c26647579e9e647e8b969bb0e7b3f5f1a1492 Mon Sep 17 00:00:00 2001
From: Kristian H�gsberg <krh@redhat.com>
Date: Thu, 18 Dec 2008 13:14:37 +1000
Subject: drm: drop DRM_IOCTL_MODE_REPLACEFB, add+remove works just as well.

The replace fb ioctl replaces the backing buffer object for a modesetting
framebuffer object.  This can be acheived by just creating a new
framebuffer backed by the new buffer object, setting that for the crtcs
in question and then removing the old framebuffer object.

Signed-off-by: Kristian Hogsberg <krh@redhat.com>
Acked-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c           | 40 ------------------------------------
 drivers/gpu/drm/drm_drv.c            |  1 -
 drivers/gpu/drm/i915/intel_display.c | 31 ----------------------------
 include/drm/drm.h                    |  1 -
 include/drm/drm_crtc.h               |  1 -
 5 files changed, 74 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index ece947d3b25b..53c87254be4c 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -2302,46 +2302,6 @@ out:
 	return ret;
 }
 
-
-int drm_mode_replacefb(struct drm_device *dev,
-		       void *data, struct drm_file *file_priv)
-{
-	struct drm_mode_fb_cmd *r = data;
-	struct drm_mode_object *obj;
-	struct drm_framebuffer *fb;
-	int found = 0;
-	struct drm_framebuffer *fbl = NULL;
-	int ret = 0;
-
-	/* right replace the current bo attached to this fb with a new bo */
-	mutex_lock(&dev->mode_config.mutex);
-	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
-	if (!obj) {
-		ret = -EINVAL;
-		goto out;
-	}
-	fb = obj_to_fb(obj);
-
-	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
-		if (fb == fbl)
-			found = 1;
-
-	if (!found) {
-		DRM_ERROR("tried to replace an fb we didn't own\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (dev->mode_config.funcs->resize_fb)
-		ret = dev->mode_config.funcs->resize_fb(dev, file_priv, fb, r);
-	else
-		ret = -EINVAL;
-out:
-	mutex_unlock(&dev->mode_config.mutex);
-	return ret;
-
-}
-
 int drm_mode_connector_attach_encoder(struct drm_connector *connector,
 				      struct drm_encoder *encoder)
 {
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 7eb0e09fc73b..373e3de64340 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -143,7 +143,6 @@ static struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_REPLACEFB, drm_mode_replacefb, DRM_MASTER|DRM_ROOT_ONLY|DRM_CONTROL_ALLOW),
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 96c2da5b74e3..5689e44d30b3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1528,38 +1528,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
 	return fb;
 }
 
-static int intel_insert_new_fb(struct drm_device *dev,
-			       struct drm_file *file_priv,
-			       struct drm_framebuffer *fb,
-			       struct drm_mode_fb_cmd *mode_cmd)
-{
-	struct intel_framebuffer *intel_fb;
-	struct drm_gem_object *obj;
-	struct drm_crtc *crtc;
-
-	intel_fb = to_intel_framebuffer(fb);
-
-	obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
-
-	if (!obj)
-		return -EINVAL;
-
-	intel_fb->obj = obj;
-	drm_gem_object_unreference(intel_fb->obj);
-	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
-	mutex_unlock(&dev->struct_mutex);
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		if (crtc->fb == fb) {
-			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-			crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y);
-		}
-	}
-	return 0;
-}
-
 static const struct drm_mode_config_funcs intel_mode_funcs = {
-	.resize_fb = intel_insert_new_fb,
 	.fb_create = intel_user_framebuffer_create,
 	.fb_changed = intelfb_probe,
 };
diff --git a/include/drm/drm.h b/include/drm/drm.h
index 76ce6fe300b7..32e5096554e9 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -687,7 +687,6 @@ struct drm_gem_open {
 #define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
 #define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
 #define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
-#define DRM_IOCTL_MODE_REPLACEFB	DRM_IOWR(0xB0, struct drm_mode_fb_cmd)
 
 /**
  * Device specific ioctls should only be in their respective headers
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 395c6139c893..0acb07f31fa4 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -510,7 +510,6 @@ struct drm_mode_set {
  * the CRTC<->connector mappings as needed and update its view of the screen.
  */
 struct drm_mode_config_funcs {
-	int (*resize_fb)(struct drm_device *dev, struct drm_file *file_priv, struct drm_framebuffer *fb, struct drm_mode_fb_cmd *mode_cmd);
 	struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, struct drm_mode_fb_cmd *mode_cmd);
 	int (*fb_changed)(struct drm_device *dev);
 };
-- 
cgit v1.2.3


From dfef24592257805af0bee42dced099459c68a307 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 19 Dec 2008 15:07:46 +1000
Subject: i915/drm: provide compat defines for userspace for certain struct
 members.

Painfully userspace started using new names that were never actually to be
used from the external repo.

Also fill out the gaps in the structure for old/new userspace compat

Add compat defines for these structs.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/i915_drm.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 0e506f438f48..5e7d81d1df54 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -113,8 +113,31 @@ typedef struct _drm_i915_sarea {
 	int pipeB_y;
 	int pipeB_w;
 	int pipeB_h;
+
+	/* fill out some space for old userspace triple buffer */
+	drm_handle_t unused_handle;
+	uint32_t unused1, unused2, unused3;
+
+	/* buffer object handles for static buffers. May change
+	 * over the lifetime of the client.
+	 */
+	uint32_t front_bo_handle;
+	uint32_t back_bo_handle;
+	uint32_t unused_bo_handle;
+	uint32_t depth_bo_handle;
+
 } drm_i915_sarea_t;
 
+/* due to userspace building against these headers we need some compat here */
+#define planeA_x pipeA_x
+#define planeA_y pipeA_y
+#define planeA_w pipeA_w
+#define planeA_h pipeA_h
+#define planeB_x pipeB_x
+#define planeB_y pipeB_y
+#define planeB_w pipeB_w
+#define planeB_h pipeB_h
+
 /* Flags for perf_boxes
  */
 #define I915_BOX_RING_EMPTY    0x1
-- 
cgit v1.2.3


From 8d391aa410ecb230fc4c3147b94eec25b9f3c20f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 17 Dec 2008 22:32:14 -0800
Subject: drm/i915: Add missing userland definitions for gem init/execbuffer.

fdo bug #19132.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/i915_drm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 5e7d81d1df54..b3bcf72dc656 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -201,6 +201,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_SET_VBLANK_PIPE	DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
 #define DRM_IOCTL_I915_GET_VBLANK_PIPE	DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
 #define DRM_IOCTL_I915_VBLANK_SWAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t)
+#define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
-- 
cgit v1.2.3


From 3c4fdcfb2941dc36b6a16bc509a2adb97c131716 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg <krh@redhat.com>
Date: Wed, 17 Dec 2008 22:14:46 -0500
Subject: drm: pin new and unpin old buffer when setting a mode.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This removes the requirement for user space to pin a buffer before
setting a mode that is backed by the pixels from that buffer.

Signed-off-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@linux.ie>
---
 drivers/gpu/drm/drm_crtc_helper.c    | 20 ++++++++++------
 drivers/gpu/drm/i915/intel_display.c | 44 +++++++++++++++++++++++++++++-------
 include/drm/drm_crtc_helper.h        |  9 +++++---
 3 files changed, 55 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 58e335967617..d8a982b71296 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -432,7 +432,8 @@ static void drm_setup_crtcs(struct drm_device *dev)
  */
 bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 			      struct drm_display_mode *mode,
-			      int x, int y)
+			      int x, int y,
+			      struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_display_mode *adjusted_mode, saved_mode;
@@ -462,7 +463,8 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 
 	if (drm_mode_equal(&saved_mode, &crtc->mode)) {
 		if (saved_x != crtc->x || saved_y != crtc->y) {
-			crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y);
+			crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y,
+						  old_fb);
 			goto done;
 		}
 	}
@@ -501,7 +503,7 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 	/* Set up the DPLL and any encoders state that needs to adjust or depend
 	 * on the DPLL.
 	 */
-	crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y);
+	crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y, old_fb);
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 
@@ -564,6 +566,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	struct drm_device *dev;
 	struct drm_crtc **save_crtcs, *new_crtc;
 	struct drm_encoder **save_encoders, *new_encoder;
+	struct drm_framebuffer *old_fb;
 	bool save_enabled;
 	bool changed = false;
 	bool flip_or_move = false;
@@ -684,13 +687,15 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		changed = true;
 
 	if (changed) {
+		old_fb = set->crtc->fb;
 		set->crtc->fb = set->fb;
 		set->crtc->enabled = (set->mode != NULL);
 		if (set->mode != NULL) {
 			DRM_DEBUG("attempting to set mode from userspace\n");
 			drm_mode_debug_printmodeline(set->mode);
 			if (!drm_crtc_helper_set_mode(set->crtc, set->mode,
-						      set->x, set->y)) {
+						      set->x, set->y,
+						      old_fb)) {
 				ret = -EINVAL;
 				goto fail_set_mode;
 			}
@@ -701,9 +706,10 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 		drm_helper_disable_unused_functions(dev);
 	} else if (flip_or_move) {
+		old_fb = set->crtc->fb;
 		if (set->crtc->fb != set->fb)
 			set->crtc->fb = set->fb;
-		crtc_funcs->mode_set_base(set->crtc, set->x, set->y);
+		crtc_funcs->mode_set_base(set->crtc, set->x, set->y, old_fb);
 	}
 
 	kfree(save_encoders);
@@ -809,8 +815,8 @@ int drm_helper_resume_force_mode(struct drm_device *dev)
 		if (!crtc->enabled)
 			continue;
 
-		ret = drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
-					       crtc->y);
+		ret = drm_crtc_helper_set_mode(crtc, &crtc->mode,
+					       crtc->x, crtc->y, crtc->fb);
 
 		if (ret == false)
 			DRM_ERROR("failed to set mode on crtc %p\n", crtc);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 6e8b9ab83e97..e5c1c80d1f90 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -344,7 +344,8 @@ intel_wait_for_vblank(struct drm_device *dev)
 }
 
 static void
-intel_pipe_set_base(struct drm_crtc *crtc, int x, int y)
+intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
+		    struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -359,7 +360,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y)
 	int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF);
 	int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE;
 	int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
-	u32 dspcntr;
+	u32 dspcntr, alignment;
 
 	/* no fb bound */
 	if (!crtc->fb) {
@@ -368,10 +369,32 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y)
 	}
 
 	intel_fb = to_intel_framebuffer(crtc->fb);
-
 	obj = intel_fb->obj;
 	obj_priv = obj->driver_private;
 
+	switch (obj_priv->tiling_mode) {
+	case I915_TILING_NONE:
+		alignment = 64 * 1024;
+		break;
+	case I915_TILING_X:
+		if (IS_I9XX(dev))
+			alignment = 1024 * 1024;
+		else
+			alignment = 512 * 1024;
+		break;
+	case I915_TILING_Y:
+		/* FIXME: Is this true? */
+		DRM_ERROR("Y tiled not allowed for scan out buffers\n");
+		return;
+	default:
+		BUG();
+	}
+
+	if (i915_gem_object_pin(intel_fb->obj, alignment))
+		return;
+
+	i915_gem_object_set_to_gtt_domain(intel_fb->obj, 1);
+
 	Start = obj_priv->gtt_offset;
 	Offset = y * crtc->fb->pitch + x * (crtc->fb->bits_per_pixel / 8);
 
@@ -409,6 +432,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y)
 		I915_READ(dspbase);
 	}
 
+	intel_wait_for_vblank(dev);
+
+	if (old_fb) {
+		intel_fb = to_intel_framebuffer(old_fb);
+		i915_gem_object_unpin(intel_fb->obj);
+	}
 
 	if (!dev->primary->master)
 		return;
@@ -680,7 +709,8 @@ static int intel_panel_fitter_pipe (struct drm_device *dev)
 static void intel_crtc_mode_set(struct drm_crtc *crtc,
 				struct drm_display_mode *mode,
 				struct drm_display_mode *adjusted_mode,
-				int x, int y)
+				int x, int y,
+				struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -915,9 +945,7 @@ static void intel_crtc_mode_set(struct drm_crtc *crtc,
 	I915_WRITE(dspcntr_reg, dspcntr);
 
 	/* Flush the plane changes */
-	intel_pipe_set_base(crtc, x, y);
-
-	intel_wait_for_vblank(dev);
+	intel_pipe_set_base(crtc, x, y, old_fb);
 
 	drm_vblank_post_modeset(dev, pipe);
 }
@@ -1153,7 +1181,7 @@ struct drm_crtc *intel_get_load_detect_pipe(struct intel_output *intel_output,
 	if (!crtc->enabled) {
 		if (!mode)
 			mode = &load_detect_mode;
-		drm_crtc_helper_set_mode(crtc, mode, 0, 0);
+		drm_crtc_helper_set_mode(crtc, mode, 0, 0, crtc->fb);
 	} else {
 		if (intel_crtc->dpms_mode != DRM_MODE_DPMS_ON) {
 			crtc_funcs = crtc->helper_private;
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index a341828d1d15..4bc04cf460a7 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -55,10 +55,12 @@ struct drm_crtc_helper_funcs {
 			   struct drm_display_mode *adjusted_mode);
 	/* Actually set the mode */
 	void (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
-			 struct drm_display_mode *adjusted_mode, int x, int y);
+			 struct drm_display_mode *adjusted_mode, int x, int y,
+			 struct drm_framebuffer *old_fb);
 
 	/* Move the crtc on the current fb to the given position *optional* */
-	void (*mode_set_base)(struct drm_crtc *crtc, int x, int y);
+	void (*mode_set_base)(struct drm_crtc *crtc, int x, int y,
+			      struct drm_framebuffer *old_fb);
 };
 
 struct drm_encoder_helper_funcs {
@@ -93,7 +95,8 @@ extern bool drm_helper_initial_config(struct drm_device *dev, bool can_grow);
 extern int drm_crtc_helper_set_config(struct drm_mode_set *set);
 extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 				     struct drm_display_mode *mode,
-				     int x, int y);
+				     int x, int y,
+				     struct drm_framebuffer *old_fb);
 extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
 
 extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
-- 
cgit v1.2.3


From fede5c91c4a8a7701d205b2b84b9835ddc7d6f02 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 19 Dec 2008 17:23:38 -0800
Subject: drm: Add a debug node for vblank state.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@linux.ie>
---
 drivers/gpu/drm/drm_irq.c  |  9 +++++++
 drivers/gpu/drm/drm_proc.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 include/drm/drmP.h         |  1 +
 3 files changed, 73 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 1608f8dbfda0..724e505873cf 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -116,6 +116,9 @@ void drm_vblank_cleanup(struct drm_device *dev)
 		 dev->num_crtcs, DRM_MEM_DRIVER);
 	drm_free(dev->last_vblank, sizeof(*dev->last_vblank) * dev->num_crtcs,
 		 DRM_MEM_DRIVER);
+	drm_free(dev->last_vblank_wait,
+		 sizeof(*dev->last_vblank_wait) * dev->num_crtcs,
+		 DRM_MEM_DRIVER);
 	drm_free(dev->vblank_inmodeset, sizeof(*dev->vblank_inmodeset) *
 		 dev->num_crtcs, DRM_MEM_DRIVER);
 
@@ -161,6 +164,11 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 	if (!dev->last_vblank)
 		goto err;
 
+	dev->last_vblank_wait = drm_calloc(num_crtcs, sizeof(u32),
+					   DRM_MEM_DRIVER);
+	if (!dev->last_vblank_wait)
+		goto err;
+
 	dev->vblank_inmodeset = drm_calloc(num_crtcs, sizeof(int),
 					 DRM_MEM_DRIVER);
 	if (!dev->vblank_inmodeset)
@@ -642,6 +650,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 	} else {
 		DRM_DEBUG("waiting on vblank count %d, crtc %d\n",
 			  vblwait->request.sequence, crtc);
+		dev->last_vblank_wait[crtc] = vblwait->request.sequence;
 		DRM_WAIT_ON(ret, dev->vbl_queue[crtc], 3 * DRM_HZ,
 			    ((drm_vblank_count(dev, crtc)
 			      - vblwait->request.sequence) <= (1 << 23)));
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index 7dbaa1a19ea4..8df849f66830 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -49,6 +49,8 @@ static int drm_queues_info(char *buf, char **start, off_t offset,
 			   int request, int *eof, void *data);
 static int drm_bufs_info(char *buf, char **start, off_t offset,
 			 int request, int *eof, void *data);
+static int drm_vblank_info(char *buf, char **start, off_t offset,
+			   int request, int *eof, void *data);
 static int drm_gem_name_info(char *buf, char **start, off_t offset,
 			     int request, int *eof, void *data);
 static int drm_gem_object_info(char *buf, char **start, off_t offset,
@@ -72,6 +74,7 @@ static struct drm_proc_list {
 	{"clients", drm_clients_info, 0},
 	{"queues", drm_queues_info, 0},
 	{"bufs", drm_bufs_info, 0},
+	{"vblank", drm_vblank_info, 0},
 	{"gem_names", drm_gem_name_info, DRIVER_GEM},
 	{"gem_objects", drm_gem_object_info, DRIVER_GEM},
 #if DRM_DEBUG_CODE
@@ -457,6 +460,66 @@ static int drm_bufs_info(char *buf, char **start, off_t offset, int request,
 	return ret;
 }
 
+/**
+ * Called when "/proc/dri/.../vblank" is read.
+ *
+ * \param buf output buffer.
+ * \param start start of output data.
+ * \param offset requested start offset.
+ * \param request requested number of bytes.
+ * \param eof whether there is no more data to return.
+ * \param data private data.
+ * \return number of written bytes.
+ */
+static int drm__vblank_info(char *buf, char **start, off_t offset, int request,
+			  int *eof, void *data)
+{
+	struct drm_minor *minor = (struct drm_minor *) data;
+	struct drm_device *dev = minor->dev;
+	int len = 0;
+	int crtc;
+
+	if (offset > DRM_PROC_LIMIT) {
+		*eof = 1;
+		return 0;
+	}
+
+	*start = &buf[offset];
+	*eof = 0;
+
+	for (crtc = 0; crtc < dev->num_crtcs; crtc++) {
+		DRM_PROC_PRINT("CRTC %d enable:     %d\n",
+			       crtc, atomic_read(&dev->vblank_refcount[crtc]));
+		DRM_PROC_PRINT("CRTC %d counter:    %d\n",
+			       crtc, drm_vblank_count(dev, crtc));
+		DRM_PROC_PRINT("CRTC %d last wait:  %d\n",
+			       crtc, dev->last_vblank_wait[crtc]);
+		DRM_PROC_PRINT("CRTC %d in modeset: %d\n",
+			       crtc, dev->vblank_inmodeset[crtc]);
+	}
+
+	if (len > request + offset)
+		return request;
+	*eof = 1;
+	return len - offset;
+}
+
+/**
+ * Simply calls _vblank_info() while holding the drm_device::struct_mutex lock.
+ */
+static int drm_vblank_info(char *buf, char **start, off_t offset, int request,
+			 int *eof, void *data)
+{
+	struct drm_minor *minor = (struct drm_minor *) data;
+	struct drm_device *dev = minor->dev;
+	int ret;
+
+	mutex_lock(&dev->struct_mutex);
+	ret = drm__vblank_info(buf, start, offset, request, eof, data);
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
 /**
  * Called when "/proc/dri/.../clients" is read.
  *
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 7802c80f2b23..afb7858c068d 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -911,6 +911,7 @@ struct drm_device {
 	int *vblank_enabled;            /* so we don't call enable more than
 					   once per disable */
 	int *vblank_inmodeset;          /* Display driver is setting mode */
+	u32 *last_vblank_wait;		/* Last vblank seqno waited per CRTC */
 	struct timer_list vblank_disable_timer;
 
 	u32 max_vblank_count;           /**< size of vblank counter register */
-- 
cgit v1.2.3


From 773ff60e841461cb1f9374a713ffcda029b8c317 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 23 Dec 2008 19:37:01 +0900
Subject: SLUB: failslab support

Currently fault-injection capability for SLAB allocator is only
available to SLAB. This patch makes it available to SLUB, too.

[penberg@cs.helsinki.fi: unify slab and slub implementations]
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/fault-inject.h |  9 ++++++
 lib/Kconfig.debug            |  1 +
 mm/Makefile                  |  1 +
 mm/failslab.c                | 59 ++++++++++++++++++++++++++++++++++
 mm/slab.c                    | 75 +++-----------------------------------------
 mm/slub.c                    |  4 +++
 6 files changed, 79 insertions(+), 70 deletions(-)
 create mode 100644 mm/failslab.c

(limited to 'include')

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 32368c4f0326..06ca9b21dad2 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -81,4 +81,13 @@ static inline void cleanup_fault_attr_dentries(struct fault_attr *attr)
 
 #endif /* CONFIG_FAULT_INJECTION */
 
+#ifdef CONFIG_FAILSLAB
+extern bool should_failslab(size_t size, gfp_t gfpflags);
+#else
+static inline bool should_failslab(size_t size, gfp_t gfpflags)
+{
+	return false;
+}
+#endif /* CONFIG_FAILSLAB */
+
 #endif /* _LINUX_FAULT_INJECT_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443bc..af65ae7f0549 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -699,6 +699,7 @@ config FAULT_INJECTION
 config FAILSLAB
 	bool "Fault-injection capability for kmalloc"
 	depends on FAULT_INJECTION
+	depends on SLAB || SLUB
 	help
 	  Provide fault-injection capability for kmalloc.
 
diff --git a/mm/Makefile b/mm/Makefile
index c06b45a1ff5f..51c27709cc7c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
+obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
diff --git a/mm/failslab.c b/mm/failslab.c
new file mode 100644
index 000000000000..7c6ea6493f80
--- /dev/null
+++ b/mm/failslab.c
@@ -0,0 +1,59 @@
+#include <linux/fault-inject.h>
+
+static struct {
+	struct fault_attr attr;
+	u32 ignore_gfp_wait;
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+	struct dentry *ignore_gfp_wait_file;
+#endif
+} failslab = {
+	.attr = FAULT_ATTR_INITIALIZER,
+	.ignore_gfp_wait = 1,
+};
+
+bool should_failslab(size_t size, gfp_t gfpflags)
+{
+	if (gfpflags & __GFP_NOFAIL)
+		return false;
+
+        if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
+		return false;
+
+	return should_fail(&failslab.attr, size);
+}
+
+static int __init setup_failslab(char *str)
+{
+	return setup_fault_attr(&failslab.attr, str);
+}
+__setup("failslab=", setup_failslab);
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init failslab_debugfs_init(void)
+{
+	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	struct dentry *dir;
+	int err;
+
+	err = init_fault_attr_dentries(&failslab.attr, "failslab");
+	if (err)
+		return err;
+	dir = failslab.attr.dentries.dir;
+
+	failslab.ignore_gfp_wait_file =
+		debugfs_create_bool("ignore-gfp-wait", mode, dir,
+				      &failslab.ignore_gfp_wait);
+
+	if (!failslab.ignore_gfp_wait_file) {
+		err = -ENOMEM;
+		debugfs_remove(failslab.ignore_gfp_wait_file);
+		cleanup_fault_attr_dentries(&failslab.attr);
+	}
+
+	return err;
+}
+
+late_initcall(failslab_debugfs_init);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/mm/slab.c b/mm/slab.c
index 09187517f9dc..c347dd8480cc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3106,79 +3106,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
-#ifdef CONFIG_FAILSLAB
-
-static struct failslab_attr {
-
-	struct fault_attr attr;
-
-	u32 ignore_gfp_wait;
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-	struct dentry *ignore_gfp_wait_file;
-#endif
-
-} failslab = {
-	.attr = FAULT_ATTR_INITIALIZER,
-	.ignore_gfp_wait = 1,
-};
-
-static int __init setup_failslab(char *str)
-{
-	return setup_fault_attr(&failslab.attr, str);
-}
-__setup("failslab=", setup_failslab);
-
-static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
+static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 {
 	if (cachep == &cache_cache)
-		return 0;
-	if (flags & __GFP_NOFAIL)
-		return 0;
-	if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
-		return 0;
+		return false;
 
-	return should_fail(&failslab.attr, obj_size(cachep));
+	return should_failslab(obj_size(cachep), flags);
 }
 
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
-static int __init failslab_debugfs(void)
-{
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-	struct dentry *dir;
-	int err;
-
-	err = init_fault_attr_dentries(&failslab.attr, "failslab");
-	if (err)
-		return err;
-	dir = failslab.attr.dentries.dir;
-
-	failslab.ignore_gfp_wait_file =
-		debugfs_create_bool("ignore-gfp-wait", mode, dir,
-				      &failslab.ignore_gfp_wait);
-
-	if (!failslab.ignore_gfp_wait_file) {
-		err = -ENOMEM;
-		debugfs_remove(failslab.ignore_gfp_wait_file);
-		cleanup_fault_attr_dentries(&failslab.attr);
-	}
-
-	return err;
-}
-
-late_initcall(failslab_debugfs);
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
-#else /* CONFIG_FAILSLAB */
-
-static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
-{
-	return 0;
-}
-
-#endif /* CONFIG_FAILSLAB */
-
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
@@ -3381,7 +3316,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 
-	if (should_failslab(cachep, flags))
+	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
@@ -3457,7 +3392,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	unsigned long save_flags;
 	void *objp;
 
-	if (should_failslab(cachep, flags))
+	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
diff --git a/mm/slub.c b/mm/slub.c
index a2cd47d89e0a..640fde7e354c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -24,6 +24,7 @@
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
 #include <linux/math64.h>
+#include <linux/fault-inject.h>
 
 /*
  * Lock order:
@@ -1591,6 +1592,9 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	unsigned long flags;
 	unsigned int objsize;
 
+	if (should_failslab(s->objsize, gfpflags))
+		return NULL;
+
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	objsize = c->objsize;
-- 
cgit v1.2.3


From dfcd3610289132a762b7dc0eaf33998262cd9e20 Mon Sep 17 00:00:00 2001
From: Pascal Terjan <pterjan@mandriva.com>
Date: Tue, 25 Nov 2008 15:08:19 +0100
Subject: slab: Fix comment on #endif

This #endif in slab.h is described as closing the inner block while it's for
the big CONFIG_NUMA one. That makes reading the code a bit harder.

This trivial patch fixes the comment.

Signed-off-by: Pascal Terjan <pterjan@mandriva.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 000da12b5cf0..9d8ca14be3c4 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -285,7 +285,7 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
 #define kmalloc_node_track_caller(size, flags, node) \
 	kmalloc_track_caller(size, flags)
 
-#endif /* DEBUG_SLAB */
+#endif /* CONFIG_NUMA */
 
 /*
  * Shortcuts
-- 
cgit v1.2.3


From 43a256322ac1fc105c181b3cade3b9bfc0b63ca1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 28 Dec 2008 16:01:13 -0800
Subject: sparseirq: move __weak symbols into separate compilation unit

GCC has a bug with __weak alias functions: if the functions are in
the same compilation unit as their call site, GCC can decide to
inline them - and thus rob the linker of the opportunity to override
the weak alias with the real thing.

So move all the IRQ handling related __weak symbols to kernel/irq/chip.c.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  6 ++++++
 include/linux/irq.h       |  3 ---
 init/main.c               | 10 ----------
 kernel/irq/manage.c       |  9 ---------
 kernel/softirq.c          | 20 ++++++++++++++++++++
 5 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 777f89e00b4a..d9a370325ae2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -467,4 +467,10 @@ static inline void init_irq_proc(void)
 
 int show_interrupts(struct seq_file *p, void *v);
 
+struct irq_desc;
+
+extern int early_irq_init(void);
+extern int arch_early_irq_init(void);
+extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0e40af4bac40..d64a6d49bdef 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -193,9 +193,6 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern int early_irq_init(void);
-extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 					struct irq_desc *desc, int cpu);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
diff --git a/init/main.c b/init/main.c
index c314aa15370e..2c183abbf61c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,16 +539,6 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-int __init __weak arch_early_irq_init(void)
-{
-	return 0;
-}
-
-int __init __weak early_irq_init(void)
-{
-	return arch_early_irq_init();
-}
-
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c2741b02ad38..46953a06f4a8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -261,15 +261,6 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-/*
- * [ Not in kernel/irq/handle.c, so that GCC does not
- *   inline the __weak alias: ]
- */
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
-{
-	return 0;
-}
-
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..daf46358d2dd 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -797,3 +797,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
 }
 EXPORT_SYMBOL(on_each_cpu);
 #endif
+
+/*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
+ *   GCC does not inline them incorrectly. ]
+ */
+
+int __init __weak early_irq_init(void)
+{
+	return 0;
+}
+
+int __init __weak arch_early_irq_init(void)
+{
+	return 0;
+}
+
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+	return 0;
+}
-- 
cgit v1.2.3


From d61c72e52b98411d1cfef1fdb3f5a8bb070f8966 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 10 Dec 2008 14:07:21 +0100
Subject: DMI: add dmi_match

Add a wrapper for testing system_info which will handle also NULL
system infos.

This will be used by the ata PIIX driver.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Alexandru Romanescu <a_romanescu@yahoo.co.uk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/firmware/dmi_scan.c | 16 ++++++++++++++++
 include/linux/dmi.h         |  3 +++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 4a597d8c2f70..78b989d202a3 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -582,3 +582,19 @@ int dmi_walk(void (*decode)(const struct dmi_header *))
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dmi_walk);
+
+/**
+ * dmi_match - compare a string to the dmi field (if exists)
+ *
+ * Returns true if the requested field equals to the str (including NULL).
+ */
+bool dmi_match(enum dmi_field f, const char *str)
+{
+	const char *info = dmi_get_system_info(f);
+
+	if (info == NULL || str == NULL)
+		return info == str;
+
+	return !strcmp(info, str);
+}
+EXPORT_SYMBOL_GPL(dmi_match);
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 2bfda178f274..34161907b2f8 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -47,6 +47,7 @@ extern int dmi_name_in_vendors(const char *str);
 extern int dmi_name_in_serial(const char *str);
 extern int dmi_available;
 extern int dmi_walk(void (*decode)(const struct dmi_header *));
+extern bool dmi_match(enum dmi_field f, const char *str);
 
 #else
 
@@ -61,6 +62,8 @@ static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *))
 	{ return -1; }
+static inline bool dmi_match(enum dmi_field f, const char *str)
+	{ return false; }
 
 #endif
 
-- 
cgit v1.2.3


From b29f841378460c37e99a2398d0015d9bd7901a09 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 30 Nov 2008 10:39:01 -0600
Subject: [SCSI] remove timeout from struct scsi_device

by removing the unused timeout parameter we ensure a compile failure if
anyone is accidentally still using it rather than the block timeout.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/scsi/scsi_device.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index a37a8148a310..2a1c1419994d 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -159,8 +159,6 @@ struct scsi_device {
 	atomic_t iodone_cnt;
 	atomic_t ioerr_cnt;
 
-	int timeout;
-
 	struct device		sdev_gendev,
 				sdev_dev;
 
-- 
cgit v1.2.3


From 63c62f1cb980241513c82cacd5b9f878527c6647 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 2 Dec 2008 00:32:04 -0600
Subject: [SCSI] iscsi_tcp: prepare helpers for LLDs that can offload some
 operations

cxgb3i is unlike qla4xxx and bnx2i in that it does not offload entire
scsi commands or iscsi sequences. Instead it only offloads the transfer
of a ISCSI DATA_IN pdu's data, the digests and padding. This patch fixes up the
iscsi tcp recv path so that it exports its skb recv processing so
cxgb3i and other drivers can call them. All they have to do is pass
the function the skb with the hdr or data pdu header and this function
will do the rest.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/iscsi_tcp.c | 120 +++++++++++++++++++++++++++++++++++------------
 include/scsi/iscsi_if.h  |   5 +-
 2 files changed, 92 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index e11bce6ab63c..464de780d953 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -486,7 +486,8 @@ iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
 	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
 	struct hash_desc *rx_hash = NULL;
 
-	if (conn->datadgst_en)
+	if (conn->datadgst_en &
+	    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD))
 		rx_hash = &tcp_conn->rx_hash;
 
 	iscsi_segment_init_linear(&tcp_conn->in.segment,
@@ -774,7 +775,8 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 			 * we move on to the next scatterlist entry and
 			 * update the digest per-entry.
 			 */
-			if (conn->datadgst_en)
+			if (conn->datadgst_en &&
+			    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD))
 				rx_hash = &tcp_conn->rx_hash;
 
 			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
@@ -902,34 +904,52 @@ iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
 	 * and go back for more. */
 	if (conn->hdrdgst_en) {
 		if (segment->digest_len == 0) {
+			/*
+			 * Even if we offload the digest processing we
+			 * splice it in so we can increment the skb/segment
+			 * counters in preparation for the data segment.
+			 */
 			iscsi_tcp_segment_splice_digest(segment,
 							segment->recv_digest);
 			return 0;
 		}
-		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
-				      segment->total_copied - ISCSI_DIGEST_SIZE,
-				      segment->digest);
 
-		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
-			return ISCSI_ERR_HDR_DGST;
+		if (!(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) {
+			iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
+				segment->total_copied - ISCSI_DIGEST_SIZE,
+				segment->digest);
+
+			if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
+				return ISCSI_ERR_HDR_DGST;
+		}
 	}
 
 	tcp_conn->in.hdr = hdr;
 	return iscsi_tcp_hdr_dissect(conn, hdr);
 }
 
+inline int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn)
+{
+	return tcp_conn->in.segment.done == iscsi_tcp_hdr_recv_done;
+}
+
+enum {
+	ISCSI_TCP_SEGMENT_DONE,		/* curr seg has been processed */
+	ISCSI_TCP_SKB_DONE,		/* skb is out of data */
+	ISCSI_TCP_CONN_ERR,		/* iscsi layer has fired a conn err */
+	ISCSI_TCP_SUSPENDED,		/* conn is suspended */
+};
+
 /**
- * iscsi_tcp_recv - TCP receive in sendfile fashion
- * @rd_desc: read descriptor
- * @skb: socket buffer
+ * iscsi_tcp_recv_skb - Process skb
+ * @conn: iscsi connection
+ * @skb: network buffer with header and/or data segment
  * @offset: offset in skb
- * @len: skb->len - offset
- **/
-static int
-iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
-	       unsigned int offset, size_t len)
+ * @offload: bool indicating if transfer was offloaded
+ */
+int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
+		       unsigned int offset, bool offloaded, int *status)
 {
-	struct iscsi_conn *conn = rd_desc->arg.data;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_segment *segment = &tcp_conn->in.segment;
 	struct skb_seq_state seq;
@@ -940,9 +960,15 @@ iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 
 	if (unlikely(conn->suspend_rx)) {
 		debug_tcp("conn %d Rx suspended!\n", conn->id);
+		*status = ISCSI_TCP_SUSPENDED;
 		return 0;
 	}
 
+	if (offloaded) {
+		segment->total_copied = segment->total_size;
+		goto segment_done;
+	}
+
 	skb_prepare_seq_read(skb, offset, skb->len, &seq);
 	while (1) {
 		unsigned int avail;
@@ -952,7 +978,9 @@ iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 		if (avail == 0) {
 			debug_tcp("no more data avail. Consumed %d\n",
 				  consumed);
-			break;
+			*status = ISCSI_TCP_SKB_DONE;
+			skb_abort_seq_read(&seq);
+			goto skb_done;
 		}
 		BUG_ON(segment->copied >= segment->size);
 
@@ -962,25 +990,55 @@ iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 		consumed += rc;
 
 		if (segment->total_copied >= segment->total_size) {
-			debug_tcp("segment done\n");
-			rc = segment->done(tcp_conn, segment);
-			if (rc != 0) {
-				skb_abort_seq_read(&seq);
-				goto error;
-			}
-
-			/* The done() functions sets up the
-			 * next segment. */
+			skb_abort_seq_read(&seq);
+			goto segment_done;
 		}
 	}
-	skb_abort_seq_read(&seq);
+
+segment_done:
+	*status = ISCSI_TCP_SEGMENT_DONE;
+	debug_tcp("segment done\n");
+	rc = segment->done(tcp_conn, segment);
+	if (rc != 0) {
+		*status = ISCSI_TCP_CONN_ERR;
+		debug_tcp("Error receiving PDU, errno=%d\n", rc);
+		iscsi_conn_failure(conn, rc);
+		return 0;
+	}
+	/* The done() functions sets up the next segment. */
+
+skb_done:
 	conn->rxdata_octets += consumed;
 	return consumed;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_recv_skb);
 
-error:
-	debug_tcp("Error receiving PDU, errno=%d\n", rc);
-	iscsi_conn_failure(conn, rc);
-	return 0;
+/**
+ * iscsi_tcp_recv - TCP receive in sendfile fashion
+ * @rd_desc: read descriptor
+ * @skb: socket buffer
+ * @offset: offset in skb
+ * @len: skb->len - offset
+ **/
+static int
+iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+	       unsigned int offset, size_t len)
+{
+	struct iscsi_conn *conn = rd_desc->arg.data;
+	unsigned int consumed, total_consumed = 0;
+	int status;
+
+	debug_tcp("in %d bytes\n", skb->len - offset);
+
+	do {
+		status = 0;
+		consumed = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
+		offset += consumed;
+		total_consumed += consumed;
+	} while (consumed != 0 && status != ISCSI_TCP_SKB_DONE);
+
+	debug_tcp("read %d bytes status %d\n", skb->len - offset, status);
+	return total_consumed;
 }
 
 static void
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 0c9514de5df7..8e008c96e795 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -333,8 +333,9 @@ enum iscsi_host_param {
 #define CAP_TEXT_NEGO		0x80
 #define CAP_MARKERS		0x100
 #define CAP_FW_DB		0x200
-#define CAP_SENDTARGETS_OFFLOAD	0x400
-#define CAP_DATA_PATH_OFFLOAD	0x800
+#define CAP_SENDTARGETS_OFFLOAD	0x400	/* offload discovery process */
+#define CAP_DATA_PATH_OFFLOAD	0x800	/* offload entire IO path */
+#define CAP_DIGEST_OFFLOAD	0x1000	/* offload hdr and data digests */
 
 /*
  * These flags describes reason of stop_conn() call
-- 
cgit v1.2.3


From 577577da6d197ea3dcf3ee19c4f902fbd3a9390a Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 2 Dec 2008 00:32:05 -0600
Subject: [SCSI] libiscsi: prepare libiscsi for new offload engines by
 modifying unsol data code

cxgb3i offloads data transfers. It does not offload the entire scsi/iscsi
procssing like qla4xxx and it does not offload the iscsi sequence
processing like how bnx2i does. cxgb3i relies on iscsi_tcp for the
seqeunce handling so this changes how we transfer unsolicitied data by
adding a common r2t struct and helpers.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libiscsi.c             | 183 ++++++++++++++++++++++++------------
 include/scsi/libiscsi.h             |  34 +++++--
 include/scsi/scsi_transport_iscsi.h |   9 +-
 3 files changed, 156 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 3fdee7370ccc..80cbcdce7fa3 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -88,34 +88,47 @@ iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
 }
 EXPORT_SYMBOL_GPL(iscsi_update_cmdsn);
 
-void iscsi_prep_unsolicit_data_pdu(struct iscsi_task *task,
-				   struct iscsi_data *hdr)
+/**
+ * iscsi_prep_data_out_pdu - initialize Data-Out
+ * @task: scsi command task
+ * @r2t: R2T info
+ * @hdr: iscsi data in pdu
+ *
+ * Notes:
+ *	Initialize Data-Out within this R2T sequence and finds
+ *	proper data_offset within this SCSI command.
+ *
+ *	This function is called with connection lock taken.
+ **/
+void iscsi_prep_data_out_pdu(struct iscsi_task *task, struct iscsi_r2t_info *r2t,
+			   struct iscsi_data *hdr)
 {
 	struct iscsi_conn *conn = task->conn;
+	unsigned int left = r2t->data_length - r2t->sent;
+
+	task->hdr_len = sizeof(struct iscsi_data);
 
 	memset(hdr, 0, sizeof(struct iscsi_data));
-	hdr->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
-	hdr->datasn = cpu_to_be32(task->unsol_datasn);
-	task->unsol_datasn++;
+	hdr->ttt = r2t->ttt;
+	hdr->datasn = cpu_to_be32(r2t->datasn);
+	r2t->datasn++;
 	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
-	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
-
-	hdr->itt = task->hdr->itt;
-	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
-	hdr->offset = cpu_to_be32(task->unsol_offset);
-
-	if (task->unsol_count > conn->max_xmit_dlength) {
+	memcpy(hdr->lun, task->lun, sizeof(hdr->lun));
+	hdr->itt = task->hdr_itt;
+	hdr->exp_statsn = r2t->exp_statsn;
+	hdr->offset = cpu_to_be32(r2t->data_offset + r2t->sent);
+	if (left > conn->max_xmit_dlength) {
 		hton24(hdr->dlength, conn->max_xmit_dlength);
-		task->data_count = conn->max_xmit_dlength;
-		task->unsol_offset += task->data_count;
+		r2t->data_count = conn->max_xmit_dlength;
 		hdr->flags = 0;
 	} else {
-		hton24(hdr->dlength, task->unsol_count);
-		task->data_count = task->unsol_count;
+		hton24(hdr->dlength, left);
+		r2t->data_count = left;
 		hdr->flags = ISCSI_FLAG_CMD_FINAL;
 	}
+	conn->dataout_pdus_cnt++;
 }
-EXPORT_SYMBOL_GPL(iscsi_prep_unsolicit_data_pdu);
+EXPORT_SYMBOL_GPL(iscsi_prep_data_out_pdu);
 
 static int iscsi_add_hdr(struct iscsi_task *task, unsigned len)
 {
@@ -206,11 +219,17 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
 	struct iscsi_session *session = conn->session;
-	struct iscsi_cmd *hdr = task->hdr;
 	struct scsi_cmnd *sc = task->sc;
+	struct iscsi_cmd *hdr;
 	unsigned hdrlength, cmd_len;
 	int rc;
 
+	rc = conn->session->tt->alloc_pdu(task);
+	if (rc)
+		return rc;
+	hdr = (struct iscsi_cmd *) task->hdr;
+	memset(hdr, 0, sizeof(*hdr));
+
 	task->hdr_len = 0;
 	rc = iscsi_add_hdr(task, sizeof(*hdr));
 	if (rc)
@@ -218,8 +237,9 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	hdr->opcode = ISCSI_OP_SCSI_CMD;
 	hdr->flags = ISCSI_ATTR_SIMPLE;
 	int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
-	hdr->itt = build_itt(task->itt, session->age);
-	hdr->cmdsn = cpu_to_be32(session->cmdsn);
+	memcpy(task->lun, hdr->lun, sizeof(task->lun));
+	hdr->itt = task->hdr_itt = build_itt(task->itt, session->age);
+	hdr->cmdsn = task->cmdsn = cpu_to_be32(session->cmdsn);
 	session->cmdsn++;
 	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
 	cmd_len = sc->cmd_len;
@@ -242,6 +262,8 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	}
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
 		unsigned out_len = scsi_out(sc)->length;
+		struct iscsi_r2t_info *r2t = &task->unsol_r2t;
+
 		hdr->data_length = cpu_to_be32(out_len);
 		hdr->flags |= ISCSI_FLAG_CMD_WRITE;
 		/*
@@ -254,13 +276,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 		 *			without	R2T ack right after
 		 *			immediate data
 		 *
-		 *	r2t_data_count	bytes to be sent via R2T ack's
+		 *	r2t data_length bytes to be sent via R2T ack's
 		 *
 		 *      pad_count       bytes to be sent as zero-padding
 		 */
-		task->unsol_count = 0;
-		task->unsol_offset = 0;
-		task->unsol_datasn = 0;
+		memset(r2t, 0, sizeof(*r2t));
 
 		if (session->imm_data_en) {
 			if (out_len >= session->first_burst)
@@ -274,12 +294,14 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 			zero_data(hdr->dlength);
 
 		if (!session->initial_r2t_en) {
-			task->unsol_count = min(session->first_burst, out_len)
-							     - task->imm_count;
-			task->unsol_offset = task->imm_count;
+			r2t->data_length = min(session->first_burst, out_len) -
+					       task->imm_count;
+			r2t->data_offset = task->imm_count;
+			r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
+			r2t->exp_statsn = cpu_to_be32(conn->exp_statsn);
 		}
 
-		if (!task->unsol_count)
+		if (!task->unsol_r2t.data_length)
 			/* No unsolicit Data-Out's */
 			hdr->flags |= ISCSI_FLAG_CMD_FINAL;
 	} else {
@@ -300,8 +322,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	WARN_ON(hdrlength >= 256);
 	hdr->hlength = hdrlength & 0xFF;
 
-	if (conn->session->tt->init_task &&
-	    conn->session->tt->init_task(task))
+	if (session->tt->init_task && session->tt->init_task(task))
 		return -EIO;
 
 	task->state = ISCSI_TASK_RUNNING;
@@ -332,6 +353,7 @@ static void iscsi_complete_command(struct iscsi_task *task)
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 
+	session->tt->cleanup_task(task);
 	list_del_init(&task->running);
 	task->state = ISCSI_TASK_COMPLETED;
 	task->sc = NULL;
@@ -402,8 +424,6 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
 		 * the cmd in the sequencing
 		 */
 		conn->session->queued_cmdsn--;
-	else
-		conn->session->tt->cleanup_task(conn, task);
 
 	sc->result = err;
 	if (!scsi_bidi_cmnd(sc))
@@ -423,7 +443,7 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
 				struct iscsi_task *task)
 {
 	struct iscsi_session *session = conn->session;
-	struct iscsi_hdr *hdr = (struct iscsi_hdr *)task->hdr;
+	struct iscsi_hdr *hdr = task->hdr;
 	struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
 
 	if (conn->session->state == ISCSI_STATE_LOGGING_OUT)
@@ -456,6 +476,7 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
 	if ((hdr->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT)
 		session->state = ISCSI_STATE_LOGGING_OUT;
 
+	task->state = ISCSI_TASK_RUNNING;
 	list_move_tail(&task->running, &conn->mgmt_run_list);
 	debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
 		   hdr->opcode & ISCSI_OPCODE_MASK, hdr->itt,
@@ -505,23 +526,38 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	} else
 		task->data_count = 0;
 
+	if (conn->session->tt->alloc_pdu(task)) {
+		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
+				 "pdu for mgmt task.\n");
+		goto requeue_task;
+	}
+	task->hdr_len = sizeof(struct iscsi_hdr);
+
 	memcpy(task->hdr, hdr, sizeof(struct iscsi_hdr));
 	INIT_LIST_HEAD(&task->running);
 	list_add_tail(&task->running, &conn->mgmtqueue);
 
 	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
-		if (iscsi_prep_mgmt_task(conn, task)) {
-			__iscsi_put_task(task);
-			return NULL;
-		}
+		if (iscsi_prep_mgmt_task(conn, task))
+			goto free_task;
 
 		if (session->tt->xmit_task(task))
-			task = NULL;
+			goto free_task;
 
 	} else
 		scsi_queue_work(conn->session->host, &conn->xmitwork);
 
 	return task;
+
+free_task:
+	__iscsi_put_task(task);
+	return NULL;
+
+requeue_task:
+	if (task != conn->login_task)
+		__kfifo_put(session->cmdpool.queue, (void*)&task,
+			    sizeof(void*));
+	return NULL;
 }
 
 int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
@@ -1136,8 +1172,13 @@ check_mgmt:
 			fail_command(conn, conn->task, DID_IMM_RETRY << 16);
 			continue;
 		}
-		if (iscsi_prep_scsi_cmd_pdu(conn->task)) {
-			fail_command(conn, conn->task, DID_ABORT << 16);
+		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
+		if (rc) {
+			if (rc == -ENOMEM) {
+				conn->task = NULL;
+				goto again;
+			} else
+				fail_command(conn, conn->task, DID_ABORT << 16);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1195,6 +1236,26 @@ static void iscsi_xmitworker(struct work_struct *work)
 	} while (rc >= 0 || rc == -EAGAIN);
 }
 
+static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
+						  struct scsi_cmnd *sc)
+{
+	struct iscsi_task *task;
+
+	if (!__kfifo_get(conn->session->cmdpool.queue,
+			 (void *) &task, sizeof(void *)))
+		return NULL;
+
+	sc->SCp.phase = conn->session->age;
+	sc->SCp.ptr = (char *) task;
+
+	atomic_set(&task->refcount, 1);
+	task->state = ISCSI_TASK_PENDING;
+	task->conn = conn;
+	task->sc = sc;
+	INIT_LIST_HEAD(&task->running);
+	return task;
+}
+
 enum {
 	FAILURE_BAD_HOST = 1,
 	FAILURE_SESSION_FAILED,
@@ -1281,33 +1342,27 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 		goto reject;
 	}
 
-	if (!__kfifo_get(session->cmdpool.queue, (void*)&task,
-			 sizeof(void*))) {
+	task = iscsi_alloc_task(conn, sc);
+	if (!task) {
 		reason = FAILURE_OOM;
 		goto reject;
 	}
-	sc->SCp.phase = session->age;
-	sc->SCp.ptr = (char *)task;
-
-	atomic_set(&task->refcount, 1);
-	task->state = ISCSI_TASK_PENDING;
-	task->conn = conn;
-	task->sc = sc;
-	INIT_LIST_HEAD(&task->running);
 	list_add_tail(&task->running, &conn->xmitqueue);
 
 	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
-		if (iscsi_prep_scsi_cmd_pdu(task)) {
-			sc->result = DID_ABORT << 16;
-			sc->scsi_done = NULL;
-			iscsi_complete_command(task);
-			goto fault;
+		reason = iscsi_prep_scsi_cmd_pdu(task);
+		if (reason) {
+			if (reason == -ENOMEM) {
+				reason = FAILURE_OOM;
+				goto prepd_reject;
+			} else {
+				sc->result = DID_ABORT << 16;
+				goto prepd_fault;
+			}
 		}
 		if (session->tt->xmit_task(task)) {
-			sc->scsi_done = NULL;
-			iscsi_complete_command(task);
 			reason = FAILURE_SESSION_NOT_READY;
-			goto reject;
+			goto prepd_reject;
 		}
 	} else
 		scsi_queue_work(session->host, &conn->xmitwork);
@@ -1317,12 +1372,18 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	spin_lock(host->host_lock);
 	return 0;
 
+prepd_reject:
+	sc->scsi_done = NULL;
+	iscsi_complete_command(task);
 reject:
 	spin_unlock(&session->lock);
 	debug_scsi("cmd 0x%x rejected (%d)\n", sc->cmnd[0], reason);
 	spin_lock(host->host_lock);
 	return SCSI_MLQUEUE_TARGET_BUSY;
 
+prepd_fault:
+	sc->scsi_done = NULL;
+	iscsi_complete_command(task);
 fault:
 	spin_unlock(&session->lock);
 	debug_scsi("iscsi: cmd 0x%x is not queued (%d)\n", sc->cmnd[0], reason);
@@ -1634,9 +1695,9 @@ static void iscsi_prep_abort_task_pdu(struct iscsi_task *task,
 	hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE;
 	hdr->flags = ISCSI_TM_FUNC_ABORT_TASK & ISCSI_FLAG_TM_FUNC_MASK;
 	hdr->flags |= ISCSI_FLAG_CMD_FINAL;
-	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
-	hdr->rtt = task->hdr->itt;
-	hdr->refcmdsn = task->hdr->cmdsn;
+	memcpy(hdr->lun, task->lun, sizeof(hdr->lun));
+	hdr->rtt = task->hdr_itt;
+	hdr->refcmdsn = task->cmdsn;
 }
 
 int iscsi_eh_abort(struct scsi_cmnd *sc)
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 61e53f14f7e1..51500573c0b8 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -93,24 +93,38 @@ enum {
 	ISCSI_TASK_RUNNING,
 };
 
+struct iscsi_r2t_info {
+	__be32			ttt;		/* copied from R2T */
+	__be32			exp_statsn;	/* copied from R2T */
+	uint32_t		data_length;	/* copied from R2T */
+	uint32_t		data_offset;	/* copied from R2T */
+	int			data_count;	/* DATA-Out payload progress */
+	int			datasn;
+	/* LLDs should set/update these values */
+	int			sent;		/* R2T sequence progress */
+};
+
 struct iscsi_task {
 	/*
 	 * Because LLDs allocate their hdr differently, this is a pointer
 	 * and length to that storage. It must be setup at session
 	 * creation time.
 	 */
-	struct iscsi_cmd	*hdr;
+	struct iscsi_hdr	*hdr;
 	unsigned short		hdr_max;
 	unsigned short		hdr_len;	/* accumulated size of hdr used */
+	/* copied values in case we need to send tmfs */
+	itt_t			hdr_itt;
+	__be32			cmdsn;
+	uint8_t			lun[8];
+
 	int			itt;		/* this ITT */
 
-	uint32_t		unsol_datasn;
 	unsigned		imm_count;	/* imm-data (bytes)   */
-	unsigned		unsol_count;	/* unsolicited (bytes)*/
 	/* offset in unsolicited stream (bytes); */
-	unsigned		unsol_offset;
-	unsigned		data_count;	/* remaining Data-Out */
+	struct iscsi_r2t_info	unsol_r2t;
 	char			*data;		/* mgmt payload */
+	unsigned		data_count;
 	struct scsi_cmnd	*sc;		/* associated SCSI cmd*/
 	struct iscsi_conn	*conn;		/* used connection    */
 
@@ -121,6 +135,11 @@ struct iscsi_task {
 	void			*dd_data;	/* driver/transport data */
 };
 
+static inline int iscsi_task_has_unsol_data(struct iscsi_task *task)
+{
+	return task->unsol_r2t.data_length > task->unsol_r2t.sent;
+}
+
 static inline void* iscsi_next_hdr(struct iscsi_task *task)
 {
 	return (void*)task->hdr + task->hdr_len;
@@ -376,8 +395,9 @@ extern void iscsi_suspend_tx(struct iscsi_conn *conn);
  * pdu and task processing
  */
 extern void iscsi_update_cmdsn(struct iscsi_session *, struct iscsi_nopin *);
-extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_task *,
-					struct iscsi_data *hdr);
+extern void iscsi_prep_data_out_pdu(struct iscsi_task *task,
+				    struct iscsi_r2t_info *r2t,
+				    struct iscsi_data *hdr);
 extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *,
 				char *, uint32_t);
 extern int iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index c667cc396545..c928234c018f 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -113,10 +113,15 @@ struct iscsi_transport {
 			 char *data, uint32_t data_size);
 	void (*get_stats) (struct iscsi_cls_conn *conn,
 			   struct iscsi_stats *stats);
+
 	int (*init_task) (struct iscsi_task *task);
 	int (*xmit_task) (struct iscsi_task *task);
-	void (*cleanup_task) (struct iscsi_conn *conn,
-				  struct iscsi_task *task);
+	void (*cleanup_task) (struct iscsi_task *task);
+
+	int (*alloc_pdu) (struct iscsi_task *task);
+	int (*xmit_pdu) (struct iscsi_task *task);
+	int (*init_pdu) (struct iscsi_task *task, unsigned int offset,
+			 unsigned int count);
 	void (*session_recovery_timedout) (struct iscsi_cls_session *session);
 	struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr,
 					      int non_blocking);
-- 
cgit v1.2.3


From a081c13e39b5c17052a7b46fafa61019c4c110ff Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 2 Dec 2008 00:32:11 -0600
Subject: [SCSI] iscsi_tcp: split module into lib and lld

As explained in the previous mails, cxgb3i needs iscsi_tcp's
r2t/data_out and data_in procesing so this just moves functions
that both drivers want to use to a new module libiscsi_tcp. The
next patch will hook iscsi_tcp in.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libiscsi_tcp.c | 1160 +++++++++++++++++++++++++++++++++++++++++++
 include/scsi/libiscsi_tcp.h |  131 +++++
 2 files changed, 1291 insertions(+)
 create mode 100644 drivers/scsi/libiscsi_tcp.c
 create mode 100644 include/scsi/libiscsi_tcp.h

(limited to 'include')

diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
new file mode 100644
index 000000000000..e86508936314
--- /dev/null
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -0,0 +1,1160 @@
+/*
+ * iSCSI over TCP/IP Data-Path lib
+ *
+ * Copyright (C) 2004 Dmitry Yusupov
+ * Copyright (C) 2004 Alex Aizman
+ * Copyright (C) 2005 - 2006 Mike Christie
+ * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
+ * maintained by open-iscsi@googlegroups.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * See the file COPYING included with this distribution for more details.
+ *
+ * Credits:
+ *	Christoph Hellwig
+ *	FUJITA Tomonori
+ *	Arne Redlich
+ *	Zhenyu Wang
+ */
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/inet.h>
+#include <linux/file.h>
+#include <linux/blkdev.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/kfifo.h>
+#include <linux/scatterlist.h>
+#include <net/tcp.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_transport_iscsi.h>
+
+#include "iscsi_tcp.h"
+
+MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
+	      "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
+	      "Alex Aizman <itn780@yahoo.com>");
+MODULE_DESCRIPTION("iSCSI/TCP data-path");
+MODULE_LICENSE("GPL");
+#undef DEBUG_TCP
+
+#ifdef DEBUG_TCP
+#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
+#else
+#define debug_tcp(fmt...)
+#endif
+
+static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
+				   struct iscsi_segment *segment);
+
+/*
+ * Scatterlist handling: inside the iscsi_segment, we
+ * remember an index into the scatterlist, and set data/size
+ * to the current scatterlist entry. For highmem pages, we
+ * kmap as needed.
+ *
+ * Note that the page is unmapped when we return from
+ * TCP's data_ready handler, so we may end up mapping and
+ * unmapping the same page repeatedly. The whole reason
+ * for this is that we shouldn't keep the page mapped
+ * outside the softirq.
+ */
+
+/**
+ * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
+ * @segment: the buffer object
+ * @sg: scatterlist
+ * @offset: byte offset into that sg entry
+ *
+ * This function sets up the segment so that subsequent
+ * data is copied to the indicated sg entry, at the given
+ * offset.
+ */
+static inline void
+iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
+			  struct scatterlist *sg, unsigned int offset)
+{
+	segment->sg = sg;
+	segment->sg_offset = offset;
+	segment->size = min(sg->length - offset,
+			    segment->total_size - segment->total_copied);
+	segment->data = NULL;
+}
+
+/**
+ * iscsi_tcp_segment_map - map the current S/G page
+ * @segment: iscsi_segment
+ * @recv: 1 if called from recv path
+ *
+ * We only need to possibly kmap data if scatter lists are being used,
+ * because the iscsi passthrough and internal IO paths will never use high
+ * mem pages.
+ */
+static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
+{
+	struct scatterlist *sg;
+
+	if (segment->data != NULL || !segment->sg)
+		return;
+
+	sg = segment->sg;
+	BUG_ON(segment->sg_mapped);
+	BUG_ON(sg->length == 0);
+
+	/*
+	 * If the page count is greater than one it is ok to send
+	 * to the network layer's zero copy send path. If not we
+	 * have to go the slow sendmsg path. We always map for the
+	 * recv path.
+	 */
+	if (page_count(sg_page(sg)) >= 1 && !recv)
+		return;
+
+	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
+		  segment);
+	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
+	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
+}
+
+void iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
+{
+	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);
+
+	if (segment->sg_mapped) {
+		debug_tcp("iscsi_tcp_segment_unmap valid\n");
+		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
+		segment->sg_mapped = NULL;
+		segment->data = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_segment_unmap);
+
+/*
+ * Splice the digest buffer into the buffer
+ */
+static inline void
+iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
+{
+	segment->data = digest;
+	segment->digest_len = ISCSI_DIGEST_SIZE;
+	segment->total_size += ISCSI_DIGEST_SIZE;
+	segment->size = ISCSI_DIGEST_SIZE;
+	segment->copied = 0;
+	segment->sg = NULL;
+	segment->hash = NULL;
+}
+
+/**
+ * iscsi_tcp_segment_done - check whether the segment is complete
+ * @segment: iscsi segment to check
+ * @recv: set to one of this is called from the recv path
+ * @copied: number of bytes copied
+ *
+ * Check if we're done receiving this segment. If the receive
+ * buffer is full but we expect more data, move on to the
+ * next entry in the scatterlist.
+ *
+ * If the amount of data we received isn't a multiple of 4,
+ * we will transparently receive the pad bytes, too.
+ *
+ * This function must be re-entrant.
+ */
+int iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv,
+			   unsigned copied)
+{
+	static unsigned char padbuf[ISCSI_PAD_LEN];
+	struct scatterlist sg;
+	unsigned int pad;
+
+	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
+		  segment->size, recv ? "recv" : "xmit");
+	if (segment->hash && copied) {
+		/*
+		 * If a segment is kmapd we must unmap it before sending
+		 * to the crypto layer since that will try to kmap it again.
+		 */
+		iscsi_tcp_segment_unmap(segment);
+
+		if (!segment->data) {
+			sg_init_table(&sg, 1);
+			sg_set_page(&sg, sg_page(segment->sg), copied,
+				    segment->copied + segment->sg_offset +
+							segment->sg->offset);
+		} else
+			sg_init_one(&sg, segment->data + segment->copied,
+				    copied);
+		crypto_hash_update(segment->hash, &sg, copied);
+	}
+
+	segment->copied += copied;
+	if (segment->copied < segment->size) {
+		iscsi_tcp_segment_map(segment, recv);
+		return 0;
+	}
+
+	segment->total_copied += segment->copied;
+	segment->copied = 0;
+	segment->size = 0;
+
+	/* Unmap the current scatterlist page, if there is one. */
+	iscsi_tcp_segment_unmap(segment);
+
+	/* Do we have more scatterlist entries? */
+	debug_tcp("total copied %u total size %u\n", segment->total_copied,
+		   segment->total_size);
+	if (segment->total_copied < segment->total_size) {
+		/* Proceed to the next entry in the scatterlist. */
+		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
+					  0);
+		iscsi_tcp_segment_map(segment, recv);
+		BUG_ON(segment->size == 0);
+		return 0;
+	}
+
+	/* Do we need to handle padding? */
+	pad = iscsi_padding(segment->total_copied);
+	if (pad != 0) {
+		debug_tcp("consume %d pad bytes\n", pad);
+		segment->total_size += pad;
+		segment->size = pad;
+		segment->data = padbuf;
+		return 0;
+	}
+
+	/*
+	 * Set us up for transferring the data digest. hdr digest
+	 * is completely handled in hdr done function.
+	 */
+	if (segment->hash) {
+		crypto_hash_final(segment->hash, segment->digest);
+		iscsi_tcp_segment_splice_digest(segment,
+				 recv ? segment->recv_digest : segment->digest);
+		return 0;
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_segment_done);
+
+/**
+ * iscsi_tcp_segment_recv - copy data to segment
+ * @tcp_conn: the iSCSI TCP connection
+ * @segment: the buffer to copy to
+ * @ptr: data pointer
+ * @len: amount of data available
+ *
+ * This function copies up to @len bytes to the
+ * given buffer, and returns the number of bytes
+ * consumed, which can actually be less than @len.
+ *
+ * If hash digest is enabled, the function will update the
+ * hash while copying.
+ * Combining these two operations doesn't buy us a lot (yet),
+ * but in the future we could implement combined copy+crc,
+ * just way we do for network layer checksums.
+ */
+static int
+iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
+		       struct iscsi_segment *segment, const void *ptr,
+		       unsigned int len)
+{
+	unsigned int copy = 0, copied = 0;
+
+	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
+		if (copied == len) {
+			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
+				  len);
+			break;
+		}
+
+		copy = min(len - copied, segment->size - segment->copied);
+		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
+		memcpy(segment->data + segment->copied, ptr + copied, copy);
+		copied += copy;
+	}
+	return copied;
+}
+
+inline void
+iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
+		      unsigned char digest[ISCSI_DIGEST_SIZE])
+{
+	struct scatterlist sg;
+
+	sg_init_one(&sg, hdr, hdrlen);
+	crypto_hash_digest(hash, &sg, hdrlen, digest);
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_dgst_header);
+
+static inline int
+iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
+		      struct iscsi_segment *segment)
+{
+	if (!segment->digest_len)
+		return 1;
+
+	if (memcmp(segment->recv_digest, segment->digest,
+		   segment->digest_len)) {
+		debug_scsi("digest mismatch\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Helper function to set up segment buffer
+ */
+static inline void
+__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
+		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
+{
+	memset(segment, 0, sizeof(*segment));
+	segment->total_size = size;
+	segment->done = done;
+
+	if (hash) {
+		segment->hash = hash;
+		crypto_hash_init(hash);
+	}
+}
+
+inline void
+iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
+			  size_t size, iscsi_segment_done_fn_t *done,
+			  struct hash_desc *hash)
+{
+	__iscsi_segment_init(segment, size, done, hash);
+	segment->data = data;
+	segment->size = size;
+}
+EXPORT_SYMBOL_GPL(iscsi_segment_init_linear);
+
+inline int
+iscsi_segment_seek_sg(struct iscsi_segment *segment,
+		      struct scatterlist *sg_list, unsigned int sg_count,
+		      unsigned int offset, size_t size,
+		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
+{
+	struct scatterlist *sg;
+	unsigned int i;
+
+	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
+		  offset, size);
+	__iscsi_segment_init(segment, size, done, hash);
+	for_each_sg(sg_list, sg, sg_count, i) {
+		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
+			   sg->offset);
+		if (offset < sg->length) {
+			iscsi_tcp_segment_init_sg(segment, sg, offset);
+			return 0;
+		}
+		offset -= sg->length;
+	}
+
+	return ISCSI_ERR_DATA_OFFSET;
+}
+EXPORT_SYMBOL_GPL(iscsi_segment_seek_sg);
+
+/**
+ * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
+ * @tcp_conn: iscsi connection to prep for
+ *
+ * This function always passes NULL for the hash argument, because when this
+ * function is called we do not yet know the final size of the header and want
+ * to delay the digest processing until we know that.
+ */
+void iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
+{
+	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
+		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
+	iscsi_segment_init_linear(&tcp_conn->in.segment,
+				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
+				iscsi_tcp_hdr_recv_done, NULL);
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_hdr_recv_prep);
+
+/*
+ * Handle incoming reply to any other type of command
+ */
+static int
+iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
+			 struct iscsi_segment *segment)
+{
+	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
+	int rc = 0;
+
+	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
+		return ISCSI_ERR_DATA_DGST;
+
+	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
+			conn->data, tcp_conn->in.datalen);
+	if (rc)
+		return rc;
+
+	iscsi_tcp_hdr_recv_prep(tcp_conn);
+	return 0;
+}
+
+static void
+iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
+{
+	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
+	struct hash_desc *rx_hash = NULL;
+
+	if (conn->datadgst_en &
+	    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD))
+		rx_hash = tcp_conn->rx_hash;
+
+	iscsi_segment_init_linear(&tcp_conn->in.segment,
+				conn->data, tcp_conn->in.datalen,
+				iscsi_tcp_data_recv_done, rx_hash);
+}
+
+/**
+ * iscsi_tcp_cleanup_task - free tcp_task resources
+ * @task: iscsi task
+ *
+ * must be called with session lock
+ */
+void iscsi_tcp_cleanup_task(struct iscsi_task *task)
+{
+	struct iscsi_tcp_task *tcp_task = task->dd_data;
+	struct iscsi_r2t_info *r2t;
+
+	/* nothing to do for mgmt or pending tasks */
+	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+		return;
+
+	/* flush task's r2t queues */
+	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		debug_scsi("iscsi_tcp_cleanup_task pending r2t dropped\n");
+	}
+
+	r2t = tcp_task->r2t;
+	if (r2t != NULL) {
+		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		tcp_task->r2t = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_cleanup_task);
+
+/**
+ * iscsi_tcp_data_in - SCSI Data-In Response processing
+ * @conn: iscsi connection
+ * @task: scsi command task
+ */
+static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
+{
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct iscsi_tcp_task *tcp_task = task->dd_data;
+	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
+	int datasn = be32_to_cpu(rhdr->datasn);
+	unsigned total_in_length = scsi_in(task->sc)->length;
+
+	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
+	if (tcp_conn->in.datalen == 0)
+		return 0;
+
+	if (tcp_task->exp_datasn != datasn) {
+		debug_tcp("%s: task->exp_datasn(%d) != rhdr->datasn(%d)\n",
+		          __func__, tcp_task->exp_datasn, datasn);
+		return ISCSI_ERR_DATASN;
+	}
+
+	tcp_task->exp_datasn++;
+
+	tcp_task->data_offset = be32_to_cpu(rhdr->offset);
+	if (tcp_task->data_offset + tcp_conn->in.datalen > total_in_length) {
+		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
+		          __func__, tcp_task->data_offset,
+		          tcp_conn->in.datalen, total_in_length);
+		return ISCSI_ERR_DATA_OFFSET;
+	}
+
+	conn->datain_pdus_cnt++;
+	return 0;
+}
+
+/**
+ * iscsi_tcp_r2t_rsp - iSCSI R2T Response processing
+ * @conn: iscsi connection
+ * @task: scsi command task
+ */
+static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
+{
+	struct iscsi_session *session = conn->session;
+	struct iscsi_tcp_task *tcp_task = task->dd_data;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+	struct iscsi_r2t_info *r2t;
+	int r2tsn = be32_to_cpu(rhdr->r2tsn);
+	int rc;
+
+	if (tcp_conn->in.datalen) {
+		iscsi_conn_printk(KERN_ERR, conn,
+				  "invalid R2t with datalen %d\n",
+				  tcp_conn->in.datalen);
+		return ISCSI_ERR_DATALEN;
+	}
+
+	if (tcp_task->exp_datasn != r2tsn){
+		debug_tcp("%s: task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
+		          __func__, tcp_task->exp_datasn, r2tsn);
+		return ISCSI_ERR_R2TSN;
+	}
+
+	/* fill-in new R2T associated with the task */
+	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
+
+	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
+		iscsi_conn_printk(KERN_INFO, conn,
+				  "dropping R2T itt %d in recovery.\n",
+				  task->itt);
+		return 0;
+	}
+
+	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	if (!rc) {
+		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
+				  "Target has sent more R2Ts than it "
+				  "negotiated for or driver has has leaked.\n");
+		return ISCSI_ERR_PROTO;
+	}
+
+	r2t->exp_statsn = rhdr->statsn;
+	r2t->data_length = be32_to_cpu(rhdr->data_length);
+	if (r2t->data_length == 0) {
+		iscsi_conn_printk(KERN_ERR, conn,
+				  "invalid R2T with zero data len\n");
+		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		return ISCSI_ERR_DATALEN;
+	}
+
+	if (r2t->data_length > session->max_burst)
+		debug_scsi("invalid R2T with data len %u and max burst %u."
+			   "Attempting to execute request.\n",
+			    r2t->data_length, session->max_burst);
+
+	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
+	if (r2t->data_offset + r2t->data_length > scsi_out(task->sc)->length) {
+		iscsi_conn_printk(KERN_ERR, conn,
+				  "invalid R2T with data len %u at offset %u "
+				  "and total length %d\n", r2t->data_length,
+				  r2t->data_offset, scsi_out(task->sc)->length);
+		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		return ISCSI_ERR_DATALEN;
+	}
+
+	r2t->ttt = rhdr->ttt; /* no flip */
+	r2t->datasn = 0;
+	r2t->sent = 0;
+
+	tcp_task->exp_datasn = r2tsn + 1;
+	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	conn->r2t_pdus_cnt++;
+
+	iscsi_requeue_task(task);
+	return 0;
+}
+
+/*
+ * Handle incoming reply to DataIn command
+ */
+static int
+iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
+			  struct iscsi_segment *segment)
+{
+	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
+	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
+	int rc;
+
+	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
+		return ISCSI_ERR_DATA_DGST;
+
+	/* check for non-exceptional status */
+	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
+		if (rc)
+			return rc;
+	}
+
+	iscsi_tcp_hdr_recv_prep(tcp_conn);
+	return 0;
+}
+
+/**
+ * iscsi_tcp_hdr_dissect - process PDU header
+ * @conn: iSCSI connection
+ * @hdr: PDU header
+ *
+ * This function analyzes the header of the PDU received,
+ * and performs several sanity checks. If the PDU is accompanied
+ * by data, the receive buffer is set up to copy the incoming data
+ * to the correct location.
+ */
+static int
+iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
+{
+	int rc = 0, opcode, ahslen;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct iscsi_task *task;
+
+	/* verify PDU length */
+	tcp_conn->in.datalen = ntoh24(hdr->dlength);
+	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
+		iscsi_conn_printk(KERN_ERR, conn,
+				  "iscsi_tcp: datalen %d > %d\n",
+				  tcp_conn->in.datalen, conn->max_recv_dlength);
+		return ISCSI_ERR_DATALEN;
+	}
+
+	/* Additional header segments. So far, we don't
+	 * process additional headers.
+	 */
+	ahslen = hdr->hlength << 2;
+
+	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
+	/* verify itt (itt encoding: age+cid+itt) */
+	rc = iscsi_verify_itt(conn, hdr->itt);
+	if (rc)
+		return rc;
+
+	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
+		  opcode, ahslen, tcp_conn->in.datalen);
+
+	switch(opcode) {
+	case ISCSI_OP_SCSI_DATA_IN:
+		spin_lock(&conn->session->lock);
+		task = iscsi_itt_to_ctask(conn, hdr->itt);
+		if (!task)
+			rc = ISCSI_ERR_BAD_ITT;
+		else
+			rc = iscsi_tcp_data_in(conn, task);
+		if (rc) {
+			spin_unlock(&conn->session->lock);
+			break;
+		}
+
+		if (tcp_conn->in.datalen) {
+			struct iscsi_tcp_task *tcp_task = task->dd_data;
+			struct hash_desc *rx_hash = NULL;
+			struct scsi_data_buffer *sdb = scsi_in(task->sc);
+
+			/*
+			 * Setup copy of Data-In into the Scsi_Cmnd
+			 * Scatterlist case:
+			 * We set up the iscsi_segment to point to the next
+			 * scatterlist entry to copy to. As we go along,
+			 * we move on to the next scatterlist entry and
+			 * update the digest per-entry.
+			 */
+			if (conn->datadgst_en &&
+			    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD))
+				rx_hash = tcp_conn->rx_hash;
+
+			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
+				  "datalen=%d)\n", tcp_conn,
+				  tcp_task->data_offset,
+				  tcp_conn->in.datalen);
+			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
+						   sdb->table.sgl,
+						   sdb->table.nents,
+						   tcp_task->data_offset,
+						   tcp_conn->in.datalen,
+						   iscsi_tcp_process_data_in,
+						   rx_hash);
+			spin_unlock(&conn->session->lock);
+			return rc;
+		}
+		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
+		spin_unlock(&conn->session->lock);
+		break;
+	case ISCSI_OP_SCSI_CMD_RSP:
+		if (tcp_conn->in.datalen) {
+			iscsi_tcp_data_recv_prep(tcp_conn);
+			return 0;
+		}
+		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
+		break;
+	case ISCSI_OP_R2T:
+		spin_lock(&conn->session->lock);
+		task = iscsi_itt_to_ctask(conn, hdr->itt);
+		if (!task)
+			rc = ISCSI_ERR_BAD_ITT;
+		else if (ahslen)
+			rc = ISCSI_ERR_AHSLEN;
+		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
+			rc = iscsi_tcp_r2t_rsp(conn, task);
+		else
+			rc = ISCSI_ERR_PROTO;
+		spin_unlock(&conn->session->lock);
+		break;
+	case ISCSI_OP_LOGIN_RSP:
+	case ISCSI_OP_TEXT_RSP:
+	case ISCSI_OP_REJECT:
+	case ISCSI_OP_ASYNC_EVENT:
+		/*
+		 * It is possible that we could get a PDU with a buffer larger
+		 * than 8K, but there are no targets that currently do this.
+		 * For now we fail until we find a vendor that needs it
+		 */
+		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
+			iscsi_conn_printk(KERN_ERR, conn,
+					  "iscsi_tcp: received buffer of "
+					  "len %u but conn buffer is only %u "
+					  "(opcode %0x)\n",
+					  tcp_conn->in.datalen,
+					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
+			rc = ISCSI_ERR_PROTO;
+			break;
+		}
+
+		/* If there's data coming in with the response,
+		 * receive it to the connection's buffer.
+		 */
+		if (tcp_conn->in.datalen) {
+			iscsi_tcp_data_recv_prep(tcp_conn);
+			return 0;
+		}
+	/* fall through */
+	case ISCSI_OP_LOGOUT_RSP:
+	case ISCSI_OP_NOOP_IN:
+	case ISCSI_OP_SCSI_TMFUNC_RSP:
+		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
+		break;
+	default:
+		rc = ISCSI_ERR_BAD_OPCODE;
+		break;
+	}
+
+	if (rc == 0) {
+		/* Anything that comes with data should have
+		 * been handled above. */
+		if (tcp_conn->in.datalen)
+			return ISCSI_ERR_PROTO;
+		iscsi_tcp_hdr_recv_prep(tcp_conn);
+	}
+
+	return rc;
+}
+
+/**
+ * iscsi_tcp_hdr_recv_done - process PDU header
+ *
+ * This is the callback invoked when the PDU header has
+ * been received. If the header is followed by additional
+ * header segments, we go back for more data.
+ */
+static int
+iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
+			struct iscsi_segment *segment)
+{
+	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
+	struct iscsi_hdr *hdr;
+
+	/* Check if there are additional header segments
+	 * *prior* to computing the digest, because we
+	 * may need to go back to the caller for more.
+	 */
+	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
+	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
+		/* Bump the header length - the caller will
+		 * just loop around and get the AHS for us, and
+		 * call again. */
+		unsigned int ahslen = hdr->hlength << 2;
+
+		/* Make sure we don't overflow */
+		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
+			return ISCSI_ERR_AHSLEN;
+
+		segment->total_size += ahslen;
+		segment->size += ahslen;
+		return 0;
+	}
+
+	/* We're done processing the header. See if we're doing
+	 * header digests; if so, set up the recv_digest buffer
+	 * and go back for more. */
+	if (conn->hdrdgst_en) {
+		if (segment->digest_len == 0) {
+			/*
+			 * Even if we offload the digest processing we
+			 * splice it in so we can increment the skb/segment
+			 * counters in preparation for the data segment.
+			 */
+			iscsi_tcp_segment_splice_digest(segment,
+							segment->recv_digest);
+			return 0;
+		}
+
+		if (!(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) {
+			iscsi_tcp_dgst_header(tcp_conn->rx_hash, hdr,
+				segment->total_copied - ISCSI_DIGEST_SIZE,
+				segment->digest);
+
+			if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
+				return ISCSI_ERR_HDR_DGST;
+		}
+	}
+
+	tcp_conn->in.hdr = hdr;
+	return iscsi_tcp_hdr_dissect(conn, hdr);
+}
+
+/**
+ * iscsi_tcp_recv_segment_is_hdr - tests if we are reading in a header
+ * @tcp_conn: iscsi tcp conn
+ *
+ * returns non zero if we are currently processing or setup to process
+ * a header.
+ */
+inline int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn)
+{
+	return tcp_conn->in.segment.done == iscsi_tcp_hdr_recv_done;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_recv_segment_is_hdr);
+
+/**
+ * iscsi_tcp_recv_skb - Process skb
+ * @conn: iscsi connection
+ * @skb: network buffer with header and/or data segment
+ * @offset: offset in skb
+ * @offload: bool indicating if transfer was offloaded
+ *
+ * Will return status of transfer in status. And will return
+ * number of bytes copied.
+ */
+int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
+		       unsigned int offset, bool offloaded, int *status)
+{
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	struct iscsi_segment *segment = &tcp_conn->in.segment;
+	struct skb_seq_state seq;
+	unsigned int consumed = 0;
+	int rc = 0;
+
+	debug_tcp("in %d bytes\n", skb->len - offset);
+
+	if (unlikely(conn->suspend_rx)) {
+		debug_tcp("conn %d Rx suspended!\n", conn->id);
+		*status = ISCSI_TCP_SUSPENDED;
+		return 0;
+	}
+
+	if (offloaded) {
+		segment->total_copied = segment->total_size;
+		goto segment_done;
+	}
+
+	skb_prepare_seq_read(skb, offset, skb->len, &seq);
+	while (1) {
+		unsigned int avail;
+		const u8 *ptr;
+
+		avail = skb_seq_read(consumed, &ptr, &seq);
+		if (avail == 0) {
+			debug_tcp("no more data avail. Consumed %d\n",
+				  consumed);
+			*status = ISCSI_TCP_SKB_DONE;
+			skb_abort_seq_read(&seq);
+			goto skb_done;
+		}
+		BUG_ON(segment->copied >= segment->size);
+
+		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
+		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
+		BUG_ON(rc == 0);
+		consumed += rc;
+
+		if (segment->total_copied >= segment->total_size) {
+			skb_abort_seq_read(&seq);
+			goto segment_done;
+		}
+	}
+
+segment_done:
+	*status = ISCSI_TCP_SEGMENT_DONE;
+	debug_tcp("segment done\n");
+	rc = segment->done(tcp_conn, segment);
+	if (rc != 0) {
+		*status = ISCSI_TCP_CONN_ERR;
+		debug_tcp("Error receiving PDU, errno=%d\n", rc);
+		iscsi_conn_failure(conn, rc);
+		return 0;
+	}
+	/* The done() functions sets up the next segment. */
+
+skb_done:
+	conn->rxdata_octets += consumed;
+	return consumed;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_recv_skb);
+
+/**
+ * iscsi_tcp_task_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
+ * @conn: iscsi connection
+ * @task: scsi command task
+ * @sc: scsi command
+ */
+int iscsi_tcp_task_init(struct iscsi_task *task)
+{
+	struct iscsi_tcp_task *tcp_task = task->dd_data;
+	struct iscsi_conn *conn = task->conn;
+	struct scsi_cmnd *sc = task->sc;
+	int err;
+
+	if (!sc) {
+		/*
+		 * mgmt tasks do not have a scatterlist since they come
+		 * in from the iscsi interface.
+		 */
+		debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id,
+			   task->itt);
+
+		return conn->session->tt->init_pdu(task, 0, task->data_count);
+	}
+
+	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
+	tcp_task->exp_datasn = 0;
+
+	/* Prepare PDU, optionally w/ immediate data */
+	debug_scsi("task deq [cid %d itt 0x%x imm %d unsol %d]\n",
+		    conn->id, task->itt, task->imm_count,
+		    task->unsol_r2t.data_length);
+
+	err = conn->session->tt->init_pdu(task, 0, task->imm_count);
+	if (err)
+		return err;
+	task->imm_count = 0;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_task_init);
+
+static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
+{
+	struct iscsi_session *session = task->conn->session;
+	struct iscsi_tcp_task *tcp_task = task->dd_data;
+	struct iscsi_r2t_info *r2t = NULL;
+
+	if (iscsi_task_has_unsol_data(task))
+		r2t = &task->unsol_r2t;
+	else {
+		spin_lock_bh(&session->lock);
+		if (tcp_task->r2t) {
+			r2t = tcp_task->r2t;
+			/* Continue with this R2T? */
+			if (r2t->data_length <= r2t->sent) {
+				debug_scsi("  done with r2t %p\n", r2t);
+				__kfifo_put(tcp_task->r2tpool.queue,
+					    (void *)&tcp_task->r2t,
+					    sizeof(void *));
+				tcp_task->r2t = r2t = NULL;
+			}
+		}
+
+		if (r2t == NULL) {
+			__kfifo_get(tcp_task->r2tqueue,
+				    (void *)&tcp_task->r2t, sizeof(void *));
+			r2t = tcp_task->r2t;
+		}
+		spin_unlock_bh(&session->lock);
+	}
+
+	return r2t;
+}
+
+/**
+ * iscsi_tcp_task_xmit - xmit normal PDU task
+ * @task: iscsi command task
+ *
+ * We're expected to return 0 when everything was transmitted succesfully,
+ * -EAGAIN if there's still data in the queue, or != 0 for any other kind
+ * of error.
+ */
+int iscsi_tcp_task_xmit(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+	struct iscsi_session *session = conn->session;
+	struct iscsi_r2t_info *r2t;
+	int rc = 0;
+
+flush:
+	/* Flush any pending data first. */
+	rc = session->tt->xmit_pdu(task);
+	if (rc < 0)
+		return rc;
+
+	/* mgmt command */
+	if (!task->sc) {
+		if (task->hdr->itt == RESERVED_ITT)
+			iscsi_put_task(task);
+		return 0;
+	}
+
+	/* Are we done already? */
+	if (task->sc->sc_data_direction != DMA_TO_DEVICE)
+		return 0;
+
+	r2t = iscsi_tcp_get_curr_r2t(task);
+	if (r2t == NULL) {
+		/* Waiting for more R2Ts to arrive. */
+		debug_tcp("no R2Ts yet\n");
+		return 0;
+	}
+
+	rc = conn->session->tt->alloc_pdu(task);
+	if (rc)
+		return rc;
+	iscsi_prep_data_out_pdu(task, r2t, (struct iscsi_data *) task->hdr);
+
+	debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
+		   r2t, r2t->datasn - 1, task->hdr->itt,
+		   r2t->data_offset + r2t->sent, r2t->data_count);
+
+	rc = conn->session->tt->init_pdu(task, r2t->data_offset + r2t->sent,
+					 r2t->data_count);
+	if (rc)
+		return rc;
+	r2t->sent += r2t->data_count;
+	goto flush;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_task_xmit);
+
+struct iscsi_cls_conn *
+iscsi_tcp_conn_setup(struct iscsi_cls_session *cls_session, int dd_data_size,
+		      uint32_t conn_idx)
+
+{
+	struct iscsi_conn *conn;
+	struct iscsi_cls_conn *cls_conn;
+	struct iscsi_tcp_conn *tcp_conn;
+
+	cls_conn = iscsi_conn_setup(cls_session, sizeof(*tcp_conn), conn_idx);
+	if (!cls_conn)
+		return NULL;
+	conn = cls_conn->dd_data;
+	/*
+	 * due to strange issues with iser these are not set
+	 * in iscsi_conn_setup
+	 */
+	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+	tcp_conn = conn->dd_data;
+	tcp_conn->iscsi_conn = conn;
+
+	tcp_conn->dd_data = kzalloc(dd_data_size, GFP_KERNEL);
+	if (!tcp_conn->dd_data) {
+		iscsi_conn_teardown(cls_conn);
+		return NULL;
+	}
+	return cls_conn;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_conn_setup);
+
+void iscsi_tcp_conn_teardown(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+
+	kfree(tcp_conn->dd_data);
+	iscsi_conn_teardown(cls_conn);
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_conn_teardown);
+
+int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
+{
+	int i;
+	int cmd_i;
+
+	/*
+	 * initialize per-task: R2T pool and xmit queue
+	 */
+	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
+	        struct iscsi_task *task = session->cmds[cmd_i];
+		struct iscsi_tcp_task *tcp_task = task->dd_data;
+
+		/*
+		 * pre-allocated x2 as much r2ts to handle race when
+		 * target acks DataOut faster than we data_xmit() queues
+		 * could replenish r2tqueue.
+		 */
+
+		/* R2T pool */
+		if (iscsi_pool_init(&tcp_task->r2tpool,
+				    session->max_r2t * 2, NULL,
+				    sizeof(struct iscsi_r2t_info))) {
+			goto r2t_alloc_fail;
+		}
+
+		/* R2T xmit queue */
+		tcp_task->r2tqueue = kfifo_alloc(
+		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
+		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
+			iscsi_pool_free(&tcp_task->r2tpool);
+			goto r2t_alloc_fail;
+		}
+	}
+
+	return 0;
+
+r2t_alloc_fail:
+	for (i = 0; i < cmd_i; i++) {
+		struct iscsi_task *task = session->cmds[i];
+		struct iscsi_tcp_task *tcp_task = task->dd_data;
+
+		kfifo_free(tcp_task->r2tqueue);
+		iscsi_pool_free(&tcp_task->r2tpool);
+	}
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_r2tpool_alloc);
+
+void iscsi_tcp_r2tpool_free(struct iscsi_session *session)
+{
+	int i;
+
+	for (i = 0; i < session->cmds_max; i++) {
+		struct iscsi_task *task = session->cmds[i];
+		struct iscsi_tcp_task *tcp_task = task->dd_data;
+
+		kfifo_free(tcp_task->r2tqueue);
+		iscsi_pool_free(&tcp_task->r2tpool);
+	}
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_r2tpool_free);
+
+void iscsi_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
+			      struct iscsi_stats *stats)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+
+	stats->txdata_octets = conn->txdata_octets;
+	stats->rxdata_octets = conn->rxdata_octets;
+	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
+	stats->dataout_pdus = conn->dataout_pdus_cnt;
+	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
+	stats->datain_pdus = conn->datain_pdus_cnt;
+	stats->r2t_pdus = conn->r2t_pdus_cnt;
+	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
+	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
+}
+EXPORT_SYMBOL_GPL(iscsi_tcp_conn_get_stats);
diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
new file mode 100644
index 000000000000..e6bf8ef276bb
--- /dev/null
+++ b/include/scsi/libiscsi_tcp.h
@@ -0,0 +1,131 @@
+/*
+ * iSCSI over TCP/IP Data-Path lib
+ *
+ * Copyright (C) 2008 Mike Christie
+ * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ * maintained by open-iscsi@googlegroups.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * See the file COPYING included with this distribution for more details.
+ */
+
+#ifndef LIBISCSI_TCP_H
+#define LIBISCSI_TCP_H
+
+#include <scsi/libiscsi.h>
+
+struct iscsi_tcp_conn;
+struct iscsi_segment;
+struct sk_buff;
+struct hash_desc;
+
+typedef int iscsi_segment_done_fn_t(struct iscsi_tcp_conn *,
+				    struct iscsi_segment *);
+
+struct iscsi_segment {
+	unsigned char		*data;
+	unsigned int		size;
+	unsigned int		copied;
+	unsigned int		total_size;
+	unsigned int		total_copied;
+
+	struct hash_desc	*hash;
+	unsigned char		recv_digest[ISCSI_DIGEST_SIZE];
+	unsigned char		digest[ISCSI_DIGEST_SIZE];
+	unsigned int		digest_len;
+
+	struct scatterlist	*sg;
+	void			*sg_mapped;
+	unsigned int		sg_offset;
+
+	iscsi_segment_done_fn_t	*done;
+};
+
+/* Socket connection recieve helper */
+struct iscsi_tcp_recv {
+	struct iscsi_hdr	*hdr;
+	struct iscsi_segment	segment;
+
+	/* Allocate buffer for BHS + AHS */
+	uint32_t		hdr_buf[64];
+
+	/* copied and flipped values */
+	int			datalen;
+};
+
+struct iscsi_tcp_conn {
+	struct iscsi_conn	*iscsi_conn;
+	void			*dd_data;
+	int			stop_stage;	/* conn_stop() flag: *
+						 * stop to recover,  *
+						 * stop to terminate */
+	/* control data */
+	struct iscsi_tcp_recv	in;		/* TCP receive context */
+	/* CRC32C (Rx) LLD should set this is they do not offload */
+	struct hash_desc	*rx_hash;
+};
+
+struct iscsi_tcp_task {
+	uint32_t		exp_datasn;	/* expected target's R2TSN/DataSN */
+	int			data_offset;
+	struct iscsi_r2t_info	*r2t;		/* in progress solict R2T */
+	struct iscsi_pool	r2tpool;
+	struct kfifo		*r2tqueue;
+	void			*dd_data;
+};
+
+enum {
+	ISCSI_TCP_SEGMENT_DONE,		/* curr seg has been processed */
+	ISCSI_TCP_SKB_DONE,		/* skb is out of data */
+	ISCSI_TCP_CONN_ERR,		/* iscsi layer has fired a conn err */
+	ISCSI_TCP_SUSPENDED,		/* conn is suspended */
+};
+
+extern void iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn);
+extern int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
+			      unsigned int offset, bool offloaded, int *status);
+extern void iscsi_tcp_cleanup_task(struct iscsi_task *task);
+extern int iscsi_tcp_task_init(struct iscsi_task *task);
+extern int iscsi_tcp_task_xmit(struct iscsi_task *task);
+
+/* segment helpers */
+extern int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn);
+extern int iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv,
+				  unsigned copied);
+extern void iscsi_tcp_segment_unmap(struct iscsi_segment *segment);
+
+extern void iscsi_segment_init_linear(struct iscsi_segment *segment,
+				      void *data, size_t size,
+				      iscsi_segment_done_fn_t *done,
+				      struct hash_desc *hash);
+extern int
+iscsi_segment_seek_sg(struct iscsi_segment *segment,
+		      struct scatterlist *sg_list, unsigned int sg_count,
+		      unsigned int offset, size_t size,
+		      iscsi_segment_done_fn_t *done, struct hash_desc *hash);
+
+/* digest helpers */
+extern void iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr,
+				  size_t hdrlen,
+				  unsigned char digest[ISCSI_DIGEST_SIZE]);
+extern struct iscsi_cls_conn *
+iscsi_tcp_conn_setup(struct iscsi_cls_session *cls_session, int dd_data_size,
+		     uint32_t conn_idx);
+extern void iscsi_tcp_conn_teardown(struct iscsi_cls_conn *cls_conn);
+
+/* misc helpers */
+extern int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session);
+extern void iscsi_tcp_r2tpool_free(struct iscsi_session *session);
+
+extern void iscsi_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
+				     struct iscsi_stats *stats);
+#endif /* LIBISCSI_TCP_H */
-- 
cgit v1.2.3


From 262ef63627977acb7d8dd38c4f0f290bf49fbbfd Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 2 Dec 2008 00:32:13 -0600
Subject: [SCSI] libiscsi: allow drivers to modify the itt sent to the target

bnx2i and cxgb3i need to encode LLD info in the itt so that
the firmware/hardware can process the pdu. This patch allows
the LLDs to encode info in the task->hdr->itt that they
setup in the alloc_pdu callout (any resources that are allocated
can be freed with the pdu in the cleanup_task callout). If
the LLD encodes info in the itt they should implement a
parse_pdu_itt callout. If parse_pdu_itt is not implemented
libiscsi will do the right thing for the LLD.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libiscsi.c             | 47 +++++++++++++++++++++++++++----------
 include/scsi/libiscsi.h             |  5 ++--
 include/scsi/scsi_transport_iscsi.h |  3 +++
 3 files changed, 40 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index c1bb74052aed..6c4034b1561c 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -222,12 +222,14 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	struct scsi_cmnd *sc = task->sc;
 	struct iscsi_cmd *hdr;
 	unsigned hdrlength, cmd_len;
+	itt_t itt;
 	int rc;
 
 	rc = conn->session->tt->alloc_pdu(task);
 	if (rc)
 		return rc;
 	hdr = (struct iscsi_cmd *) task->hdr;
+	itt = hdr->itt;
 	memset(hdr, 0, sizeof(*hdr));
 
 	task->hdr_len = 0;
@@ -238,7 +240,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	hdr->flags = ISCSI_ATTR_SIMPLE;
 	int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
 	memcpy(task->lun, hdr->lun, sizeof(task->lun));
-	hdr->itt = task->hdr_itt = build_itt(task->itt, session->age);
+	if (session->tt->parse_pdu_itt)
+		hdr->itt = task->hdr_itt = itt;
+	else
+		hdr->itt = task->hdr_itt = build_itt(task->itt,
+						     task->conn->session->age);
 	hdr->cmdsn = task->cmdsn = cpu_to_be32(session->cmdsn);
 	session->cmdsn++;
 	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
@@ -457,7 +463,6 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
 	 */
 	nop->cmdsn = cpu_to_be32(session->cmdsn);
 	if (hdr->itt != RESERVED_ITT) {
-		hdr->itt = build_itt(task->itt, session->age);
 		/*
 		 * TODO: We always use immediate, so we never hit this.
 		 * If we start to send tmfs or nops as non-immediate then
@@ -490,6 +495,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 {
 	struct iscsi_session *session = conn->session;
 	struct iscsi_task *task;
+	itt_t itt;
 
 	if (session->state == ISCSI_STATE_TERMINATE)
 		return NULL;
@@ -531,9 +537,18 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 				 "pdu for mgmt task.\n");
 		goto requeue_task;
 	}
+	itt = task->hdr->itt;
 	task->hdr_len = sizeof(struct iscsi_hdr);
-
 	memcpy(task->hdr, hdr, sizeof(struct iscsi_hdr));
+
+	if (hdr->itt != RESERVED_ITT) {
+		if (session->tt->parse_pdu_itt)
+			task->hdr->itt = itt;
+		else
+			task->hdr->itt = build_itt(task->itt,
+						   task->conn->session->age);
+	}
+
 	INIT_LIST_HEAD(&task->running);
 	list_add_tail(&task->running, &conn->mgmtqueue);
 
@@ -745,7 +760,6 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 {
 	struct iscsi_reject *reject = (struct iscsi_reject *)hdr;
 	struct iscsi_hdr rejected_pdu;
-	uint32_t itt;
 
 	conn->exp_statsn = be32_to_cpu(reject->statsn) + 1;
 
@@ -755,10 +769,9 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 
 		if (ntoh24(reject->dlength) >= sizeof(struct iscsi_hdr)) {
 			memcpy(&rejected_pdu, data, sizeof(struct iscsi_hdr));
-			itt = get_itt(rejected_pdu.itt);
 			iscsi_conn_printk(KERN_ERR, conn,
-					  "itt 0x%x had pdu (op 0x%x) rejected "
-					  "due to DataDigest error.\n", itt,
+					  "pdu (op 0x%x) rejected "
+					  "due to DataDigest error.\n",
 					  rejected_pdu.opcode);
 		}
 	}
@@ -778,12 +791,15 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 {
 	struct iscsi_session *session = conn->session;
-	uint32_t i;
+	int i;
 
 	if (itt == RESERVED_ITT)
 		return NULL;
 
-	i = get_itt(itt);
+	if (session->tt->parse_pdu_itt)
+		session->tt->parse_pdu_itt(conn, itt, &i, NULL);
+	else
+		i = get_itt(itt);
 	if (i >= session->cmds_max)
 		return NULL;
 
@@ -958,20 +974,25 @@ EXPORT_SYMBOL_GPL(iscsi_complete_pdu);
 int iscsi_verify_itt(struct iscsi_conn *conn, itt_t itt)
 {
 	struct iscsi_session *session = conn->session;
-	uint32_t i;
+	int age = 0, i = 0;
 
 	if (itt == RESERVED_ITT)
 		return 0;
 
-	if (((__force u32)itt & ISCSI_AGE_MASK) !=
-	    (session->age << ISCSI_AGE_SHIFT)) {
+	if (session->tt->parse_pdu_itt)
+		session->tt->parse_pdu_itt(conn, itt, &i, &age);
+	else {
+		i = get_itt(itt);
+		age = ((__force u32)itt >> ISCSI_AGE_SHIFT) & ISCSI_AGE_MASK;
+	}
+
+	if (age != session->age) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "received itt %x expected session age (%x)\n",
 				  (__force u32)itt, session->age);
 		return ISCSI_ERR_BAD_ITT;
 	}
 
-	i = get_itt(itt);
 	if (i >= session->cmds_max) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "received invalid itt index %u (max cmds "
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 51500573c0b8..7360e1916e75 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -30,6 +30,7 @@
 #include <linux/workqueue.h>
 #include <scsi/iscsi_proto.h>
 #include <scsi/iscsi_if.h>
+#include <scsi/scsi_transport_iscsi.h>
 
 struct scsi_transport_template;
 struct scsi_host_template;
@@ -70,12 +71,12 @@ enum {
 /* Connection suspend "bit" */
 #define ISCSI_SUSPEND_BIT		1
 
-#define ISCSI_ITT_MASK			(0x1fff)
+#define ISCSI_ITT_MASK			0x1fff
 #define ISCSI_TOTAL_CMDS_MAX		4096
 /* this must be a power of two greater than ISCSI_MGMT_CMDS_MAX */
 #define ISCSI_TOTAL_CMDS_MIN		16
 #define ISCSI_AGE_SHIFT			28
-#define ISCSI_AGE_MASK			(0xf << ISCSI_AGE_SHIFT)
+#define ISCSI_AGE_MASK			0xf
 
 #define ISCSI_ADDRESS_BUF_LEN		64
 
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index c928234c018f..e13cb20f1eae 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -122,6 +122,9 @@ struct iscsi_transport {
 	int (*xmit_pdu) (struct iscsi_task *task);
 	int (*init_pdu) (struct iscsi_task *task, unsigned int offset,
 			 unsigned int count);
+	void (*parse_pdu_itt) (struct iscsi_conn *conn, itt_t itt,
+			       int *index, int *age);
+
 	void (*session_recovery_timedout) (struct iscsi_cls_session *session);
 	struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr,
 					      int non_blocking);
-- 
cgit v1.2.3


From 2ff79d52d56eebcffd83e9327b89d7daedf1e897 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 2 Dec 2008 00:32:14 -0600
Subject: [SCSI] libiscsi: pass opcode into alloc_pdu callout

We do not need to allocate a itt for data_out, so this
passes the opcode to the alloc_pdu callout.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.c |  2 +-
 drivers/scsi/iscsi_tcp.c                 |  2 +-
 drivers/scsi/libiscsi.c                  | 14 +++++++-------
 drivers/scsi/libiscsi_tcp.c              |  2 +-
 include/scsi/scsi_transport_iscsi.h      |  2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 78bf5e4538cb..12876392516e 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -119,7 +119,7 @@ error:
 	iscsi_conn_failure(conn, rc);
 }
 
-static int iscsi_iser_pdu_alloc(struct iscsi_task *task)
+static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
 
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 07d9eb14721b..de5c9b3e51fb 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -457,7 +457,7 @@ static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
 	return 0;
 }
 
-static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task)
+static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
 {
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 6c4034b1561c..ddf53978f0be 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -225,13 +225,18 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	itt_t itt;
 	int rc;
 
-	rc = conn->session->tt->alloc_pdu(task);
+	rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD);
 	if (rc)
 		return rc;
 	hdr = (struct iscsi_cmd *) task->hdr;
 	itt = hdr->itt;
 	memset(hdr, 0, sizeof(*hdr));
 
+	if (session->tt->parse_pdu_itt)
+		hdr->itt = task->hdr_itt = itt;
+	else
+		hdr->itt = task->hdr_itt = build_itt(task->itt,
+						     task->conn->session->age);
 	task->hdr_len = 0;
 	rc = iscsi_add_hdr(task, sizeof(*hdr));
 	if (rc)
@@ -240,11 +245,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	hdr->flags = ISCSI_ATTR_SIMPLE;
 	int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
 	memcpy(task->lun, hdr->lun, sizeof(task->lun));
-	if (session->tt->parse_pdu_itt)
-		hdr->itt = task->hdr_itt = itt;
-	else
-		hdr->itt = task->hdr_itt = build_itt(task->itt,
-						     task->conn->session->age);
 	hdr->cmdsn = task->cmdsn = cpu_to_be32(session->cmdsn);
 	session->cmdsn++;
 	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
@@ -532,7 +532,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	} else
 		task->data_count = 0;
 
-	if (conn->session->tt->alloc_pdu(task)) {
+	if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
 				 "pdu for mgmt task.\n");
 		goto requeue_task;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index e86508936314..9df6b3436e4a 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -1021,7 +1021,7 @@ flush:
 		return 0;
 	}
 
-	rc = conn->session->tt->alloc_pdu(task);
+	rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_DATA_OUT);
 	if (rc)
 		return rc;
 	iscsi_prep_data_out_pdu(task, r2t, (struct iscsi_data *) task->hdr);
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index e13cb20f1eae..b50aabe2861e 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -118,7 +118,7 @@ struct iscsi_transport {
 	int (*xmit_task) (struct iscsi_task *task);
 	void (*cleanup_task) (struct iscsi_task *task);
 
-	int (*alloc_pdu) (struct iscsi_task *task);
+	int (*alloc_pdu) (struct iscsi_task *task, uint8_t opcode);
 	int (*xmit_pdu) (struct iscsi_task *task);
 	int (*init_pdu) (struct iscsi_task *task, unsigned int offset,
 			 unsigned int count);
-- 
cgit v1.2.3


From 6df19a791bdd5d820cccd8c7a12679888ae62099 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 2 Dec 2008 00:32:16 -0600
Subject: [SCSI] libiscsi_tcp: support padding offload

cxgb3i does not offload the processing of the header,
but it will always process the padding. This patch
adds a padding offload flag to detect when the LLD
supports this.

The patch also modifies the header processing so that
we do not try to read/bypass the header dugest in the
skb. cxgb3i will not include it with the header like
with other offload cards.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/iscsi_tcp.c    |  9 +++++----
 drivers/scsi/libiscsi_tcp.c | 37 ++++++++++++++++++++-----------------
 include/scsi/iscsi_if.h     |  2 ++
 include/scsi/libiscsi_tcp.h |  3 ++-
 4 files changed, 29 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index de5c9b3e51fb..23808dfe22ba 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -194,7 +194,7 @@ iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_sw_tcp_conn *tcp_sw_conn)
 
 /**
  * iscsi_sw_tcp_xmit_segment - transmit segment
- * @tcp_sw_conn: the iSCSI TCP connection
+ * @tcp_conn: the iSCSI TCP connection
  * @segment: the buffer to transmnit
  *
  * This function transmits as much of the buffer as
@@ -205,14 +205,15 @@ iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_sw_tcp_conn *tcp_sw_conn)
  * hash as it goes. When the entire segment has been transmitted,
  * it will retrieve the hash value and send it as well.
  */
-static int iscsi_sw_tcp_xmit_segment(struct iscsi_sw_tcp_conn *tcp_sw_conn,
+static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
 				     struct iscsi_segment *segment)
 {
+	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
 	struct socket *sk = tcp_sw_conn->sock;
 	unsigned int copied = 0;
 	int r = 0;
 
-	while (!iscsi_tcp_segment_done(segment, 0, r)) {
+	while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) {
 		struct scatterlist *sg;
 		unsigned int offset, copy;
 		int flags = 0;
@@ -263,7 +264,7 @@ static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
 	int rc = 0;
 
 	while (1) {
-		rc = iscsi_sw_tcp_xmit_segment(tcp_sw_conn, segment);
+		rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
 		if (rc < 0) {
 			rc = ISCSI_ERR_XMIT_FAILED;
 			goto error;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 9df6b3436e4a..a745f91d2928 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -159,6 +159,7 @@ iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
 
 /**
  * iscsi_tcp_segment_done - check whether the segment is complete
+ * @tcp_conn: iscsi tcp connection
  * @segment: iscsi segment to check
  * @recv: set to one of this is called from the recv path
  * @copied: number of bytes copied
@@ -172,7 +173,8 @@ iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
  *
  * This function must be re-entrant.
  */
-int iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv,
+int iscsi_tcp_segment_done(struct iscsi_tcp_conn *tcp_conn,
+			   struct iscsi_segment *segment, int recv,
 			   unsigned copied)
 {
 	static unsigned char padbuf[ISCSI_PAD_LEN];
@@ -225,13 +227,15 @@ int iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv,
 	}
 
 	/* Do we need to handle padding? */
-	pad = iscsi_padding(segment->total_copied);
-	if (pad != 0) {
-		debug_tcp("consume %d pad bytes\n", pad);
-		segment->total_size += pad;
-		segment->size = pad;
-		segment->data = padbuf;
-		return 0;
+	if (!(tcp_conn->iscsi_conn->session->tt->caps & CAP_PADDING_OFFLOAD)) {
+		pad = iscsi_padding(segment->total_copied);
+		if (pad != 0) {
+			debug_tcp("consume %d pad bytes\n", pad);
+			segment->total_size += pad;
+			segment->size = pad;
+			segment->data = padbuf;
+			return 0;
+		}
 	}
 
 	/*
@@ -273,7 +277,7 @@ iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
 {
 	unsigned int copy = 0, copied = 0;
 
-	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
+	while (!iscsi_tcp_segment_done(tcp_conn, segment, 1, copy)) {
 		if (copied == len) {
 			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
 				  len);
@@ -794,7 +798,8 @@ iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
 	/* We're done processing the header. See if we're doing
 	 * header digests; if so, set up the recv_digest buffer
 	 * and go back for more. */
-	if (conn->hdrdgst_en) {
+	if (conn->hdrdgst_en &&
+	    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) {
 		if (segment->digest_len == 0) {
 			/*
 			 * Even if we offload the digest processing we
@@ -806,14 +811,12 @@ iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
 			return 0;
 		}
 
-		if (!(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) {
-			iscsi_tcp_dgst_header(tcp_conn->rx_hash, hdr,
-				segment->total_copied - ISCSI_DIGEST_SIZE,
-				segment->digest);
+		iscsi_tcp_dgst_header(tcp_conn->rx_hash, hdr,
+				      segment->total_copied - ISCSI_DIGEST_SIZE,
+				      segment->digest);
 
-			if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
-				return ISCSI_ERR_HDR_DGST;
-		}
+		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
+			return ISCSI_ERR_HDR_DGST;
 	}
 
 	tcp_conn->in.hdr = hdr;
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 8e008c96e795..d0ed5226f8c4 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -336,6 +336,8 @@ enum iscsi_host_param {
 #define CAP_SENDTARGETS_OFFLOAD	0x400	/* offload discovery process */
 #define CAP_DATA_PATH_OFFLOAD	0x800	/* offload entire IO path */
 #define CAP_DIGEST_OFFLOAD	0x1000	/* offload hdr and data digests */
+#define CAP_PADDING_OFFLOAD	0x2000	/* offload padding insertion, removal,
+					 and verification */
 
 /*
  * These flags describes reason of stop_conn() call
diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
index e6bf8ef276bb..83e32f6d7859 100644
--- a/include/scsi/libiscsi_tcp.h
+++ b/include/scsi/libiscsi_tcp.h
@@ -99,7 +99,8 @@ extern int iscsi_tcp_task_xmit(struct iscsi_task *task);
 
 /* segment helpers */
 extern int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn);
-extern int iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv,
+extern int iscsi_tcp_segment_done(struct iscsi_tcp_conn *tcp_conn,
+				  struct iscsi_segment *segment, int recv,
 				  unsigned copied);
 extern void iscsi_tcp_segment_unmap(struct iscsi_segment *segment);
 
-- 
cgit v1.2.3


From f4f4e47e4af6b02dd1c425b931c65d0165356e33 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 4 Dec 2008 14:24:39 +0900
Subject: [SCSI] add residual argument to scsi_execute and scsi_execute_req

scsi_execute() and scsi_execute_req() discard the residual length
information. Some callers need it. This adds residual argument
(optional) to scsi_execute and scsi_execute_req.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/ata/libata-scsi.c         |  4 ++--
 drivers/scsi/ch.c                 |  2 +-
 drivers/scsi/scsi_ioctl.c         |  2 +-
 drivers/scsi/scsi_lib.c           | 17 +++++++++++------
 drivers/scsi/scsi_scan.c          |  7 ++++---
 drivers/scsi/scsi_transport_spi.c |  3 ++-
 drivers/scsi/sd.c                 | 11 ++++++-----
 drivers/scsi/ses.c                |  9 +++++----
 drivers/scsi/sr.c                 |  4 ++--
 drivers/scsi/sr_ioctl.c           |  2 +-
 include/scsi/scsi_device.h        |  5 +++--
 11 files changed, 38 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 47c7afcb36f2..b88e443027c4 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -517,7 +517,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg)
 	/* Good values for timeout and retries?  Values below
 	   from scsi_ioctl_send_command() for default case... */
 	cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize,
-				  sensebuf, (10*HZ), 5, 0);
+				  sensebuf, (10*HZ), 5, 0, NULL);
 
 	if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */
 		u8 *desc = sensebuf + 8;
@@ -603,7 +603,7 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg)
 	/* Good values for timeout and retries?  Values below
 	   from scsi_ioctl_send_command() for default case... */
 	cmd_result = scsi_execute(scsidev, scsi_cmd, DMA_NONE, NULL, 0,
-				sensebuf, (10*HZ), 5, 0);
+				sensebuf, (10*HZ), 5, 0, NULL);
 
 	if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */
 		u8 *desc = sensebuf + 8;
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index 88ecf94ad979..af9725409f43 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -190,7 +190,7 @@ ch_do_scsi(scsi_changer *ch, unsigned char *cmd,
 
         result = scsi_execute_req(ch->device, cmd, direction, buffer,
 				  buflength, &sshdr, timeout * HZ,
-				  MAX_RETRIES);
+				  MAX_RETRIES, NULL);
 
 	dprintk("result: 0x%x\n",result);
 	if (driver_byte(result) & DRIVER_SENSE) {
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 2ee49f82ca01..2ae4f8fc5831 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -94,7 +94,7 @@ static int ioctl_internal_command(struct scsi_device *sdev, char *cmd,
 	SCSI_LOG_IOCTL(1, printk("Trying ioctl with scsi command %d\n", *cmd));
 
 	result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0,
-				  &sshdr, timeout, retries);
+				  &sshdr, timeout, retries, NULL);
 
 	SCSI_LOG_IOCTL(2, printk("Ioctl returned  0x%x\n", result));
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ecfbbd30dce5..f2f51e0333eb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -183,13 +183,15 @@ int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
  * @timeout:	request timeout in seconds
  * @retries:	number of times to retry request
  * @flags:	or into request flags;
+ * @resid:	optional residual length
  *
  * returns the req->errors value which is the scsi_cmnd result
  * field.
  */
 int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 		 int data_direction, void *buffer, unsigned bufflen,
-		 unsigned char *sense, int timeout, int retries, int flags)
+		 unsigned char *sense, int timeout, int retries, int flags,
+		 int *resid)
 {
 	struct request *req;
 	int write = (data_direction == DMA_TO_DEVICE);
@@ -224,6 +226,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	if (unlikely(req->data_len > 0 && req->data_len <= bufflen))
 		memset(buffer + (bufflen - req->data_len), 0, req->data_len);
 
+	if (resid)
+		*resid = req->data_len;
 	ret = req->errors;
  out:
 	blk_put_request(req);
@@ -235,7 +239,8 @@ EXPORT_SYMBOL(scsi_execute);
 
 int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 		     int data_direction, void *buffer, unsigned bufflen,
-		     struct scsi_sense_hdr *sshdr, int timeout, int retries)
+		     struct scsi_sense_hdr *sshdr, int timeout, int retries,
+		     int *resid)
 {
 	char *sense = NULL;
 	int result;
@@ -246,7 +251,7 @@ int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			return DRIVER_ERROR << 24;
 	}
 	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
-			      sense, timeout, retries, 0);
+			      sense, timeout, retries, 0, resid);
 	if (sshdr)
 		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
 
@@ -2016,7 +2021,7 @@ scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage,
 	}
 
 	ret = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, real_buffer, len,
-			       sshdr, timeout, retries);
+			       sshdr, timeout, retries, NULL);
 	kfree(real_buffer);
 	return ret;
 }
@@ -2081,7 +2086,7 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
 	memset(buffer, 0, len);
 
 	result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buffer, len,
-				  sshdr, timeout, retries);
+				  sshdr, timeout, retries, NULL);
 
 	/* This code looks awful: what it's doing is making sure an
 	 * ILLEGAL REQUEST sense return identifies the actual command
@@ -2163,7 +2168,7 @@ scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
 	/* try to eat the UNIT_ATTENTION if there are enough retries */
 	do {
 		result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr,
-					  timeout, retries);
+					  timeout, retries, NULL);
 		if (sdev->removable && scsi_sense_valid(sshdr) &&
 		    sshdr->sense_key == UNIT_ATTENTION)
 			sdev->changed = 1;
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index b14dc02c3ded..6963d529e0f0 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -216,7 +216,7 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
 	scsi_cmd[4] = 0x2a;     /* size */
 	scsi_cmd[5] = 0;
 	scsi_execute_req(sdev, scsi_cmd, DMA_FROM_DEVICE, result, 0x2a, NULL,
-			 SCSI_TIMEOUT, 3);
+			 SCSI_TIMEOUT, 3, NULL);
 }
 
 /**
@@ -581,7 +581,8 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
 
 		result = scsi_execute_req(sdev,  scsi_cmd, DMA_FROM_DEVICE,
 					  inq_result, try_inquiry_len, &sshdr,
-					  HZ / 2 + HZ * scsi_inq_timeout, 3);
+					  HZ / 2 + HZ * scsi_inq_timeout, 3,
+					  NULL);
 
 		SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO "scsi scan: INQUIRY %s "
 				"with code 0x%x\n",
@@ -1390,7 +1391,7 @@ static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
 
 		result = scsi_execute_req(sdev, scsi_cmd, DMA_FROM_DEVICE,
 					  lun_data, length, &sshdr,
-					  SCSI_TIMEOUT + 4 * HZ, 3);
+					  SCSI_TIMEOUT + 4 * HZ, 3, NULL);
 
 		SCSI_LOG_SCAN_BUS(3, printk (KERN_INFO "scsi scan: REPORT LUNS"
 				" %s (try %d) result 0x%x\n", result
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 7c2d28924d2a..96361aa70906 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -111,7 +111,8 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd,
 				      sense, DV_TIMEOUT, /* retries */ 1,
 				      REQ_FAILFAST_DEV |
 				      REQ_FAILFAST_TRANSPORT |
-				      REQ_FAILFAST_DRIVER);
+				      REQ_FAILFAST_DRIVER,
+				      NULL);
 		if (result & DRIVER_SENSE) {
 			struct scsi_sense_hdr sshdr_tmp;
 			if (!sshdr)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5081b3981d3c..62b28d58e65e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -884,7 +884,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		 * flush everything.
 		 */
 		res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
-				       SD_TIMEOUT, SD_MAX_RETRIES);
+				       SD_TIMEOUT, SD_MAX_RETRIES, NULL);
 		if (res == 0)
 			break;
 	}
@@ -1134,7 +1134,7 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 			the_result = scsi_execute_req(sdkp->device, cmd,
 						      DMA_NONE, NULL, 0,
 						      &sshdr, SD_TIMEOUT,
-						      SD_MAX_RETRIES);
+						      SD_MAX_RETRIES, NULL);
 
 			/*
 			 * If the drive has indicated to us that it
@@ -1192,7 +1192,8 @@ sd_spinup_disk(struct scsi_disk *sdkp)
 					cmd[4] |= 1 << 4;
 				scsi_execute_req(sdkp->device, cmd, DMA_NONE,
 						 NULL, 0, &sshdr,
-						 SD_TIMEOUT, SD_MAX_RETRIES);
+						 SD_TIMEOUT, SD_MAX_RETRIES,
+						 NULL);
 				spintime_expire = jiffies + 100 * HZ;
 				spintime = 1;
 			}
@@ -1306,7 +1307,7 @@ repeat:
 		
 		the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
 					      buffer, longrc ? 13 : 8, &sshdr,
-					      SD_TIMEOUT, SD_MAX_RETRIES);
+					      SD_TIMEOUT, SD_MAX_RETRIES, NULL);
 
 		if (media_not_present(sdkp, &sshdr))
 			return;
@@ -1986,7 +1987,7 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
 		return -ENODEV;
 
 	res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
-			       SD_TIMEOUT, SD_MAX_RETRIES);
+			       SD_TIMEOUT, SD_MAX_RETRIES, NULL);
 	if (res) {
 		sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n");
 		sd_print_result(sdkp, res);
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index 1bcf3c33d7ff..7f0df29f3a64 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -77,7 +77,7 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code,
 	};
 
 	return scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen,
-				NULL, SES_TIMEOUT, SES_RETRIES);
+				NULL, SES_TIMEOUT, SES_RETRIES, NULL);
 }
 
 static int ses_send_diag(struct scsi_device *sdev, int page_code,
@@ -95,7 +95,7 @@ static int ses_send_diag(struct scsi_device *sdev, int page_code,
 	};
 
 	result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen,
-				  NULL, SES_TIMEOUT, SES_RETRIES);
+				  NULL, SES_TIMEOUT, SES_RETRIES, NULL);
 	if (result)
 		sdev_printk(KERN_ERR, sdev, "SEND DIAGNOSTIC result: %8x\n",
 			    result);
@@ -369,7 +369,8 @@ static void ses_match_to_enclosure(struct enclosure_device *edev,
 		return;
 
 	if (scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf,
-			     VPD_INQUIRY_SIZE, NULL, SES_TIMEOUT, SES_RETRIES))
+			     VPD_INQUIRY_SIZE, NULL, SES_TIMEOUT, SES_RETRIES,
+			     NULL))
 		goto free;
 
 	vpd_len = (buf[2] << 8) + buf[3];
@@ -380,7 +381,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev,
 	cmd[3] = vpd_len >> 8;
 	cmd[4] = vpd_len & 0xff;
 	if (scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf,
-			     vpd_len, NULL, SES_TIMEOUT, SES_RETRIES))
+			     vpd_len, NULL, SES_TIMEOUT, SES_RETRIES, NULL))
 		goto free;
 
 	desc = buf + 4;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 45b66b98a516..e7fa3caead79 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -177,7 +177,7 @@ int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr)
 	do {
 		the_result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL,
 					      0, sshdr, SR_TIMEOUT,
-					      retries--);
+					      retries--, NULL);
 		if (scsi_sense_valid(sshdr) &&
 		    sshdr->sense_key == UNIT_ATTENTION)
 			sdev->changed = 1;
@@ -681,7 +681,7 @@ static void get_sectorsize(struct scsi_cd *cd)
 		/* Do the command and wait.. */
 		the_result = scsi_execute_req(cd->device, cmd, DMA_FROM_DEVICE,
 					      buffer, sizeof(buffer), NULL,
-					      SR_TIMEOUT, MAX_RETRIES);
+					      SR_TIMEOUT, MAX_RETRIES, NULL);
 
 		retries--;
 
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index ae87d08df588..d92ff512d213 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -207,7 +207,7 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 	memset(sense, 0, sizeof(*sense));
 	result = scsi_execute(SDev, cgc->cmd, cgc->data_direction,
 			      cgc->buffer, cgc->buflen, (char *)sense,
-			      cgc->timeout, IOCTL_RETRIES, 0);
+			      cgc->timeout, IOCTL_RETRIES, 0, NULL);
 
 	scsi_normalize_sense((char *)sense, sizeof(*sense), &sshdr);
 
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 2a1c1419994d..01a4c58f8bad 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -365,10 +365,11 @@ extern int scsi_is_target_device(const struct device *);
 extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 			int data_direction, void *buffer, unsigned bufflen,
 			unsigned char *sense, int timeout, int retries,
-			int flag);
+			int flag, int *resid);
 extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			    int data_direction, void *buffer, unsigned bufflen,
-			    struct scsi_sense_hdr *, int timeout, int retries);
+			    struct scsi_sense_hdr *, int timeout, int retries,
+			    int *resid);
 extern int scsi_execute_async(struct scsi_device *sdev,
 			      const unsigned char *cmd, int cmd_len, int data_direction,
 			      void *buffer, unsigned bufflen, int use_sg,
-- 
cgit v1.2.3


From f032c2f7cdaae0e8907cd3b26426fc651dc5c275 Mon Sep 17 00:00:00 2001
From: Robert Love <robert.w.love@intel.com>
Date: Tue, 9 Dec 2008 15:10:11 -0800
Subject: [SCSI] FC protocol definition header files

Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/scsi/fc/fc_els.h    | 816 ++++++++++++++++++++++++++++++++++++++++++++
 include/scsi/fc/fc_encaps.h | 138 ++++++++
 include/scsi/fc/fc_fc2.h    | 124 +++++++
 include/scsi/fc/fc_fcoe.h   | 114 +++++++
 include/scsi/fc/fc_fcp.h    | 199 +++++++++++
 include/scsi/fc/fc_fs.h     | 340 ++++++++++++++++++
 include/scsi/fc/fc_gs.h     |  93 +++++
 include/scsi/fc/fc_ns.h     | 159 +++++++++
 8 files changed, 1983 insertions(+)
 create mode 100644 include/scsi/fc/fc_els.h
 create mode 100644 include/scsi/fc/fc_encaps.h
 create mode 100644 include/scsi/fc/fc_fc2.h
 create mode 100644 include/scsi/fc/fc_fcoe.h
 create mode 100644 include/scsi/fc/fc_fcp.h
 create mode 100644 include/scsi/fc/fc_fs.h
 create mode 100644 include/scsi/fc/fc_gs.h
 create mode 100644 include/scsi/fc/fc_ns.h

(limited to 'include')

diff --git a/include/scsi/fc/fc_els.h b/include/scsi/fc/fc_els.h
new file mode 100644
index 000000000000..195ca014d3ce
--- /dev/null
+++ b/include/scsi/fc/fc_els.h
@@ -0,0 +1,816 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_ELS_H_
+#define	_FC_ELS_H_
+
+/*
+ * Fibre Channel Switch - Enhanced Link Services definitions.
+ * From T11 FC-LS Rev 1.2 June 7, 2005.
+ */
+
+/*
+ * ELS Command codes - byte 0 of the frame payload
+ */
+enum fc_els_cmd {
+	ELS_LS_RJT =	0x01,	/* ESL reject */
+	ELS_LS_ACC =	0x02,	/* ESL Accept */
+	ELS_PLOGI =	0x03,	/* N_Port login */
+	ELS_FLOGI =	0x04,	/* F_Port login */
+	ELS_LOGO =	0x05,	/* Logout */
+	ELS_ABTX =	0x06,	/* Abort exchange - obsolete */
+	ELS_RCS =	0x07,	/* read connection status */
+	ELS_RES =	0x08,	/* read exchange status block */
+	ELS_RSS =	0x09,	/* read sequence status block */
+	ELS_RSI =	0x0a,	/* read sequence initiative */
+	ELS_ESTS =	0x0b,	/* establish streaming */
+	ELS_ESTC =	0x0c,	/* estimate credit */
+	ELS_ADVC =	0x0d,	/* advise credit */
+	ELS_RTV =	0x0e,	/* read timeout value */
+	ELS_RLS =	0x0f,	/* read link error status block */
+	ELS_ECHO =	0x10,	/* echo */
+	ELS_TEST =	0x11,	/* test */
+	ELS_RRQ =	0x12,	/* reinstate recovery qualifier */
+	ELS_REC =	0x13,	/* read exchange concise */
+	ELS_SRR =	0x14,	/* sequence retransmission request */
+	ELS_PRLI =	0x20,	/* process login */
+	ELS_PRLO =	0x21,	/* process logout */
+	ELS_SCN =	0x22,	/* state change notification */
+	ELS_TPLS =	0x23,	/* test process login state */
+	ELS_TPRLO =	0x24,	/* third party process logout */
+	ELS_LCLM =	0x25,	/* login control list mgmt (obs) */
+	ELS_GAID =	0x30,	/* get alias_ID */
+	ELS_FACT =	0x31,	/* fabric activate alias_id */
+	ELS_FDACDT =	0x32,	/* fabric deactivate alias_id */
+	ELS_NACT =	0x33,	/* N-port activate alias_id */
+	ELS_NDACT =	0x34,	/* N-port deactivate alias_id */
+	ELS_QOSR =	0x40,	/* quality of service request */
+	ELS_RVCS =	0x41,	/* read virtual circuit status */
+	ELS_PDISC =	0x50,	/* discover N_port service params */
+	ELS_FDISC =	0x51,	/* discover F_port service params */
+	ELS_ADISC =	0x52,	/* discover address */
+	ELS_RNC =	0x53,	/* report node cap (obs) */
+	ELS_FARP_REQ =	0x54,	/* FC ARP request */
+	ELS_FARP_REPL =	0x55,	/* FC ARP reply */
+	ELS_RPS =	0x56,	/* read port status block */
+	ELS_RPL =	0x57,	/* read port list */
+	ELS_RPBC =	0x58,	/* read port buffer condition */
+	ELS_FAN =	0x60,	/* fabric address notification */
+	ELS_RSCN =	0x61,	/* registered state change notification */
+	ELS_SCR =	0x62,	/* state change registration */
+	ELS_RNFT =	0x63,	/* report node FC-4 types */
+	ELS_CSR =	0x68,	/* clock synch. request */
+	ELS_CSU =	0x69,	/* clock synch. update */
+	ELS_LINIT =	0x70,	/* loop initialize */
+	ELS_LSTS =	0x72,	/* loop status */
+	ELS_RNID =	0x78,	/* request node ID data */
+	ELS_RLIR =	0x79,	/* registered link incident report */
+	ELS_LIRR =	0x7a,	/* link incident record registration */
+	ELS_SRL =	0x7b,	/* scan remote loop */
+	ELS_SBRP =	0x7c,	/* set bit-error reporting params */
+	ELS_RPSC =	0x7d,	/* report speed capabilities */
+	ELS_QSA =	0x7e,	/* query security attributes */
+	ELS_EVFP =	0x7f,	/* exchange virt. fabrics params */
+	ELS_LKA =	0x80,	/* link keep-alive */
+	ELS_AUTH_ELS =	0x90,	/* authentication ELS */
+};
+
+/*
+ * Initializer useful for decoding table.
+ * Please keep this in sync with the above definitions.
+ */
+#define	FC_ELS_CMDS_INIT {			\
+	[ELS_LS_RJT] =	"LS_RJT",		\
+	[ELS_LS_ACC] =	"LS_ACC",		\
+	[ELS_PLOGI] =	"PLOGI",		\
+	[ELS_FLOGI] =	"FLOGI",		\
+	[ELS_LOGO] =	"LOGO",			\
+	[ELS_ABTX] =	"ABTX",			\
+	[ELS_RCS] =	"RCS",			\
+	[ELS_RES] =	"RES",			\
+	[ELS_RSS] =	"RSS",			\
+	[ELS_RSI] =	"RSI",			\
+	[ELS_ESTS] =	"ESTS",			\
+	[ELS_ESTC] =	"ESTC",			\
+	[ELS_ADVC] =	"ADVC",			\
+	[ELS_RTV] =	"RTV",			\
+	[ELS_RLS] =	"RLS",			\
+	[ELS_ECHO] =	"ECHO",			\
+	[ELS_TEST] =	"TEST",			\
+	[ELS_RRQ] =	"RRQ",			\
+	[ELS_REC] =	"REC",			\
+	[ELS_SRR] =	"SRR",			\
+	[ELS_PRLI] =	"PRLI",			\
+	[ELS_PRLO] =	"PRLO",			\
+	[ELS_SCN] =	"SCN",			\
+	[ELS_TPLS] =	"TPLS",			\
+	[ELS_TPRLO] =	"TPRLO",		\
+	[ELS_LCLM] =	"LCLM",			\
+	[ELS_GAID] =	"GAID",			\
+	[ELS_FACT] =	"FACT",			\
+	[ELS_FDACDT] =	"FDACDT",		\
+	[ELS_NACT] =	"NACT",			\
+	[ELS_NDACT] =	"NDACT",		\
+	[ELS_QOSR] =	"QOSR",			\
+	[ELS_RVCS] =	"RVCS",			\
+	[ELS_PDISC] =	"PDISC",		\
+	[ELS_FDISC] =	"FDISC",		\
+	[ELS_ADISC] =	"ADISC",		\
+	[ELS_RNC] =	"RNC",			\
+	[ELS_FARP_REQ] = "FARP_REQ",		\
+	[ELS_FARP_REPL] =  "FARP_REPL",		\
+	[ELS_RPS] =	"RPS",			\
+	[ELS_RPL] =	"RPL",			\
+	[ELS_RPBC] =	"RPBC",			\
+	[ELS_FAN] =	"FAN",			\
+	[ELS_RSCN] =	"RSCN",			\
+	[ELS_SCR] =	"SCR",			\
+	[ELS_RNFT] =	"RNFT",			\
+	[ELS_CSR] =	"CSR",			\
+	[ELS_CSU] =	"CSU",			\
+	[ELS_LINIT] =	"LINIT",		\
+	[ELS_LSTS] =	"LSTS",			\
+	[ELS_RNID] =	"RNID",			\
+	[ELS_RLIR] =	"RLIR",			\
+	[ELS_LIRR] =	"LIRR",			\
+	[ELS_SRL] =	"SRL",			\
+	[ELS_SBRP] =	"SBRP",			\
+	[ELS_RPSC] =	"RPSC",			\
+	[ELS_QSA] =	"QSA",			\
+	[ELS_EVFP] =	"EVFP",			\
+	[ELS_LKA] =	"LKA",			\
+	[ELS_AUTH_ELS] = "AUTH_ELS",		\
+}
+
+/*
+ * LS_ACC payload.
+ */
+struct fc_els_ls_acc {
+	__u8          la_cmd;		/* command code ELS_LS_ACC */
+	__u8          la_resv[3];	/* reserved */
+};
+
+/*
+ * ELS reject payload.
+ */
+struct fc_els_ls_rjt {
+	__u8	er_cmd;		/* command code ELS_LS_RJT */
+	__u8	er_resv[4];	/* reserved must be zero */
+	__u8	er_reason;	/* reason (enum fc_els_rjt_reason below) */
+	__u8	er_explan;	/* explanation (enum fc_els_rjt_explan below) */
+	__u8	er_vendor;	/* vendor specific code */
+};
+
+/*
+ * ELS reject reason codes (er_reason).
+ */
+enum fc_els_rjt_reason {
+	ELS_RJT_NONE =		0,	/* no reject - not to be sent */
+	ELS_RJT_INVAL =		0x01,	/* invalid ELS command code */
+	ELS_RJT_LOGIC =		0x03,	/* logical error */
+	ELS_RJT_BUSY =		0x05,	/* logical busy */
+	ELS_RJT_PROT =		0x07,	/* protocol error */
+	ELS_RJT_UNAB =		0x09,	/* unable to perform command request */
+	ELS_RJT_UNSUP =		0x0b,	/* command not supported */
+	ELS_RJT_INPROG =	0x0e,	/* command already in progress */
+	ELS_RJT_VENDOR =	0xff,	/* vendor specific error */
+};
+
+
+/*
+ * reason code explanation (er_explan).
+ */
+enum fc_els_rjt_explan {
+	ELS_EXPL_NONE =		0x00,	/* No additional explanation */
+	ELS_EXPL_SPP_OPT_ERR =	0x01,	/* service parameter error - options */
+	ELS_EXPL_SPP_ICTL_ERR =	0x03,	/* service parm error - initiator ctl */
+	ELS_EXPL_AH =		0x11,	/* invalid association header */
+	ELS_EXPL_AH_REQ =	0x13,	/* association_header required */
+	ELS_EXPL_SID =		0x15,	/* invalid originator S_ID */
+	ELS_EXPL_OXID_RXID =	0x17,	/* invalid OX_ID-RX_ID combination */
+	ELS_EXPL_INPROG =	0x19,	/* Request already in progress */
+	ELS_EXPL_PLOGI_REQD =	0x1e,	/* N_Port login required */
+	ELS_EXPL_INSUF_RES =	0x29,	/* insufficient resources */
+	ELS_EXPL_UNAB_DATA =	0x2a,	/* unable to supply requested data */
+	ELS_EXPL_UNSUPR =	0x2c,	/* Request not supported */
+	ELS_EXPL_INV_LEN =	0x2d,	/* Invalid payload length */
+	/* TBD - above definitions incomplete */
+};
+
+/*
+ * Common service parameters (N ports).
+ */
+struct fc_els_csp {
+	__u8		sp_hi_ver;	/* highest version supported (obs.) */
+	__u8		sp_lo_ver;	/* highest version supported (obs.) */
+	__be16		sp_bb_cred;	/* buffer-to-buffer credits */
+	__be16		sp_features;	/* common feature flags */
+	__be16		sp_bb_data;	/* b-b state number and data field sz */
+	union {
+		struct {
+			__be16	_sp_tot_seq; /* total concurrent sequences */
+			__be16	_sp_rel_off; /* rel. offset by info cat */
+		} sp_plogi;
+		struct {
+			__be32	_sp_r_a_tov; /* resource alloc. timeout msec */
+		} sp_flogi_acc;
+	} sp_u;
+	__be32		sp_e_d_tov;	/* error detect timeout value */
+};
+#define	sp_tot_seq	sp_u.sp_plogi._sp_tot_seq
+#define	sp_rel_off	sp_u.sp_plogi._sp_rel_off
+#define	sp_r_a_tov	sp_u.sp_flogi_acc._sp_r_a_tov
+
+#define	FC_SP_BB_DATA_MASK 0xfff /* mask for data field size in sp_bb_data */
+
+/*
+ * Minimum and maximum values for max data field size in service parameters.
+ */
+#define	FC_SP_MIN_MAX_PAYLOAD	FC_MIN_MAX_PAYLOAD
+#define	FC_SP_MAX_MAX_PAYLOAD	FC_MAX_PAYLOAD
+
+/*
+ * sp_features
+ */
+#define	FC_SP_FT_CIRO	0x8000	/* continuously increasing rel. off. */
+#define	FC_SP_FT_CLAD	0x8000	/* clean address (in FLOGI LS_ACC) */
+#define	FC_SP_FT_RAND	0x4000	/* random relative offset */
+#define	FC_SP_FT_VAL	0x2000	/* valid vendor version level */
+#define	FC_SP_FT_FPORT	0x1000	/* F port (1) vs. N port (0) */
+#define	FC_SP_FT_ABB	0x0800	/* alternate BB_credit management */
+#define	FC_SP_FT_EDTR	0x0400	/* E_D_TOV Resolution is nanoseconds */
+#define	FC_SP_FT_MCAST	0x0200	/* multicast */
+#define	FC_SP_FT_BCAST	0x0100	/* broadcast */
+#define	FC_SP_FT_HUNT	0x0080	/* hunt group */
+#define	FC_SP_FT_SIMP	0x0040	/* dedicated simplex */
+#define	FC_SP_FT_SEC	0x0020	/* reserved for security */
+#define	FC_SP_FT_CSYN	0x0010	/* clock synch. supported */
+#define	FC_SP_FT_RTTOV	0x0008	/* R_T_TOV value 100 uS, else 100 mS */
+#define	FC_SP_FT_HALF	0x0004	/* dynamic half duplex */
+#define	FC_SP_FT_SEQC	0x0002	/* SEQ_CNT */
+#define	FC_SP_FT_PAYL	0x0001	/* FLOGI payload length 256, else 116 */
+
+/*
+ * Class-specific service parameters.
+ */
+struct fc_els_cssp {
+	__be16		cp_class;	/* class flags */
+	__be16		cp_init;	/* initiator flags */
+	__be16		cp_recip;	/* recipient flags */
+	__be16		cp_rdfs;	/* receive data field size */
+	__be16		cp_con_seq;	/* concurrent sequences */
+	__be16		cp_ee_cred;	/* N-port end-to-end credit */
+	__u8		cp_resv1;	/* reserved */
+	__u8		cp_open_seq;	/* open sequences per exchange */
+	__u8		_cp_resv2[2];	/* reserved */
+};
+
+/*
+ * cp_class flags.
+ */
+#define	FC_CPC_VALID	0x8000		/* class valid */
+#define	FC_CPC_IMIX	0x4000		/* intermix mode */
+#define	FC_CPC_SEQ	0x0800		/* sequential delivery */
+#define	FC_CPC_CAMP	0x0200		/* camp-on */
+#define	FC_CPC_PRI	0x0080		/* priority */
+
+/*
+ * cp_init flags.
+ * (TBD: not all flags defined here).
+ */
+#define	FC_CPI_CSYN	0x0010		/* clock synch. capable */
+
+/*
+ * cp_recip flags.
+ */
+#define	FC_CPR_CSYN	0x0008		/* clock synch. capable */
+
+/*
+ * NFC_ELS_FLOGI: Fabric login request.
+ * NFC_ELS_PLOGI: Port login request (same format).
+ */
+struct fc_els_flogi {
+	__u8		fl_cmd;		/* command */
+	__u8		_fl_resvd[3];	/* must be zero */
+	struct fc_els_csp fl_csp;	/* common service parameters */
+	__be64		fl_wwpn;	/* port name */
+	__be64		fl_wwnn;	/* node name */
+	struct fc_els_cssp fl_cssp[4];	/* class 1-4 service parameters */
+	__u8		fl_vend[16];	/* vendor version level */
+} __attribute__((__packed__));
+
+/*
+ * Process login service parameter page.
+ */
+struct fc_els_spp {
+	__u8		spp_type;	/* type code or common service params */
+	__u8		spp_type_ext;	/* type code extension */
+	__u8		spp_flags;
+	__u8		_spp_resvd;
+	__be32		spp_orig_pa;	/* originator process associator */
+	__be32		spp_resp_pa;	/* responder process associator */
+	__be32		spp_params;	/* service parameters */
+};
+
+/*
+ * spp_flags.
+ */
+#define	FC_SPP_OPA_VAL	    0x80	/* originator proc. assoc. valid */
+#define	FC_SPP_RPA_VAL	    0x40	/* responder proc. assoc. valid */
+#define	FC_SPP_EST_IMG_PAIR 0x20	/* establish image pair */
+#define	FC_SPP_RESP_MASK    0x0f	/* mask for response code (below) */
+
+/*
+ * SPP response code in spp_flags - lower 4 bits.
+ */
+enum fc_els_spp_resp {
+	FC_SPP_RESP_ACK	=	1,	/* request executed */
+	FC_SPP_RESP_RES =	2,	/* unable due to lack of resources */
+	FC_SPP_RESP_INIT =	3,	/* initialization not complete */
+	FC_SPP_RESP_NO_PA = 	4,	/* unknown process associator */
+	FC_SPP_RESP_CONF = 	5,	/* configuration precludes image pair */
+	FC_SPP_RESP_COND = 	6,	/* request completed conditionally */
+	FC_SPP_RESP_MULT = 	7,	/* unable to handle multiple SPPs */
+	FC_SPP_RESP_INVL = 	8,	/* SPP is invalid */
+};
+
+/*
+ * ELS_RRQ - Reinstate Recovery Qualifier
+ */
+struct fc_els_rrq {
+	__u8		rrq_cmd;	/* command (0x12) */
+	__u8		rrq_zero[3];	/* specified as zero - part of cmd */
+	__u8		rrq_resvd;	/* reserved */
+	__u8		rrq_s_id[3];	/* originator FID */
+	__be16		rrq_ox_id;	/* originator exchange ID */
+	__be16		rrq_rx_id;	/* responders exchange ID */
+};
+
+/*
+ * ELS_REC - Read exchange concise.
+ */
+struct fc_els_rec {
+	__u8		rec_cmd;	/* command (0x13) */
+	__u8		rec_zero[3];	/* specified as zero - part of cmd */
+	__u8		rec_resvd;	/* reserved */
+	__u8		rec_s_id[3];	/* originator FID */
+	__be16		rec_ox_id;	/* originator exchange ID */
+	__be16		rec_rx_id;	/* responders exchange ID */
+};
+
+/*
+ * ELS_REC LS_ACC payload.
+ */
+struct fc_els_rec_acc {
+	__u8		reca_cmd;	/* accept (0x02) */
+	__u8		reca_zero[3];	/* specified as zero - part of cmd */
+	__be16		reca_ox_id;	/* originator exchange ID */
+	__be16		reca_rx_id;	/* responders exchange ID */
+	__u8		reca_resvd1;	/* reserved */
+	__u8		reca_ofid[3];	/* originator FID */
+	__u8		reca_resvd2;	/* reserved */
+	__u8		reca_rfid[3];	/* responder FID */
+	__be32		reca_fc4value;	/* FC4 value */
+	__be32		reca_e_stat;	/* ESB (exchange status block) status */
+};
+
+/*
+ * ELS_PRLI - Process login request and response.
+ */
+struct fc_els_prli {
+	__u8		prli_cmd;	/* command */
+	__u8		prli_spp_len;	/* length of each serv. parm. page */
+	__be16		prli_len;	/* length of entire payload */
+	/* service parameter pages follow */
+};
+
+/*
+ * ELS_ADISC payload
+ */
+struct fc_els_adisc {
+	__u8		adisc_cmd;
+	__u8		adisc_resv[3];
+	__u8            adisc_resv1;
+	__u8            adisc_hard_addr[3];
+	__be64          adisc_wwpn;
+	__be64          adisc_wwnn;
+	__u8            adisc_resv2;
+	__u8            adisc_port_id[3];
+} __attribute__((__packed__));
+
+/*
+ * ELS_LOGO - process or fabric logout.
+ */
+struct fc_els_logo {
+	__u8		fl_cmd;		/* command code */
+	__u8		fl_zero[3];	/* specified as zero - part of cmd */
+	__u8		fl_resvd;	/* reserved */
+	__u8		fl_n_port_id[3];/* N port ID */
+	__be64		fl_n_port_wwn;	/* port name */
+};
+
+/*
+ * ELS_RTV - read timeout value.
+ */
+struct fc_els_rtv {
+	__u8		rtv_cmd;	/* command code 0x0e */
+	__u8		rtv_zero[3];	/* specified as zero - part of cmd */
+};
+
+/*
+ * LS_ACC for ELS_RTV - read timeout value.
+ */
+struct fc_els_rtv_acc {
+	__u8		rtv_cmd;	/* command code 0x02 */
+	__u8		rtv_zero[3];	/* specified as zero - part of cmd */
+	__be32		rtv_r_a_tov;	/* resource allocation timeout value */
+	__be32		rtv_e_d_tov;	/* error detection timeout value */
+	__be32		rtv_toq;	/* timeout qualifier (see below) */
+};
+
+/*
+ * rtv_toq bits.
+ */
+#define	FC_ELS_RTV_EDRES (1 << 26)	/* E_D_TOV resolution is nS else mS */
+#define	FC_ELS_RTV_RTTOV (1 << 19)	/* R_T_TOV is 100 uS else 100 mS */
+
+/*
+ * ELS_SCR - state change registration payload.
+ */
+struct fc_els_scr {
+	__u8		scr_cmd;	/* command code */
+	__u8		scr_resv[6];	/* reserved */
+	__u8		scr_reg_func;	/* registration function (see below) */
+};
+
+enum fc_els_scr_func {
+	ELS_SCRF_FAB =	1,	/* fabric-detected registration */
+	ELS_SCRF_NPORT = 2,	/* Nx_Port-detected registration */
+	ELS_SCRF_FULL =	3,	/* full registration */
+	ELS_SCRF_CLEAR = 255,	/* remove any current registrations */
+};
+
+/*
+ * ELS_RSCN - registered state change notification payload.
+ */
+struct fc_els_rscn {
+	__u8		rscn_cmd;	/* RSCN opcode (0x61) */
+	__u8		rscn_page_len;	/* page length (4) */
+	__be16		rscn_plen;	/* payload length including this word */
+
+	/* followed by 4-byte generic affected Port_ID pages */
+};
+
+struct fc_els_rscn_page {
+	__u8		rscn_page_flags; /* event and address format */
+	__u8		rscn_fid[3];	/* fabric ID */
+};
+
+#define	ELS_RSCN_EV_QUAL_BIT	2	/* shift count for event qualifier */
+#define	ELS_RSCN_EV_QUAL_MASK	0xf	/* mask for event qualifier */
+#define	ELS_RSCN_ADDR_FMT_BIT	0	/* shift count for address format */
+#define	ELS_RSCN_ADDR_FMT_MASK	0x3	/* mask for address format */
+
+enum fc_els_rscn_ev_qual {
+	ELS_EV_QUAL_NONE = 0,		/* unspecified */
+	ELS_EV_QUAL_NS_OBJ = 1,		/* changed name server object */
+	ELS_EV_QUAL_PORT_ATTR = 2,	/* changed port attribute */
+	ELS_EV_QUAL_SERV_OBJ = 3,	/* changed service object */
+	ELS_EV_QUAL_SW_CONFIG = 4,	/* changed switch configuration */
+	ELS_EV_QUAL_REM_OBJ = 5,	/* removed object */
+};
+
+enum fc_els_rscn_addr_fmt {
+	ELS_ADDR_FMT_PORT = 0,	/* rscn_fid is a port address */
+	ELS_ADDR_FMT_AREA = 1,	/* rscn_fid is a area address */
+	ELS_ADDR_FMT_DOM = 2,	/* rscn_fid is a domain address */
+	ELS_ADDR_FMT_FAB = 3,	/* anything on fabric may have changed */
+};
+
+/*
+ * ELS_RNID - request Node ID.
+ */
+struct fc_els_rnid {
+	__u8		rnid_cmd;	/* RNID opcode (0x78) */
+	__u8		rnid_resv[3];	/* reserved */
+	__u8		rnid_fmt;	/* data format */
+	__u8		rnid_resv2[3];	/* reserved */
+};
+
+/*
+ * Node Identification Data formats (rnid_fmt)
+ */
+enum fc_els_rnid_fmt {
+	ELS_RNIDF_NONE = 0,		/* no specific identification data */
+	ELS_RNIDF_GEN = 0xdf,		/* general topology discovery format */
+};
+
+/*
+ * ELS_RNID response.
+ */
+struct fc_els_rnid_resp {
+	__u8		rnid_cmd;	/* response code (LS_ACC) */
+	__u8		rnid_resv[3];	/* reserved */
+	__u8		rnid_fmt;	/* data format */
+	__u8		rnid_cid_len;	/* common ID data length */
+	__u8		rnid_resv2;	/* reserved */
+	__u8		rnid_sid_len;	/* specific ID data length */
+};
+
+struct fc_els_rnid_cid {
+	__be64		rnid_wwpn;	/* N port name */
+	__be64		rnid_wwnn;	/* node name */
+};
+
+struct fc_els_rnid_gen {
+	__u8		rnid_vend_id[16]; /* vendor-unique ID */
+	__be32		rnid_atype;	/* associated type (see below) */
+	__be32		rnid_phys_port;	/* physical port number */
+	__be32		rnid_att_nodes;	/* number of attached nodes */
+	__u8		rnid_node_mgmt;	/* node management (see below) */
+	__u8		rnid_ip_ver;	/* IP version (see below) */
+	__be16		rnid_prot_port;	/* UDP / TCP port number */
+	__be32		rnid_ip_addr[4]; /* IP address */
+	__u8		rnid_resvd[2];	/* reserved */
+	__be16		rnid_vend_spec;	/* vendor-specific field */
+};
+
+enum fc_els_rnid_atype {
+	ELS_RNIDA_UNK =		0x01,	/* unknown */
+	ELS_RNIDA_OTHER =	0x02,	/* none of the following */
+	ELS_RNIDA_HUB =		0x03,
+	ELS_RNIDA_SWITCH =	0x04,
+	ELS_RNIDA_GATEWAY =	0x05,
+	ELS_RNIDA_CONV =	0x06,   /* Obsolete, do not use this value */
+	ELS_RNIDA_HBA =	        0x07,   /* Obsolete, do not use this value */
+	ELS_RNIDA_PROXY =       0x08,   /* Obsolete, do not use this value */
+	ELS_RNIDA_STORAGE =	0x09,
+	ELS_RNIDA_HOST =	0x0a,
+	ELS_RNIDA_SUBSYS =	0x0b,	/* storage subsystem (e.g., RAID) */
+	ELS_RNIDA_ACCESS =	0x0e,	/* access device (e.g. media changer) */
+	ELS_RNIDA_NAS =		0x11,	/* NAS server */
+	ELS_RNIDA_BRIDGE =	0x12,	/* bridge */
+	ELS_RNIDA_VIRT =	0x13,	/* virtualization device */
+	ELS_RNIDA_MF =		0xff,	/* multifunction device (bits below) */
+	ELS_RNIDA_MF_HUB =	1UL << 31, 	/* hub */
+	ELS_RNIDA_MF_SW =	1UL << 30, 	/* switch */
+	ELS_RNIDA_MF_GW =	1UL << 29,	/* gateway */
+	ELS_RNIDA_MF_ST =	1UL << 28,	/* storage */
+	ELS_RNIDA_MF_HOST =	1UL << 27,	/* host */
+	ELS_RNIDA_MF_SUB =	1UL << 26,	/* storage subsystem */
+	ELS_RNIDA_MF_ACC =	1UL << 25,	/* storage access dev */
+	ELS_RNIDA_MF_WDM =	1UL << 24,	/* wavelength division mux */
+	ELS_RNIDA_MF_NAS =	1UL << 23,	/* NAS server */
+	ELS_RNIDA_MF_BR =	1UL << 22,	/* bridge */
+	ELS_RNIDA_MF_VIRT =	1UL << 21,	/* virtualization device */
+};
+
+enum fc_els_rnid_mgmt {
+	ELS_RNIDM_SNMP =	0,
+	ELS_RNIDM_TELNET =	1,
+	ELS_RNIDM_HTTP =	2,
+	ELS_RNIDM_HTTPS =	3,
+	ELS_RNIDM_XML =		4,	/* HTTP + XML */
+};
+
+enum fc_els_rnid_ipver {
+	ELS_RNIDIP_NONE =	0,	/* no IP support or node mgmt. */
+	ELS_RNIDIP_V4 =		1,	/* IPv4 */
+	ELS_RNIDIP_V6 =		2,	/* IPv6 */
+};
+
+/*
+ * ELS RPL - Read Port List.
+ */
+struct fc_els_rpl {
+	__u8		rpl_cmd;	/* command */
+	__u8		rpl_resv[5];	/* reserved - must be zero */
+	__be16		rpl_max_size;	/* maximum response size or zero */
+	__u8		rpl_resv1;	/* reserved - must be zero */
+	__u8		rpl_index[3];	/* starting index */
+};
+
+/*
+ * Port number block in RPL response.
+ */
+struct fc_els_pnb {
+	__be32		pnb_phys_pn;	/* physical port number */
+	__u8		pnb_resv;	/* reserved */
+	__u8		pnb_port_id[3];	/* port ID */
+	__be64		pnb_wwpn;	/* port name */
+};
+
+/*
+ * RPL LS_ACC response.
+ */
+struct fc_els_rpl_resp {
+	__u8		rpl_cmd;	/* ELS_LS_ACC */
+	__u8		rpl_resv1;	/* reserved - must be zero */
+	__be16		rpl_plen;	/* payload length */
+	__u8		rpl_resv2;	/* reserved - must be zero */
+	__u8		rpl_llen[3];	/* list length */
+	__u8		rpl_resv3;	/* reserved - must be zero */
+	__u8		rpl_index[3];	/* starting index */
+	struct fc_els_pnb rpl_pnb[1];	/* variable number of PNBs */
+};
+
+/*
+ * Link Error Status Block.
+ */
+struct fc_els_lesb {
+	__be32		lesb_link_fail;	/* link failure count */
+	__be32		lesb_sync_loss;	/* loss of synchronization count */
+	__be32		lesb_sig_loss;	/* loss of signal count */
+	__be32		lesb_prim_err;	/* primitive sequence error count */
+	__be32		lesb_inv_word;	/* invalid transmission word count */
+	__be32		lesb_inv_crc;	/* invalid CRC count */
+};
+
+/*
+ * ELS RPS - Read Port Status Block request.
+ */
+struct fc_els_rps {
+	__u8		rps_cmd;	/* command */
+	__u8		rps_resv[2];	/* reserved - must be zero */
+	__u8		rps_flag;	/* flag - see below */
+	__be64		rps_port_spec;	/* port selection */
+};
+
+enum fc_els_rps_flag {
+	FC_ELS_RPS_DID =	0x00,	/* port identified by D_ID of req. */
+	FC_ELS_RPS_PPN =	0x01,	/* port_spec is physical port number */
+	FC_ELS_RPS_WWPN =	0x02,	/* port_spec is port WWN */
+};
+
+/*
+ * ELS RPS LS_ACC response.
+ */
+struct fc_els_rps_resp {
+	__u8		rps_cmd;	/* command - LS_ACC */
+	__u8		rps_resv[2];	/* reserved - must be zero */
+	__u8		rps_flag;	/* flag - see below */
+	__u8		rps_resv2[2];	/* reserved */
+	__be16		rps_status;	/* port status - see below */
+	struct fc_els_lesb rps_lesb;	/* link error status block */
+};
+
+enum fc_els_rps_resp_flag {
+	FC_ELS_RPS_LPEV =	0x01,	/* L_port extension valid */
+};
+
+enum fc_els_rps_resp_status {
+	FC_ELS_RPS_PTP =	1 << 5,	/* point-to-point connection */
+	FC_ELS_RPS_LOOP =	1 << 4,	/* loop mode */
+	FC_ELS_RPS_FAB =	1 << 3,	/* fabric present */
+	FC_ELS_RPS_NO_SIG =	1 << 2,	/* loss of signal */
+	FC_ELS_RPS_NO_SYNC =	1 << 1,	/* loss of synchronization */
+	FC_ELS_RPS_RESET =	1 << 0,	/* in link reset protocol */
+};
+
+/*
+ * ELS LIRR - Link Incident Record Registration request.
+ */
+struct fc_els_lirr {
+	__u8		lirr_cmd;	/* command */
+	__u8		lirr_resv[3];	/* reserved - must be zero */
+	__u8		lirr_func;	/* registration function */
+	__u8		lirr_fmt;	/* FC-4 type of RLIR requested */
+	__u8		lirr_resv2[2];	/* reserved - must be zero */
+};
+
+enum fc_els_lirr_func {
+	ELS_LIRR_SET_COND = 	0x01,	/* set - conditionally receive */
+	ELS_LIRR_SET_UNCOND = 	0x02,	/* set - unconditionally receive */
+	ELS_LIRR_CLEAR = 	0xff	/* clear registration */
+};
+
+/*
+ * ELS SRL - Scan Remote Loop request.
+ */
+struct fc_els_srl {
+	__u8		srl_cmd;	/* command */
+	__u8		srl_resv[3];	/* reserved - must be zero */
+	__u8		srl_flag;	/* flag - see below */
+	__u8		srl_flag_param[3];	/* flag parameter */
+};
+
+enum fc_els_srl_flag {
+	FC_ELS_SRL_ALL =	0x00,	/* scan all FL ports */
+	FC_ELS_SRL_ONE =	0x01,	/* scan specified loop */
+	FC_ELS_SRL_EN_PER =	0x02,	/* enable periodic scanning (param) */
+	FC_ELS_SRL_DIS_PER =	0x03,	/* disable periodic scanning */
+};
+
+/*
+ * ELS RLS - Read Link Error Status Block request.
+ */
+struct fc_els_rls {
+	__u8		rls_cmd;	/* command */
+	__u8		rls_resv[4];	/* reserved - must be zero */
+	__u8		rls_port_id[3];	/* port ID */
+};
+
+/*
+ * ELS RLS LS_ACC Response.
+ */
+struct fc_els_rls_resp {
+	__u8		rls_cmd;	/* ELS_LS_ACC */
+	__u8		rls_resv[3];	/* reserved - must be zero */
+	struct fc_els_lesb rls_lesb;	/* link error status block */
+};
+
+/*
+ * ELS RLIR - Registered Link Incident Report.
+ * This is followed by the CLIR and the CLID, described below.
+ */
+struct fc_els_rlir {
+	__u8		rlir_cmd;	/* command */
+	__u8		rlir_resv[3];	/* reserved - must be zero */
+	__u8		rlir_fmt;	/* format (FC4-type if type specific) */
+	__u8		rlir_clr_len;	/* common link incident record length */
+	__u8		rlir_cld_len;	/* common link incident desc. length */
+	__u8		rlir_slr_len;	/* spec. link incident record length */
+};
+
+/*
+ * CLIR - Common Link Incident Record Data. - Sent via RLIR.
+ */
+struct fc_els_clir {
+	__be64		clir_wwpn;	/* incident port name */
+	__be64		clir_wwnn;	/* incident port node name */
+	__u8		clir_port_type;	/* incident port type */
+	__u8		clir_port_id[3];	/* incident port ID */
+
+	__be64		clir_conn_wwpn;	/* connected port name */
+	__be64		clir_conn_wwnn;	/* connected node name */
+	__be64		clir_fab_name;	/* fabric name */
+	__be32		clir_phys_port;	/* physical port number */
+	__be32		clir_trans_id;	/* transaction ID */
+	__u8		clir_resv[3];	/* reserved */
+	__u8		clir_ts_fmt;	/* time stamp format */
+	__be64		clir_timestamp;	/* time stamp */
+};
+
+/*
+ * CLIR clir_ts_fmt - time stamp format values.
+ */
+enum fc_els_clir_ts_fmt {
+	ELS_CLIR_TS_UNKNOWN = 	0,	/* time stamp field unknown */
+	ELS_CLIR_TS_SEC_FRAC = 	1,	/* time in seconds and fractions */
+	ELS_CLIR_TS_CSU =	2,	/* time in clock synch update format */
+};
+
+/*
+ * Common Link Incident Descriptor - sent via RLIR.
+ */
+struct fc_els_clid {
+	__u8		clid_iq;	/* incident qualifier flags */
+	__u8		clid_ic;	/* incident code */
+	__be16		clid_epai;	/* domain/area of ISL */
+};
+
+/*
+ * CLID incident qualifier flags.
+ */
+enum fc_els_clid_iq {
+	ELS_CLID_SWITCH =	0x20,	/* incident port is a switch node */
+	ELS_CLID_E_PORT =	0x10,	/* incident is an ISL (E) port */
+	ELS_CLID_SEV_MASK =	0x0c,	/* severity 2-bit field mask */
+	ELS_CLID_SEV_INFO =	0x00,	/* report is informational */
+	ELS_CLID_SEV_INOP =	0x08,	/* link not operational */
+	ELS_CLID_SEV_DEG =	0x04,	/* link degraded but operational */
+	ELS_CLID_LASER =	0x02,	/* subassembly is a laser */
+	ELS_CLID_FRU =		0x01,	/* format can identify a FRU */
+};
+
+/*
+ * CLID incident code.
+ */
+enum fc_els_clid_ic {
+	ELS_CLID_IC_IMPL =	1,	/* implicit incident */
+	ELS_CLID_IC_BER =	2,	/* bit-error-rate threshold exceeded */
+	ELS_CLID_IC_LOS =	3,	/* loss of synch or signal */
+	ELS_CLID_IC_NOS =	4,	/* non-operational primitive sequence */
+	ELS_CLID_IC_PST =	5,	/* primitive sequence timeout */
+	ELS_CLID_IC_INVAL =	6,	/* invalid primitive sequence */
+	ELS_CLID_IC_LOOP_TO =	7,	/* loop initialization time out */
+	ELS_CLID_IC_LIP =	8,	/* receiving LIP */
+};
+
+#endif /* _FC_ELS_H_ */
diff --git a/include/scsi/fc/fc_encaps.h b/include/scsi/fc/fc_encaps.h
new file mode 100644
index 000000000000..f180c3e16220
--- /dev/null
+++ b/include/scsi/fc/fc_encaps.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+#ifndef _FC_ENCAPS_H_
+#define _FC_ENCAPS_H_
+
+/*
+ * Protocol definitions from RFC 3643 - Fibre Channel Frame Encapsulation.
+ *
+ * Note:  The frame length field is the number of 32-bit words in
+ * the encapsulation including the fcip_encaps_header, CRC and EOF words.
+ * The minimum frame length value in bytes is (32 + 24 + 4 + 4) * 4 = 64.
+ * The maximum frame length value in bytes is (32 + 24 + 2112 + 4 + 4) = 2172.
+ */
+#define FC_ENCAPS_MIN_FRAME_LEN 64	/* min frame len (bytes) (see above) */
+#define FC_ENCAPS_MAX_FRAME_LEN (FC_ENCAPS_MIN_FRAME_LEN + FC_MAX_PAYLOAD)
+
+#define FC_ENCAPS_VER       1           /* current version number */
+
+struct fc_encaps_hdr {
+	__u8	fc_proto;	/* protocol number */
+	__u8	fc_ver;		/* version of encapsulation */
+	__u8	fc_proto_n;	/* ones complement of protocol */
+	__u8	fc_ver_n;	/* ones complement of version */
+
+	unsigned char fc_proto_data[8]; /* protocol specific data */
+
+	__be16	fc_len_flags;	/* 10-bit length/4 w/ 6 flag bits */
+	__be16	fc_len_flags_n;	/* ones complement of length / flags */
+
+	/*
+	 * Offset 0x10
+	 */
+	__be32	fc_time[2];	/* time stamp: seconds and fraction */
+	__be32	fc_crc;		/* CRC */
+	__be32	fc_sof;		/* start of frame (see FC_SOF below) */
+
+	/* 0x20 - FC frame content followed by EOF word */
+};
+
+#define FCIP_ENCAPS_HDR_LEN 0x20	/* expected length for asserts */
+
+/*
+ * Macro's for making redundant copies of EOF and SOF.
+ */
+#define FC_XY(x, y)		((((x) & 0xff) << 8) | ((y) & 0xff))
+#define FC_XYXY(x, y)		((FCIP_XY(x, y) << 16) | FCIP_XY(x, y))
+#define FC_XYNN(x, y)		(FCIP_XYXY(x, y) ^ 0xffff)
+
+#define FC_SOF_ENCODE(n)	FC_XYNN(n, n)
+#define FC_EOF_ENCODE(n)	FC_XYNN(n, n)
+
+/*
+ * SOF / EOF bytes.
+ */
+enum fc_sof {
+	FC_SOF_F =	0x28,	/* fabric */
+	FC_SOF_I4 =	0x29,	/* initiate class 4 */
+	FC_SOF_I2 =	0x2d,	/* initiate class 2 */
+	FC_SOF_I3 =	0x2e,	/* initiate class 3 */
+	FC_SOF_N4 =	0x31,	/* normal class 4 */
+	FC_SOF_N2 =	0x35,	/* normal class 2 */
+	FC_SOF_N3 =	0x36,	/* normal class 3 */
+	FC_SOF_C4 =	0x39,	/* activate class 4 */
+} __attribute__((packed));
+
+enum fc_eof {
+	FC_EOF_N =	0x41,	/* normal (not last frame of seq) */
+	FC_EOF_T =	0x42,	/* terminate (last frame of sequence) */
+	FC_EOF_RT =	0x44,
+	FC_EOF_DT =	0x46,	/* disconnect-terminate class-1 */
+	FC_EOF_NI =	0x49,	/* normal-invalid */
+	FC_EOF_DTI =	0x4e,	/* disconnect-terminate-invalid */
+	FC_EOF_RTI =	0x4f,
+	FC_EOF_A =	0x50,	/* abort */
+} __attribute__((packed));
+
+#define FC_SOF_CLASS_MASK 0x06	/* mask for class of service in SOF */
+
+/*
+ * Define classes in terms of the SOF code (initial).
+ */
+enum fc_class {
+	FC_CLASS_NONE = 0,	/* software value indicating no class */
+	FC_CLASS_2 =	FC_SOF_I2,
+	FC_CLASS_3 =	FC_SOF_I3,
+	FC_CLASS_4 =	FC_SOF_I4,
+	FC_CLASS_F =	FC_SOF_F,
+};
+
+/*
+ * Determine whether SOF code indicates the need for a BLS ACK.
+ */
+static inline int fc_sof_needs_ack(enum fc_sof sof)
+{
+	return (~sof) & 0x02;	/* true for class 1, 2, 4, 6, or F */
+}
+
+/*
+ * Given an fc_class, return the normal (non-initial) SOF value.
+ */
+static inline enum fc_sof fc_sof_normal(enum fc_class class)
+{
+	return class + FC_SOF_N3 - FC_SOF_I3;	/* diff is always 8 */
+}
+
+/*
+ * Compute class from SOF value.
+ */
+static inline enum fc_class fc_sof_class(enum fc_sof sof)
+{
+	return (sof & 0x7) | FC_SOF_F;
+}
+
+/*
+ * Determine whether SOF is for the initial frame of a sequence.
+ */
+static inline int fc_sof_is_init(enum fc_sof sof)
+{
+	return sof < 0x30;
+}
+
+#endif /* _FC_ENCAPS_H_ */
diff --git a/include/scsi/fc/fc_fc2.h b/include/scsi/fc/fc_fc2.h
new file mode 100644
index 000000000000..cff8a8c22f50
--- /dev/null
+++ b/include/scsi/fc/fc_fc2.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_FC2_H_
+#define _FC_FC2_H_
+
+/*
+ * Fibre Channel Exchanges and Sequences.
+ */
+#ifndef PACKED
+#define PACKED  __attribute__ ((__packed__))
+#endif /* PACKED */
+
+
+/*
+ * Sequence Status Block.
+ * This format is set by the FC-FS standard and is sent over the wire.
+ * Note that the fields aren't all naturally aligned.
+ */
+struct fc_ssb {
+	__u8	ssb_seq_id;		/* sequence ID */
+	__u8	_ssb_resvd;
+	__be16	ssb_low_seq_cnt;	/* lowest SEQ_CNT */
+
+	__be16	ssb_high_seq_cnt;	/* highest SEQ_CNT */
+	__be16	ssb_s_stat;		/* sequence status flags */
+
+	__be16	ssb_err_seq_cnt;	/* error SEQ_CNT */
+	__u8	ssb_fh_cs_ctl;		/* frame header CS_CTL */
+	__be16	ssb_fh_ox_id;		/* frame header OX_ID */
+	__be16	ssb_rx_id;		/* responder's exchange ID */
+	__u8	_ssb_resvd2[2];
+} PACKED;
+
+/*
+ * The SSB should be 17 bytes.  Since it's layout is somewhat strange,
+ * we define the size here so that code can ASSERT that the size comes out
+ * correct.
+ */
+#define FC_SSB_SIZE         17          /* length of fc_ssb for assert */
+
+/*
+ * ssb_s_stat - flags from FC-FS-2 T11/1619-D Rev 0.90.
+ */
+#define SSB_ST_RESP         (1 << 15)   /* sequence responder */
+#define SSB_ST_ACTIVE       (1 << 14)   /* sequence is active */
+#define SSB_ST_ABNORMAL     (1 << 12)   /* abnormal ending condition */
+
+#define SSB_ST_REQ_MASK     (3 << 10)   /* ACK, abort sequence condition */
+#define SSB_ST_REQ_CONT     (0 << 10)
+#define SSB_ST_REQ_ABORT    (1 << 10)
+#define SSB_ST_REQ_STOP     (2 << 10)
+#define SSB_ST_REQ_RETRANS  (3 << 10)
+
+#define SSB_ST_ABTS         (1 << 9)    /* ABTS protocol completed */
+#define SSB_ST_RETRANS      (1 << 8)    /* retransmission completed */
+#define SSB_ST_TIMEOUT      (1 << 7)    /* sequence timed out by recipient */
+#define SSB_ST_P_RJT        (1 << 6)    /* P_RJT transmitted */
+
+#define SSB_ST_CLASS_BIT    4           /* class of service field LSB */
+#define SSB_ST_CLASS_MASK   3           /* class of service mask */
+#define SSB_ST_ACK          (1 << 3)    /* ACK (EOFt or EOFdt) transmitted */
+
+/*
+ * Exchange Status Block.
+ * This format is set by the FC-FS standard and is sent over the wire.
+ * Note that the fields aren't all naturally aligned.
+ */
+struct fc_esb {
+	__u8	esb_cs_ctl;		/* CS_CTL for frame header */
+	__be16	esb_ox_id;		/* originator exchange ID */
+	__be16	esb_rx_id;		/* responder exchange ID */
+	__be32	esb_orig_fid;		/* fabric ID of originator */
+	__be32	esb_resp_fid;		/* fabric ID of responder */
+	__be32	esb_e_stat;		/* status */
+	__u8	_esb_resvd[4];
+	__u8	esb_service_params[112]; /* TBD */
+	__u8	esb_seq_status[8];	/* sequence statuses, 8 bytes each */
+} __attribute__((packed));;
+
+
+/*
+ * Define expected size for ASSERTs.
+ * See comments on FC_SSB_SIZE.
+ */
+#define FC_ESB_SIZE         (1 + 5*4 + 112 + 8)     /* expected size */
+
+/*
+ * esb_e_stat - flags from FC-FS-2 T11/1619-D Rev 0.90.
+ */
+#define ESB_ST_RESP         (1 << 31)   /* responder to exchange */
+#define ESB_ST_SEQ_INIT     (1 << 30)   /* port holds sequence initiaive */
+#define ESB_ST_COMPLETE     (1 << 29)   /* exchange is complete */
+#define ESB_ST_ABNORMAL     (1 << 28)   /* abnormal ending condition */
+#define ESB_ST_REC_QUAL     (1 << 26)   /* recovery qualifier active */
+
+#define ESB_ST_ERRP_BIT     24          /* LSB for error policy */
+#define ESB_ST_ERRP_MASK    (3 << 24)   /* mask for error policy */
+#define ESB_ST_ERRP_MULT    (0 << 24)   /* abort, discard multiple sequences */
+#define ESB_ST_ERRP_SING    (1 << 24)   /* abort, discard single sequence */
+#define ESB_ST_ERRP_INF     (2 << 24)   /* process with infinite buffers */
+#define ESB_ST_ERRP_IMM     (3 << 24)   /* discard mult. with immed. retran. */
+
+#define ESB_ST_OX_ID_INVL   (1 << 23)   /* originator XID invalid */
+#define ESB_ST_RX_ID_INVL   (1 << 22)   /* responder XID invalid */
+#define ESB_ST_PRI_INUSE    (1 << 21)   /* priority / preemption in use */
+
+#endif /* _FC_FC2_H_ */
diff --git a/include/scsi/fc/fc_fcoe.h b/include/scsi/fc/fc_fcoe.h
new file mode 100644
index 000000000000..57aaa8f0d613
--- /dev/null
+++ b/include/scsi/fc/fc_fcoe.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_FCOE_H_
+#define	_FC_FCOE_H_
+
+/*
+ * FCoE - Fibre Channel over Ethernet.
+ */
+
+/*
+ * The FCoE ethertype eventually goes in net/if_ether.h.
+ */
+#ifndef ETH_P_FCOE
+#define	ETH_P_FCOE	0x8906		/* FCOE ether type */
+#endif
+
+#ifndef ETH_P_8021Q
+#define	ETH_P_8021Q	0x8100
+#endif
+
+/*
+ * FC_FCOE_OUI hasn't been standardized yet.   XXX TBD.
+ */
+#ifndef FC_FCOE_OUI
+#define	FC_FCOE_OUI	0x0efc00	/* upper 24 bits of FCOE dest MAC TBD */
+#endif
+
+/*
+ * The destination MAC address for the fabric login may get a different OUI.
+ * This isn't standardized yet.
+ */
+#ifndef FC_FCOE_FLOGI_MAC
+/* gateway MAC - TBD */
+#define	FC_FCOE_FLOGI_MAC { 0x0e, 0xfc, 0x00, 0xff, 0xff, 0xfe }
+#endif
+
+#define	FC_FCOE_VER	0			/* version */
+
+/*
+ * Ethernet Addresses based on FC S_ID and D_ID.
+ * Generated by FC_FCOE_OUI | S_ID/D_ID
+ */
+#define	FC_FCOE_ENCAPS_ID(n)	(((u64) FC_FCOE_OUI << 24) | (n))
+#define	FC_FCOE_DECAPS_ID(n)	((n) >> 24)
+
+/*
+ * FCoE frame header - 14 bytes
+ *
+ * This is the August 2007 version of the FCoE header as defined by T11.
+ * This follows the VLAN header, which includes the ethertype.
+ */
+struct fcoe_hdr {
+	__u8		fcoe_ver;	/* version field - upper 4 bits */
+	__u8		fcoe_resvd[12];	/* reserved - send zero and ignore */
+	__u8		fcoe_sof;	/* start of frame per RFC 3643 */
+};
+
+#define FC_FCOE_DECAPS_VER(hp)	    ((hp)->fcoe_ver >> 4)
+#define FC_FCOE_ENCAPS_VER(hp, ver) ((hp)->fcoe_ver = (ver) << 4)
+
+/*
+ * FCoE CRC & EOF - 8 bytes.
+ */
+struct fcoe_crc_eof {
+	__le32		fcoe_crc32;	/* CRC for FC packet */
+	__u8		fcoe_eof;	/* EOF from RFC 3643 */
+	__u8		fcoe_resvd[3];	/* reserved - send zero and ignore */
+} __attribute__((packed));
+
+/*
+ * Minimum FCoE + FC header length
+ * 14 bytes FCoE header + 24 byte FC header = 38 bytes
+ */
+#define FCOE_HEADER_LEN 38
+
+/*
+ * Minimum FCoE frame size
+ * 14 bytes FCoE header + 24 byte FC header + 8 byte FCoE trailer = 46 bytes
+ */
+#define FCOE_MIN_FRAME 46
+
+/*
+ * fc_fcoe_set_mac - Store OUI + DID into MAC address field.
+ * @mac: mac address to be set
+ * @did: fc dest id to use
+ */
+static inline void fc_fcoe_set_mac(u8 *mac, u8 *did)
+{
+	mac[0] = (u8) (FC_FCOE_OUI >> 16);
+	mac[1] = (u8) (FC_FCOE_OUI >> 8);
+	mac[2] = (u8) FC_FCOE_OUI;
+	mac[3] = did[0];
+	mac[4] = did[1];
+	mac[5] = did[2];
+}
+
+#endif /* _FC_FCOE_H_ */
diff --git a/include/scsi/fc/fc_fcp.h b/include/scsi/fc/fc_fcp.h
new file mode 100644
index 000000000000..5d38f1989f37
--- /dev/null
+++ b/include/scsi/fc/fc_fcp.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_FCP_H_
+#define	_FC_FCP_H_
+
+/*
+ * Fibre Channel Protocol for SCSI.
+ * From T10 FCP-3, T10 project 1560-D Rev 4, Sept. 13, 2005.
+ */
+
+/*
+ * fc/fs.h defines FC_TYPE_FCP.
+ */
+
+/*
+ * Service parameter page parameters (word 3 bits) for Process Login.
+ */
+#define	FCP_SPPF_TASK_RETRY_ID	0x0200	/* task retry ID requested */
+#define	FCP_SPPF_RETRY		0x0100	/* retry supported */
+#define	FCP_SPPF_CONF_COMPL	0x0080	/* confirmed completion allowed */
+#define	FCP_SPPF_OVLY_ALLOW	0x0040	/* data overlay allowed */
+#define	FCP_SPPF_INIT_FCN	0x0020	/* initiator function */
+#define	FCP_SPPF_TARG_FCN	0x0010	/* target function */
+#define	FCP_SPPF_RD_XRDY_DIS	0x0002	/* disable XFER_RDY for reads */
+#define	FCP_SPPF_WR_XRDY_DIS	0x0001	/* disable XFER_RDY for writes */
+
+/*
+ * FCP_CMND IU Payload.
+ */
+struct fcp_cmnd {
+	__u8		fc_lun[8];	/* logical unit number */
+	__u8		fc_cmdref;	/* commmand reference number */
+	__u8		fc_pri_ta;	/* priority and task attribute */
+	__u8		fc_tm_flags;	/* task management flags */
+	__u8		fc_flags;	/* additional len & flags */
+	__u8		fc_cdb[16];	/* base CDB */
+	__be32		fc_dl;		/* data length (must follow fc_cdb) */
+};
+
+#define	FCP_CMND_LEN	32	/* expected length of structure */
+
+struct fcp_cmnd32 {
+	__u8		fc_lun[8];	/* logical unit number */
+	__u8		fc_cmdref;	/* commmand reference number */
+	__u8		fc_pri_ta;	/* priority and task attribute */
+	__u8		fc_tm_flags;	/* task management flags */
+	__u8		fc_flags;	/* additional len & flags */
+	__u8		fc_cdb[32];	/* base CDB */
+	__be32		fc_dl;		/* data length (must follow fc_cdb) */
+};
+
+#define	FCP_CMND32_LEN	    48	/* expected length of structure */
+#define	FCP_CMND32_ADD_LEN  (16 / 4)	/* Additional cdb length */
+
+/*
+ * fc_pri_ta.
+ */
+#define	FCP_PTA_SIMPLE	    0	/* simple task attribute */
+#define	FCP_PTA_HEADQ	    1	/* head of queue task attribute */
+#define	FCP_PTA_ORDERED     2	/* ordered task attribute */
+#define	FCP_PTA_ACA	    4	/* auto. contigent allegiance */
+#define	FCP_PRI_SHIFT	    3	/* priority field starts in bit 3 */
+#define	FCP_PRI_RESVD_MASK  0x80	/* reserved bits in priority field */
+
+/*
+ * fc_tm_flags - task management flags field.
+ */
+#define	FCP_TMF_CLR_ACA		0x40	/* clear ACA condition */
+#define	FCP_TMF_LUN_RESET	0x10	/* logical unit reset task management */
+#define	FCP_TMF_CLR_TASK_SET	0x04	/* clear task set */
+#define	FCP_TMF_ABT_TASK_SET	0x02	/* abort task set */
+
+/*
+ * fc_flags.
+ *  Bits 7:2 are the additional FCP_CDB length / 4.
+ */
+#define	FCP_CFL_LEN_MASK	0xfc	/* mask for additional length */
+#define	FCP_CFL_LEN_SHIFT	2	/* shift bits for additional length */
+#define	FCP_CFL_RDDATA		0x02	/* read data */
+#define	FCP_CFL_WRDATA		0x01	/* write data */
+
+/*
+ * FCP_TXRDY IU - transfer ready payload.
+ */
+struct fcp_txrdy {
+	__be32		ft_data_ro;	/* data relative offset */
+	__be32		ft_burst_len;	/* burst length */
+	__u8		_ft_resvd[4];	/* reserved */
+};
+
+#define	FCP_TXRDY_LEN	12	/* expected length of structure */
+
+/*
+ * FCP_RESP IU - response payload.
+ *
+ * The response payload comes in three parts: the flags/status, the
+ * sense/response lengths and the sense data/response info section.
+ *
+ * From FCP3r04, note 6 of section 9.5.13:
+ *
+ * Some early implementations presented the FCP_RSP IU without the FCP_RESID,
+ * FCP_SNS_LEN, and FCP_RSP_LEN fields if the FCP_RESID_UNDER, FCP_RESID_OVER,
+ * FCP_SNS_LEN_VALID, and FCP_RSP_LEN_VALID bits were all set to zero. This
+ * non-standard behavior should be tolerated.
+ *
+ * All response frames will always contain the fcp_resp template.  Some
+ * will also include the fcp_resp_len template.
+ */
+struct fcp_resp {
+	__u8		_fr_resvd[8];	/* reserved */
+	__be16		fr_retry_delay;	/* retry delay timer */
+	__u8		fr_flags;	/* flags */
+	__u8		fr_status;	/* SCSI status code */
+};
+
+#define	FCP_RESP_LEN	12	/* expected length of structure */
+
+struct fcp_resp_ext {
+	__be32		fr_resid;	/* Residual value */
+	__be32		fr_sns_len;	/* SCSI Sense length */
+	__be32		fr_rsp_len;	/* Response Info length */
+
+	/*
+	 * Optionally followed by RSP info and/or SNS info and/or
+	 * bidirectional read residual length, if any.
+	 */
+};
+
+#define FCP_RESP_EXT_LEN    12  /* expected length of the structure */
+
+struct fcp_resp_rsp_info {
+    __u8      _fr_resvd[3];       /* reserved */
+    __u8      rsp_code;           /* Response Info Code */
+    __u8      _fr_resvd2[4];      /* reserved */
+};
+
+struct fcp_resp_with_ext {
+	struct fcp_resp resp;
+	struct fcp_resp_ext ext;
+};
+
+#define	FCP_RESP_WITH_EXT   (FCP_RESP_LEN + FCP_RESP_EXT_LEN)
+
+/*
+ * fr_flags.
+ */
+#define	FCP_BIDI_RSP	    0x80	/* bidirectional read response */
+#define	FCP_BIDI_READ_UNDER 0x40	/* bidir. read less than requested */
+#define	FCP_BIDI_READ_OVER  0x20	/* DL insufficient for full transfer */
+#define	FCP_CONF_REQ	    0x10	/* confirmation requested */
+#define	FCP_RESID_UNDER     0x08	/* transfer shorter than expected */
+#define	FCP_RESID_OVER	    0x04	/* DL insufficient for full transfer */
+#define	FCP_SNS_LEN_VAL     0x02	/* SNS_LEN field is valid */
+#define	FCP_RSP_LEN_VAL     0x01	/* RSP_LEN field is valid */
+
+/*
+ * rsp_codes
+ */
+enum fcp_resp_rsp_codes {
+	FCP_TMF_CMPL = 0,
+	FCP_DATA_LEN_INVALID = 1,
+	FCP_CMND_FIELDS_INVALID = 2,
+	FCP_DATA_PARAM_MISMATCH = 3,
+	FCP_TMF_REJECTED = 4,
+	FCP_TMF_FAILED = 5,
+	FCP_TMF_INVALID_LUN = 9,
+};
+
+/*
+ * FCP SRR Link Service request - Sequence Retransmission Request.
+ */
+struct fcp_srr {
+	__u8		srr_op;		/* opcode ELS_SRR */
+	__u8		srr_resvd[3];	/* opcode / reserved - must be zero */
+	__be16		srr_ox_id;	/* OX_ID of failed command */
+	__be16		srr_rx_id;	/* RX_ID of failed command */
+	__be32		srr_rel_off;	/* relative offset */
+	__u8		srr_r_ctl;	/* r_ctl for the information unit */
+	__u8		srr_resvd2[3];	/* reserved */
+};
+
+#endif /* _FC_FCP_H_ */
diff --git a/include/scsi/fc/fc_fs.h b/include/scsi/fc/fc_fs.h
new file mode 100644
index 000000000000..3e4801d2bdbb
--- /dev/null
+++ b/include/scsi/fc/fc_fs.h
@@ -0,0 +1,340 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_FS_H_
+#define _FC_FS_H_
+
+/*
+ * Fibre Channel Framing and Signalling definitions.
+ * From T11 FC-FS-2 Rev 0.90 - 9 August 2005.
+ */
+
+/*
+ * Frame header
+ */
+struct fc_frame_header {
+	__u8          fh_r_ctl;	/* routing control */
+	__u8          fh_d_id[3];	/* Destination ID */
+
+	__u8          fh_cs_ctl;	/* class of service control / pri */
+	__u8          fh_s_id[3];	/* Source ID */
+
+	__u8          fh_type;		/* see enum fc_fh_type below */
+	__u8          fh_f_ctl[3];	/* frame control */
+
+	__u8          fh_seq_id;	/* sequence ID */
+	__u8          fh_df_ctl;	/* data field control */
+	__be16        fh_seq_cnt;	/* sequence count */
+
+	__be16        fh_ox_id;		/* originator exchange ID */
+	__be16        fh_rx_id;		/* responder exchange ID */
+	__be32        fh_parm_offset;	/* parameter or relative offset */
+};
+
+#define FC_FRAME_HEADER_LEN 24	/* expected length of structure */
+
+#define FC_MAX_PAYLOAD  2112U		/* max payload length in bytes */
+#define FC_MIN_MAX_PAYLOAD  256U 	/* lower limit on max payload */
+
+#define FC_MAX_FRAME	(FC_MAX_PAYLOAD + FC_FRAME_HEADER_LEN)
+#define FC_MIN_MAX_FRAME (FC_MIN_MAX_PAYLOAD + FC_FRAME_HEADER_LEN)
+
+/*
+ * fh_r_ctl - Routing control definitions.
+ */
+    /*
+     * FC-4 device_data.
+     */
+enum fc_rctl {
+	FC_RCTL_DD_UNCAT = 0x00,	/* uncategorized information */
+	FC_RCTL_DD_SOL_DATA = 0x01,	/* solicited data */
+	FC_RCTL_DD_UNSOL_CTL = 0x02,	/* unsolicited control */
+	FC_RCTL_DD_SOL_CTL = 0x03,	/* solicited control or reply */
+	FC_RCTL_DD_UNSOL_DATA = 0x04,	/* unsolicited data */
+	FC_RCTL_DD_DATA_DESC = 0x05,	/* data descriptor */
+	FC_RCTL_DD_UNSOL_CMD = 0x06,	/* unsolicited command */
+	FC_RCTL_DD_CMD_STATUS = 0x07,	/* command status */
+
+#define FC_RCTL_ILS_REQ FC_RCTL_DD_UNSOL_CTL	/* ILS request */
+#define FC_RCTL_ILS_REP FC_RCTL_DD_SOL_CTL	/* ILS reply */
+
+	/*
+	 * Extended Link_Data
+	 */
+	FC_RCTL_ELS_REQ = 0x22,	/* extended link services request */
+	FC_RCTL_ELS_REP = 0x23,	/* extended link services reply */
+	FC_RCTL_ELS4_REQ = 0x32, /* FC-4 ELS request */
+	FC_RCTL_ELS4_REP = 0x33, /* FC-4 ELS reply */
+	/*
+	 * Optional Extended Headers
+	 */
+	FC_RCTL_VFTH = 0x50,	/* virtual fabric tagging header */
+	FC_RCTL_IFRH = 0x51,	/* inter-fabric routing header */
+	FC_RCTL_ENCH = 0x52,	/* encapsulation header */
+	/*
+	 * Basic Link Services fh_r_ctl values.
+	 */
+	FC_RCTL_BA_NOP = 0x80,	/* basic link service NOP */
+	FC_RCTL_BA_ABTS = 0x81,	/* basic link service abort */
+	FC_RCTL_BA_RMC = 0x82,	/* remove connection */
+	FC_RCTL_BA_ACC = 0x84,	/* basic accept */
+	FC_RCTL_BA_RJT = 0x85,	/* basic reject */
+	FC_RCTL_BA_PRMT = 0x86,	/* dedicated connection preempted */
+	/*
+	 * Link Control Information.
+	 */
+	FC_RCTL_ACK_1 = 0xc0,	/* acknowledge_1 */
+	FC_RCTL_ACK_0 = 0xc1,	/* acknowledge_0 */
+	FC_RCTL_P_RJT = 0xc2,	/* port reject */
+	FC_RCTL_F_RJT = 0xc3,	/* fabric reject */
+	FC_RCTL_P_BSY = 0xc4,	/* port busy */
+	FC_RCTL_F_BSY = 0xc5,	/* fabric busy to data frame */
+	FC_RCTL_F_BSYL = 0xc6,	/* fabric busy to link control frame */
+	FC_RCTL_LCR = 0xc7,	/* link credit reset */
+	FC_RCTL_END = 0xc9,	/* end */
+};
+				    /* incomplete list of definitions */
+
+/*
+ * R_CTL names initializer.
+ * Please keep this matching the above definitions.
+ */
+#define FC_RCTL_NAMES_INIT { \
+	[FC_RCTL_DD_UNCAT] =		"uncat",			\
+	[FC_RCTL_DD_SOL_DATA] =		"sol data",			\
+	[FC_RCTL_DD_UNSOL_CTL] =	"unsol ctl",			\
+	[FC_RCTL_DD_SOL_CTL] =		"sol ctl/reply",		\
+	[FC_RCTL_DD_UNSOL_DATA] =	"unsol data",			\
+	[FC_RCTL_DD_DATA_DESC] =	"data desc",			\
+	[FC_RCTL_DD_UNSOL_CMD] =	"unsol cmd",			\
+	[FC_RCTL_DD_CMD_STATUS] =	"cmd status",			\
+	[FC_RCTL_ELS_REQ] =		"ELS req",			\
+	[FC_RCTL_ELS_REP] =		"ELS rep",			\
+	[FC_RCTL_ELS4_REQ] =		"FC-4 ELS req",			\
+	[FC_RCTL_ELS4_REP] =		"FC-4 ELS rep",			\
+	[FC_RCTL_BA_NOP] =		"BLS NOP",			\
+	[FC_RCTL_BA_ABTS] =		"BLS abort",			\
+	[FC_RCTL_BA_RMC] =		"BLS remove connection",	\
+	[FC_RCTL_BA_ACC] =		"BLS accept",			\
+	[FC_RCTL_BA_RJT] =		"BLS reject",			\
+	[FC_RCTL_BA_PRMT] =		"BLS dedicated connection preempted", \
+	[FC_RCTL_ACK_1] =		"LC ACK_1",			\
+	[FC_RCTL_ACK_0] =		"LC ACK_0",			\
+	[FC_RCTL_P_RJT] =		"LC port reject",		\
+	[FC_RCTL_F_RJT] =		"LC fabric reject",		\
+	[FC_RCTL_P_BSY] =		"LC port busy",			\
+	[FC_RCTL_F_BSY] =		"LC fabric busy to data frame",	\
+	[FC_RCTL_F_BSYL] =		"LC fabric busy to link control frame",\
+	[FC_RCTL_LCR] =			"LC link credit reset",		\
+	[FC_RCTL_END] =			"LC end",			\
+}
+
+/*
+ * Well-known fabric addresses.
+ */
+enum fc_well_known_fid {
+	FC_FID_BCAST =		0xffffff,	/* broadcast */
+	FC_FID_FLOGI =		0xfffffe,	/* fabric login */
+	FC_FID_FCTRL =		0xfffffd,	/* fabric controller */
+	FC_FID_DIR_SERV =	0xfffffc,	/* directory server */
+	FC_FID_TIME_SERV =	0xfffffb,	/* time server */
+	FC_FID_MGMT_SERV =	0xfffffa,	/* management server */
+	FC_FID_QOS =		0xfffff9,	/* QoS Facilitator */
+	FC_FID_ALIASES =	0xfffff8,	/* alias server (FC-PH2) */
+	FC_FID_SEC_KEY =	0xfffff7,	/* Security key dist. server */
+	FC_FID_CLOCK =		0xfffff6,	/* clock synch server */
+	FC_FID_MCAST_SERV =	0xfffff5,	/* multicast server */
+};
+
+#define	FC_FID_WELL_KNOWN_MAX	0xffffff /* highest well-known fabric ID */
+#define	FC_FID_WELL_KNOWN_BASE	0xfffff5 /* start of well-known fabric ID */
+
+/*
+ * Other well-known addresses, outside the above contiguous range.
+ */
+#define	FC_FID_DOM_MGR		0xfffc00	/* domain manager base */
+
+/*
+ * Fabric ID bytes.
+ */
+#define	FC_FID_DOMAIN		0
+#define	FC_FID_PORT		1
+#define	FC_FID_LINK		2
+
+/*
+ * fh_type codes
+ */
+enum fc_fh_type {
+	FC_TYPE_BLS =	0x00,	/* basic link service */
+	FC_TYPE_ELS =	0x01,	/* extended link service */
+	FC_TYPE_IP =	0x05,	/* IP over FC, RFC 4338 */
+	FC_TYPE_FCP =	0x08,	/* SCSI FCP */
+	FC_TYPE_CT =	0x20,	/* Fibre Channel Services (FC-CT) */
+	FC_TYPE_ILS =	0x22,	/* internal link service */
+};
+
+/*
+ * FC_TYPE names initializer.
+ * Please keep this matching the above definitions.
+ */
+#define FC_TYPE_NAMES_INIT {				\
+	[FC_TYPE_BLS] =		"BLS",			\
+	[FC_TYPE_ELS] =		"ELS",			\
+	[FC_TYPE_IP] =		"IP",			\
+	[FC_TYPE_FCP] =		"FCP",			\
+	[FC_TYPE_CT] =		"CT",			\
+	[FC_TYPE_ILS] =		"ILS",			\
+}
+
+/*
+ * Exchange IDs.
+ */
+#define FC_XID_UNKNOWN  0xffff	/* unknown exchange ID */
+#define FC_XID_MIN	0x0	/* supported min exchange ID */
+#define FC_XID_MAX	0xfffe	/* supported max exchange ID */
+
+/*
+ * fh_f_ctl - Frame control flags.
+ */
+#define	FC_FC_EX_CTX	(1 << 23)	/* sent by responder to exchange */
+#define	FC_FC_SEQ_CTX	(1 << 22)	/* sent by responder to sequence */
+#define	FC_FC_FIRST_SEQ (1 << 21)	/* first sequence of this exchange */
+#define	FC_FC_LAST_SEQ	(1 << 20)	/* last sequence of this exchange */
+#define	FC_FC_END_SEQ	(1 << 19)	/* last frame of sequence */
+#define	FC_FC_END_CONN	(1 << 18)	/* end of class 1 connection pending */
+#define	FC_FC_RES_B17	(1 << 17)	/* reserved */
+#define	FC_FC_SEQ_INIT	(1 << 16)	/* transfer of sequence initiative */
+#define	FC_FC_X_ID_REASS (1 << 15)	/* exchange ID has been changed */
+#define	FC_FC_X_ID_INVAL (1 << 14)	/* exchange ID invalidated */
+
+#define	FC_FC_ACK_1	(1 << 12)	/* 13:12 = 1: ACK_1 expected */
+#define	FC_FC_ACK_N	(2 << 12)	/* 13:12 = 2: ACK_N expected */
+#define	FC_FC_ACK_0	(3 << 12)	/* 13:12 = 3: ACK_0 expected */
+
+#define	FC_FC_RES_B11	(1 << 11)	/* reserved */
+#define	FC_FC_RES_B10	(1 << 10)	/* reserved */
+#define	FC_FC_RETX_SEQ	(1 << 9)	/* retransmitted sequence */
+#define	FC_FC_UNI_TX	(1 << 8)	/* unidirectional transmit (class 1) */
+#define	FC_FC_CONT_SEQ(i) ((i) << 6)
+#define	FC_FC_ABT_SEQ(i) ((i) << 4)
+#define	FC_FC_REL_OFF	(1 << 3)	/* parameter is relative offset */
+#define	FC_FC_RES2	(1 << 2)	/* reserved */
+#define	FC_FC_FILL(i)	((i) & 3)	/* 1:0: bytes of trailing fill */
+
+/*
+ * BA_ACC payload.
+ */
+struct fc_ba_acc {
+	__u8		ba_seq_id_val;	/* SEQ_ID validity */
+#define FC_BA_SEQ_ID_VAL 0x80
+	__u8		ba_seq_id;	/* SEQ_ID of seq last deliverable */
+	__u8		ba_resvd[2];	/* reserved */
+	__be16		ba_ox_id;	/* OX_ID for aborted seq or exch */
+	__be16		ba_rx_id;	/* RX_ID for aborted seq or exch */
+	__be16		ba_low_seq_cnt;	/* low SEQ_CNT of aborted seq */
+	__be16		ba_high_seq_cnt; /* high SEQ_CNT of aborted seq */
+};
+
+/*
+ * BA_RJT: Basic Reject payload.
+ */
+struct fc_ba_rjt {
+	__u8		br_resvd;	/* reserved */
+	__u8		br_reason;	/* reason code */
+	__u8		br_explan;	/* reason explanation */
+	__u8		br_vendor;	/* vendor unique code */
+};
+
+/*
+ * BA_RJT reason codes.
+ * From FS-2.
+ */
+enum fc_ba_rjt_reason {
+	FC_BA_RJT_NONE =	0,	/* in software this means no reject */
+	FC_BA_RJT_INVL_CMD =	0x01,	/* invalid command code */
+	FC_BA_RJT_LOG_ERR =	0x03,	/* logical error */
+	FC_BA_RJT_LOG_BUSY =	0x05,	/* logical busy */
+	FC_BA_RJT_PROTO_ERR =	0x07,	/* protocol error */
+	FC_BA_RJT_UNABLE =	0x09,	/* unable to perform request */
+	FC_BA_RJT_VENDOR =	0xff,	/* vendor-specific (see br_vendor) */
+};
+
+/*
+ * BA_RJT reason code explanations.
+ */
+enum fc_ba_rjt_explan {
+	FC_BA_RJT_EXP_NONE =	0x00,	/* no additional expanation */
+	FC_BA_RJT_INV_XID =	0x03,	/* invalid OX_ID-RX_ID combination */
+	FC_BA_RJT_ABT =		0x05,	/* sequence aborted, no seq info */
+};
+
+/*
+ * P_RJT or F_RJT: Port Reject or Fabric Reject parameter field.
+ */
+struct fc_pf_rjt {
+	__u8		rj_action;	/* reserved */
+	__u8		rj_reason;	/* reason code */
+	__u8		rj_resvd;	/* reserved */
+	__u8		rj_vendor;	/* vendor unique code */
+};
+
+/*
+ * P_RJT and F_RJT reject reason codes.
+ */
+enum fc_pf_rjt_reason {
+	FC_RJT_NONE =		0,	/* non-reject (reserved by standard) */
+	FC_RJT_INVL_DID =	0x01,	/* invalid destination ID */
+	FC_RJT_INVL_SID =	0x02,	/* invalid source ID */
+	FC_RJT_P_UNAV_T =	0x03,	/* port unavailable, temporary */
+	FC_RJT_P_UNAV =		0x04,	/* port unavailable, permanent */
+	FC_RJT_CLS_UNSUP =	0x05,	/* class not supported */
+	FC_RJT_DEL_USAGE =	0x06,	/* delimiter usage error */
+	FC_RJT_TYPE_UNSUP =	0x07,	/* type not supported */
+	FC_RJT_LINK_CTL =	0x08,	/* invalid link control */
+	FC_RJT_R_CTL =		0x09,	/* invalid R_CTL field */
+	FC_RJT_F_CTL =		0x0a,	/* invalid F_CTL field */
+	FC_RJT_OX_ID =		0x0b,	/* invalid originator exchange ID */
+	FC_RJT_RX_ID =		0x0c,	/* invalid responder exchange ID */
+	FC_RJT_SEQ_ID =		0x0d,	/* invalid sequence ID */
+	FC_RJT_DF_CTL =		0x0e,	/* invalid DF_CTL field */
+	FC_RJT_SEQ_CNT =	0x0f,	/* invalid SEQ_CNT field */
+	FC_RJT_PARAM =		0x10,	/* invalid parameter field */
+	FC_RJT_EXCH_ERR =	0x11,	/* exchange error */
+	FC_RJT_PROTO =		0x12,	/* protocol error */
+	FC_RJT_LEN =		0x13,	/* incorrect length */
+	FC_RJT_UNEXP_ACK =	0x14,	/* unexpected ACK */
+	FC_RJT_FAB_CLASS =	0x15,	/* class unsupported by fabric entity */
+	FC_RJT_LOGI_REQ =	0x16,	/* login required */
+	FC_RJT_SEQ_XS =		0x17,	/* excessive sequences attempted */
+	FC_RJT_EXCH_EST =	0x18,	/* unable to establish exchange */
+	FC_RJT_FAB_UNAV =	0x1a,	/* fabric unavailable */
+	FC_RJT_VC_ID =		0x1b,	/* invalid VC_ID (class 4) */
+	FC_RJT_CS_CTL =		0x1c,	/* invalid CS_CTL field */
+	FC_RJT_INSUF_RES =	0x1d,	/* insuff. resources for VC (Class 4) */
+	FC_RJT_INVL_CLS =	0x1f,	/* invalid class of service */
+	FC_RJT_PREEMT_RJT =	0x20,	/* preemption request rejected */
+	FC_RJT_PREEMT_DIS =	0x21,	/* preemption not enabled */
+	FC_RJT_MCAST_ERR =	0x22,	/* multicast error */
+	FC_RJT_MCAST_ET =	0x23,	/* multicast error terminate */
+	FC_RJT_PRLI_REQ =	0x24,	/* process login required */
+	FC_RJT_INVL_ATT =	0x25,	/* invalid attachment */
+	FC_RJT_VENDOR =		0xff,	/* vendor specific reject */
+};
+
+#endif /* _FC_FS_H_ */
diff --git a/include/scsi/fc/fc_gs.h b/include/scsi/fc/fc_gs.h
new file mode 100644
index 000000000000..ffab0272c65a
--- /dev/null
+++ b/include/scsi/fc/fc_gs.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_GS_H_
+#define	_FC_GS_H_
+
+/*
+ * Fibre Channel Services - Common Transport.
+ * From T11.org FC-GS-2 Rev 5.3 November 1998.
+ */
+
+struct fc_ct_hdr {
+	__u8		ct_rev;		/* revision */
+	__u8		ct_in_id[3];	/* N_Port ID of original requestor */
+	__u8		ct_fs_type;	/* type of fibre channel service */
+	__u8		ct_fs_subtype;	/* subtype */
+	__u8		ct_options;
+	__u8		_ct_resvd1;
+	__be16		ct_cmd;		/* command / response code */
+	__be16		ct_mr_size;	/* maximum / residual size */
+	__u8		_ct_resvd2;
+	__u8		ct_reason;	/* reject reason */
+	__u8		ct_explan;	/* reason code explanation */
+	__u8		ct_vendor;	/* vendor unique data */
+};
+
+#define	FC_CT_HDR_LEN	16	/* expected sizeof (struct fc_ct_hdr) */
+
+enum fc_ct_rev {
+	FC_CT_REV = 1		/* common transport revision */
+};
+
+/*
+ * ct_fs_type values.
+ */
+enum fc_ct_fs_type {
+	FC_FST_ALIAS =	0xf8,	/* alias service */
+	FC_FST_MGMT =	0xfa,	/* management service */
+	FC_FST_TIME =	0xfb,	/* time service */
+	FC_FST_DIR =	0xfc,	/* directory service */
+};
+
+/*
+ * ct_cmd: Command / response codes
+ */
+enum fc_ct_cmd {
+	FC_FS_RJT =	0x8001,	/* reject */
+	FC_FS_ACC =	0x8002,	/* accept */
+};
+
+/*
+ * FS_RJT reason codes.
+ */
+enum fc_ct_reason {
+	FC_FS_RJT_CMD =		0x01,	/* invalid command code */
+	FC_FS_RJT_VER =		0x02,	/* invalid version level */
+	FC_FS_RJT_LOG =		0x03,	/* logical error */
+	FC_FS_RJT_IUSIZ =	0x04,	/* invalid IU size */
+	FC_FS_RJT_BSY =		0x05,	/* logical busy */
+	FC_FS_RJT_PROTO =	0x07,	/* protocol error */
+	FC_FS_RJT_UNABL =	0x09,	/* unable to perform command request */
+	FC_FS_RJT_UNSUP =	0x0b,	/* command not supported */
+};
+
+/*
+ * FS_RJT reason code explanations.
+ */
+enum fc_ct_explan {
+	FC_FS_EXP_NONE =	0x00,	/* no additional explanation */
+	FC_FS_EXP_PID =		0x01,	/* port ID not registered */
+	FC_FS_EXP_PNAM =	0x02,	/* port name not registered */
+	FC_FS_EXP_NNAM =	0x03,	/* node name not registered */
+	FC_FS_EXP_COS =		0x04,	/* class of service not registered */
+	/* definitions not complete */
+};
+
+#endif /* _FC_GS_H_ */
diff --git a/include/scsi/fc/fc_ns.h b/include/scsi/fc/fc_ns.h
new file mode 100644
index 000000000000..790d7b97d4bc
--- /dev/null
+++ b/include/scsi/fc/fc_ns.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_NS_H_
+#define	_FC_NS_H_
+
+/*
+ * Fibre Channel Services - Name Service (dNS)
+ * From T11.org FC-GS-2 Rev 5.3 November 1998.
+ */
+
+/*
+ * Common-transport sub-type for Name Server.
+ */
+#define	FC_NS_SUBTYPE	    2	/* fs_ct_hdr.ct_fs_subtype */
+
+/*
+ * Name server Requests.
+ * Note:  this is an incomplete list, some unused requests are omitted.
+ */
+enum fc_ns_req {
+	FC_NS_GA_NXT =	0x0100,		/* get all next */
+	FC_NS_GI_A =	0x0101,		/* get identifiers - scope */
+	FC_NS_GPN_ID =	0x0112,		/* get port name by ID */
+	FC_NS_GNN_ID =	0x0113,		/* get node name by ID */
+	FC_NS_GID_PN =	0x0121,		/* get ID for port name */
+	FC_NS_GID_NN =	0x0131,		/* get IDs for node name */
+	FC_NS_GID_FT =	0x0171,		/* get IDs by FC4 type */
+	FC_NS_GPN_FT =	0x0172,		/* get port names by FC4 type */
+	FC_NS_GID_PT =	0x01a1,		/* get IDs by port type */
+	FC_NS_RFT_ID =	0x0217,		/* reg FC4 type for ID */
+	FC_NS_RPN_ID =	0x0212,		/* reg port name for ID */
+	FC_NS_RNN_ID =	0x0213,		/* reg node name for ID */
+};
+
+/*
+ * Port type values.
+ */
+enum fc_ns_pt {
+	FC_NS_UNID_PORT = 0x00,	/* unidentified */
+	FC_NS_N_PORT =	0x01,	/* N port */
+	FC_NS_NL_PORT =	0x02,	/* NL port */
+	FC_NS_FNL_PORT = 0x03,	/* F/NL port */
+	FC_NS_NX_PORT =	0x7f,	/* Nx port */
+	FC_NS_F_PORT =	0x81,	/* F port */
+	FC_NS_FL_PORT =	0x82,	/* FL port */
+	FC_NS_E_PORT =	0x84,	/* E port */
+	FC_NS_B_PORT =	0x85,	/* B port */
+};
+
+/*
+ * Port type object.
+ */
+struct fc_ns_pt_obj {
+	__u8		pt_type;
+};
+
+/*
+ * Port ID object
+ */
+struct fc_ns_fid {
+	__u8		fp_flags;	/* flags for responses only */
+	__u8		fp_fid[3];
+};
+
+/*
+ * fp_flags in port ID object, for responses only.
+ */
+#define	FC_NS_FID_LAST	0x80		/* last object */
+
+/*
+ * FC4-types object.
+ */
+#define	FC_NS_TYPES	256	/* number of possible FC-4 types */
+#define	FC_NS_BPW	32	/* bits per word in bitmap */
+
+struct fc_ns_fts {
+	__be32	ff_type_map[FC_NS_TYPES / FC_NS_BPW]; /* bitmap of FC-4 types */
+};
+
+/*
+ * GID_PT request.
+ */
+struct fc_ns_gid_pt {
+	__u8		fn_pt_type;
+	__u8		fn_domain_id_scope;
+	__u8		fn_area_id_scope;
+	__u8		fn_resvd;
+};
+
+/*
+ * GID_FT or GPN_FT request.
+ */
+struct fc_ns_gid_ft {
+	__u8		fn_resvd;
+	__u8		fn_domain_id_scope;
+	__u8		fn_area_id_scope;
+	__u8		fn_fc4_type;
+};
+
+/*
+ * GPN_FT response.
+ */
+struct fc_gpn_ft_resp {
+	__u8		fp_flags;	/* see fp_flags definitions above */
+	__u8		fp_fid[3];	/* port ID */
+	__be32		fp_resvd;
+	__be64		fp_wwpn;	/* port name */
+};
+
+/*
+ * GID_PN request
+ */
+struct fc_ns_gid_pn {
+	__be64     fn_wwpn;    /* port name */
+};
+
+/*
+ * GID_PN response
+ */
+struct fc_gid_pn_resp {
+	__u8      fp_resvd;
+	__u8      fp_fid[3];     /* port ID */
+};
+
+/*
+ * RFT_ID request - register FC-4 types for ID.
+ */
+struct fc_ns_rft_id {
+	struct fc_ns_fid fr_fid;	/* port ID object */
+	struct fc_ns_fts fr_fts;	/* FC-4 types object */
+};
+
+/*
+ * RPN_ID request - register port name for ID.
+ * RNN_ID request - register node name for ID.
+ */
+struct fc_ns_rn_id {
+	struct fc_ns_fid fr_fid;	/* port ID object */
+	__be64		fr_wwn;		/* node name or port name */
+} __attribute__((__packed__));
+
+#endif /* _FC_NS_H_ */
-- 
cgit v1.2.3


From 42e9a92fe6a9095bd68a379aaec7ad2be0337f7a Mon Sep 17 00:00:00 2001
From: Robert Love <robert.w.love@intel.com>
Date: Tue, 9 Dec 2008 15:10:17 -0800
Subject: [SCSI] libfc: A modular Fibre Channel library

libFC is composed of 4 blocks supported by an exchange manager
and a framing library. The upper 4 layers are fc_lport, fc_disc,
fc_rport and fc_fcp. A LLD that uses libfc could choose to
either use libfc's block, or using the transport template
defined in libfc.h, override one or more blocks with its own
implementation.

The EM (Exchange Manager) manages exhcanges/sequences for all
commands- ELS, CT and FCP.

The framing library frames ELS and CT commands.

The fc_lport block manages the library's representation of the
host's FC enabled ports.

The fc_disc block manages discovery of targets as well as
handling changes that occur in the FC fabric (via. RSCN events).

The fc_rport block manages the library's representation of other
entities in the FC fabric. Currently the library uses this block
for targets, its peer when in point-to-point mode and the
directory server, but can be extended for other entities if
needed.

The fc_fcp block interacts with the scsi-ml and handles all
I/O.

Signed-off-by: Robert Love <robert.w.love@intel.com>
[jejb: added include of delay.h to fix ppc64 compile prob spotted by sfr]
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/Kconfig          |    6 +
 drivers/scsi/Makefile         |    1 +
 drivers/scsi/libfc/Makefile   |   12 +
 drivers/scsi/libfc/fc_disc.c  |  845 ++++++++++++++++
 drivers/scsi/libfc/fc_elsct.c |   71 ++
 drivers/scsi/libfc/fc_exch.c  | 1970 +++++++++++++++++++++++++++++++++++++
 drivers/scsi/libfc/fc_fcp.c   | 2131 +++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/libfc/fc_frame.c |   89 ++
 drivers/scsi/libfc/fc_lport.c | 1604 +++++++++++++++++++++++++++++++
 drivers/scsi/libfc/fc_rport.c | 1291 +++++++++++++++++++++++++
 include/scsi/fc_encode.h      |  309 ++++++
 include/scsi/fc_frame.h       |  242 +++++
 include/scsi/libfc.h          |  938 ++++++++++++++++++
 13 files changed, 9509 insertions(+)
 create mode 100644 drivers/scsi/libfc/Makefile
 create mode 100644 drivers/scsi/libfc/fc_disc.c
 create mode 100644 drivers/scsi/libfc/fc_elsct.c
 create mode 100644 drivers/scsi/libfc/fc_exch.c
 create mode 100644 drivers/scsi/libfc/fc_fcp.c
 create mode 100644 drivers/scsi/libfc/fc_frame.c
 create mode 100644 drivers/scsi/libfc/fc_lport.c
 create mode 100644 drivers/scsi/libfc/fc_rport.c
 create mode 100644 include/scsi/fc_encode.h
 create mode 100644 include/scsi/fc_frame.h
 create mode 100644 include/scsi/libfc.h

(limited to 'include')

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 1badcec18f41..24d762aab7c5 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -603,6 +603,12 @@ config SCSI_FLASHPOINT
 	  substantial, so users of MultiMaster Host Adapters may not
 	  wish to include it.
 
+config LIBFC
+	tristate "LibFC module"
+	depends on SCSI && SCSI_FC_ATTRS
+	---help---
+	  Fibre Channel library module
+
 config SCSI_DMX3191D
 	tristate "DMX3191D SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index b89aedfa9ed7..87355f573d60 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_SCSI_SAS_LIBSAS)	+= libsas/
 obj-$(CONFIG_SCSI_SRP_ATTRS)	+= scsi_transport_srp.o
 obj-$(CONFIG_SCSI_DH)		+= device_handler/
 
+obj-$(CONFIG_LIBFC)		+= libfc/
 obj-$(CONFIG_ISCSI_TCP) 	+= libiscsi.o	libiscsi_tcp.o iscsi_tcp.o
 obj-$(CONFIG_INFINIBAND_ISER) 	+= libiscsi.o
 obj-$(CONFIG_SCSI_A4000T)	+= 53c700.o	a4000t.o
diff --git a/drivers/scsi/libfc/Makefile b/drivers/scsi/libfc/Makefile
new file mode 100644
index 000000000000..55f982de3a9a
--- /dev/null
+++ b/drivers/scsi/libfc/Makefile
@@ -0,0 +1,12 @@
+# $Id: Makefile
+
+obj-$(CONFIG_LIBFC) += libfc.o
+
+libfc-objs := \
+	fc_disc.o \
+	fc_exch.o \
+	fc_elsct.o \
+	fc_frame.o \
+	fc_lport.o \
+	fc_rport.o \
+	fc_fcp.o
diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
new file mode 100644
index 000000000000..dd1564c9e04a
--- /dev/null
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -0,0 +1,845 @@
+/*
+ * Copyright(c) 2007 - 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+/*
+ * Target Discovery
+ *
+ * This block discovers all FC-4 remote ports, including FCP initiators. It
+ * also handles RSCN events and re-discovery if necessary.
+ */
+
+/*
+ * DISC LOCKING
+ *
+ * The disc mutex is can be locked when acquiring rport locks, but may not
+ * be held when acquiring the lport lock. Refer to fc_lport.c for more
+ * details.
+ */
+
+#include <linux/timer.h>
+#include <linux/err.h>
+#include <asm/unaligned.h>
+
+#include <scsi/fc/fc_gs.h>
+
+#include <scsi/libfc.h>
+
+#define FC_DISC_RETRY_LIMIT	3	/* max retries */
+#define FC_DISC_RETRY_DELAY	500UL	/* (msecs) delay */
+
+#define	FC_DISC_DELAY		3
+
+static int fc_disc_debug;
+
+#define FC_DEBUG_DISC(fmt...)			\
+	do {					\
+		if (fc_disc_debug)		\
+			FC_DBG(fmt);		\
+	} while (0)
+
+static void fc_disc_gpn_ft_req(struct fc_disc *);
+static void fc_disc_gpn_ft_resp(struct fc_seq *, struct fc_frame *, void *);
+static int fc_disc_new_target(struct fc_disc *, struct fc_rport *,
+			      struct fc_rport_identifiers *);
+static void fc_disc_del_target(struct fc_disc *, struct fc_rport *);
+static void fc_disc_done(struct fc_disc *);
+static void fc_disc_timeout(struct work_struct *);
+static void fc_disc_single(struct fc_disc *, struct fc_disc_port *);
+static void fc_disc_restart(struct fc_disc *);
+
+/**
+ * fc_disc_lookup_rport - lookup a remote port by port_id
+ * @lport: Fibre Channel host port instance
+ * @port_id: remote port port_id to match
+ */
+struct fc_rport *fc_disc_lookup_rport(const struct fc_lport *lport,
+				      u32 port_id)
+{
+	const struct fc_disc *disc = &lport->disc;
+	struct fc_rport *rport, *found = NULL;
+	struct fc_rport_libfc_priv *rdata;
+	int disc_found = 0;
+
+	list_for_each_entry(rdata, &disc->rports, peers) {
+		rport = PRIV_TO_RPORT(rdata);
+		if (rport->port_id == port_id) {
+			disc_found = 1;
+			found = rport;
+			break;
+		}
+	}
+
+	if (!disc_found)
+		found = NULL;
+
+	return found;
+}
+
+/**
+ * fc_disc_stop_rports - delete all the remote ports associated with the lport
+ * @disc: The discovery job to stop rports on
+ *
+ * Locking Note: This function expects that the lport mutex is locked before
+ * calling it.
+ */
+void fc_disc_stop_rports(struct fc_disc *disc)
+{
+	struct fc_lport *lport;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rdata, *next;
+
+	lport = disc->lport;
+
+	mutex_lock(&disc->disc_mutex);
+	list_for_each_entry_safe(rdata, next, &disc->rports, peers) {
+		rport = PRIV_TO_RPORT(rdata);
+		list_del(&rdata->peers);
+		lport->tt.rport_logoff(rport);
+	}
+
+	mutex_unlock(&disc->disc_mutex);
+}
+
+/**
+ * fc_disc_rport_callback - Event handler for rport events
+ * @lport: The lport which is receiving the event
+ * @rport: The rport which the event has occured on
+ * @event: The event that occured
+ *
+ * Locking Note: The rport lock should not be held when calling
+ *		 this function.
+ */
+static void fc_disc_rport_callback(struct fc_lport *lport,
+				   struct fc_rport *rport,
+				   enum fc_rport_event event)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_disc *disc = &lport->disc;
+	int found = 0;
+
+	FC_DEBUG_DISC("Received a %d event for port (%6x)\n", event,
+		      rport->port_id);
+
+	if (event == RPORT_EV_CREATED) {
+		if (disc) {
+			found = 1;
+			mutex_lock(&disc->disc_mutex);
+			list_add_tail(&rdata->peers, &disc->rports);
+			mutex_unlock(&disc->disc_mutex);
+		}
+	}
+
+	if (!found)
+		FC_DEBUG_DISC("The rport (%6x) is not maintained "
+			      "by the discovery layer\n", rport->port_id);
+}
+
+/**
+ * fc_disc_recv_rscn_req - Handle Registered State Change Notification (RSCN)
+ * @sp: Current sequence of the RSCN exchange
+ * @fp: RSCN Frame
+ * @lport: Fibre Channel host port instance
+ *
+ * Locking Note: This function expects that the disc_mutex is locked
+ *		 before it is called.
+ */
+static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
+				  struct fc_disc *disc)
+{
+	struct fc_lport *lport;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rdata;
+	struct fc_els_rscn *rp;
+	struct fc_els_rscn_page *pp;
+	struct fc_seq_els_data rjt_data;
+	unsigned int len;
+	int redisc = 0;
+	enum fc_els_rscn_ev_qual ev_qual;
+	enum fc_els_rscn_addr_fmt fmt;
+	LIST_HEAD(disc_ports);
+	struct fc_disc_port *dp, *next;
+
+	lport = disc->lport;
+
+	FC_DEBUG_DISC("Received an RSCN event on port (%6x)\n",
+		      fc_host_port_id(lport->host));
+
+	/* make sure the frame contains an RSCN message */
+	rp = fc_frame_payload_get(fp, sizeof(*rp));
+	if (!rp)
+		goto reject;
+	/* make sure the page length is as expected (4 bytes) */
+	if (rp->rscn_page_len != sizeof(*pp))
+		goto reject;
+	/* get the RSCN payload length */
+	len = ntohs(rp->rscn_plen);
+	if (len < sizeof(*rp))
+		goto reject;
+	/* make sure the frame contains the expected payload */
+	rp = fc_frame_payload_get(fp, len);
+	if (!rp)
+		goto reject;
+	/* payload must be a multiple of the RSCN page size */
+	len -= sizeof(*rp);
+	if (len % sizeof(*pp))
+		goto reject;
+
+	for (pp = (void *)(rp + 1); len > 0; len -= sizeof(*pp), pp++) {
+		ev_qual = pp->rscn_page_flags >> ELS_RSCN_EV_QUAL_BIT;
+		ev_qual &= ELS_RSCN_EV_QUAL_MASK;
+		fmt = pp->rscn_page_flags >> ELS_RSCN_ADDR_FMT_BIT;
+		fmt &= ELS_RSCN_ADDR_FMT_MASK;
+		/*
+		 * if we get an address format other than port
+		 * (area, domain, fabric), then do a full discovery
+		 */
+		switch (fmt) {
+		case ELS_ADDR_FMT_PORT:
+			FC_DEBUG_DISC("Port address format for port (%6x)\n",
+				      ntoh24(pp->rscn_fid));
+			dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+			if (!dp) {
+				redisc = 1;
+				break;
+			}
+			dp->lp = lport;
+			dp->ids.port_id = ntoh24(pp->rscn_fid);
+			dp->ids.port_name = -1;
+			dp->ids.node_name = -1;
+			dp->ids.roles = FC_RPORT_ROLE_UNKNOWN;
+			list_add_tail(&dp->peers, &disc_ports);
+			break;
+		case ELS_ADDR_FMT_AREA:
+		case ELS_ADDR_FMT_DOM:
+		case ELS_ADDR_FMT_FAB:
+		default:
+			FC_DEBUG_DISC("Address format is (%d)\n", fmt);
+			redisc = 1;
+			break;
+		}
+	}
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	if (redisc) {
+		FC_DEBUG_DISC("RSCN received: rediscovering\n");
+		fc_disc_restart(disc);
+	} else {
+		FC_DEBUG_DISC("RSCN received: not rediscovering. "
+			      "redisc %d state %d in_prog %d\n",
+			      redisc, lport->state, disc->pending);
+		list_for_each_entry_safe(dp, next, &disc_ports, peers) {
+			list_del(&dp->peers);
+			rport = lport->tt.rport_lookup(lport, dp->ids.port_id);
+			if (rport) {
+				rdata = RPORT_TO_PRIV(rport);
+				list_del(&rdata->peers);
+				lport->tt.rport_logoff(rport);
+			}
+			fc_disc_single(disc, dp);
+		}
+	}
+	fc_frame_free(fp);
+	return;
+reject:
+	FC_DEBUG_DISC("Received a bad RSCN frame\n");
+	rjt_data.fp = NULL;
+	rjt_data.reason = ELS_RJT_LOGIC;
+	rjt_data.explan = ELS_EXPL_NONE;
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	fc_frame_free(fp);
+}
+
+/**
+ * fc_disc_recv_req - Handle incoming requests
+ * @sp: Current sequence of the request exchange
+ * @fp: The frame
+ * @lport: The FC local port
+ *
+ * Locking Note: This function is called from the EM and will lock
+ *		 the disc_mutex before calling the handler for the
+ *		 request.
+ */
+static void fc_disc_recv_req(struct fc_seq *sp, struct fc_frame *fp,
+			     struct fc_lport *lport)
+{
+	u8 op;
+	struct fc_disc *disc = &lport->disc;
+
+	op = fc_frame_payload_op(fp);
+	switch (op) {
+	case ELS_RSCN:
+		mutex_lock(&disc->disc_mutex);
+		fc_disc_recv_rscn_req(sp, fp, disc);
+		mutex_unlock(&disc->disc_mutex);
+		break;
+	default:
+		FC_DBG("Received an unsupported request. opcode (%x)\n", op);
+		break;
+	}
+}
+
+/**
+ * fc_disc_restart - Restart discovery
+ * @lport: FC discovery context
+ *
+ * Locking Note: This function expects that the disc mutex
+ *		 is already locked.
+ */
+static void fc_disc_restart(struct fc_disc *disc)
+{
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rdata, *next;
+	struct fc_lport *lport = disc->lport;
+
+	FC_DEBUG_DISC("Restarting discovery for port (%6x)\n",
+		      fc_host_port_id(lport->host));
+
+	list_for_each_entry_safe(rdata, next, &disc->rports, peers) {
+		rport = PRIV_TO_RPORT(rdata);
+		FC_DEBUG_DISC("list_del(%6x)\n", rport->port_id);
+		list_del(&rdata->peers);
+		lport->tt.rport_logoff(rport);
+	}
+
+	disc->requested = 1;
+	if (!disc->pending)
+		fc_disc_gpn_ft_req(disc);
+}
+
+/**
+ * fc_disc_start - Fibre Channel Target discovery
+ * @lport: FC local port
+ *
+ * Returns non-zero if discovery cannot be started.
+ */
+static void fc_disc_start(void (*disc_callback)(struct fc_lport *,
+						enum fc_disc_event),
+			  struct fc_lport *lport)
+{
+	struct fc_rport *rport;
+	struct fc_rport_identifiers ids;
+	struct fc_disc *disc = &lport->disc;
+
+	/*
+	 * At this point we may have a new disc job or an existing
+	 * one. Either way, let's lock when we make changes to it
+	 * and send the GPN_FT request.
+	 */
+	mutex_lock(&disc->disc_mutex);
+
+	disc->disc_callback = disc_callback;
+
+	/*
+	 * If not ready, or already running discovery, just set request flag.
+	 */
+	disc->requested = 1;
+
+	if (disc->pending) {
+		mutex_unlock(&disc->disc_mutex);
+		return;
+	}
+
+	/*
+	 * Handle point-to-point mode as a simple discovery
+	 * of the remote port. Yucky, yucky, yuck, yuck!
+	 */
+	rport = disc->lport->ptp_rp;
+	if (rport) {
+		ids.port_id = rport->port_id;
+		ids.port_name = rport->port_name;
+		ids.node_name = rport->node_name;
+		ids.roles = FC_RPORT_ROLE_UNKNOWN;
+		get_device(&rport->dev);
+
+		if (!fc_disc_new_target(disc, rport, &ids)) {
+			disc->event = DISC_EV_SUCCESS;
+			fc_disc_done(disc);
+		}
+		put_device(&rport->dev);
+	} else {
+		fc_disc_gpn_ft_req(disc);	/* get ports by FC-4 type */
+	}
+
+	mutex_unlock(&disc->disc_mutex);
+}
+
+static struct fc_rport_operations fc_disc_rport_ops = {
+	.event_callback = fc_disc_rport_callback,
+};
+
+/**
+ * fc_disc_new_target - Handle new target found by discovery
+ * @lport: FC local port
+ * @rport: The previous FC remote port (NULL if new remote port)
+ * @ids: Identifiers for the new FC remote port
+ *
+ * Locking Note: This function expects that the disc_mutex is locked
+ *		 before it is called.
+ */
+static int fc_disc_new_target(struct fc_disc *disc,
+			      struct fc_rport *rport,
+			      struct fc_rport_identifiers *ids)
+{
+	struct fc_lport *lport = disc->lport;
+	struct fc_rport_libfc_priv *rp;
+	int error = 0;
+
+	if (rport && ids->port_name) {
+		if (rport->port_name == -1) {
+			/*
+			 * Set WWN and fall through to notify of create.
+			 */
+			fc_rport_set_name(rport, ids->port_name,
+					  rport->node_name);
+		} else if (rport->port_name != ids->port_name) {
+			/*
+			 * This is a new port with the same FCID as
+			 * a previously-discovered port.  Presumably the old
+			 * port logged out and a new port logged in and was
+			 * assigned the same FCID.  This should be rare.
+			 * Delete the old one and fall thru to re-create.
+			 */
+			fc_disc_del_target(disc, rport);
+			rport = NULL;
+		}
+	}
+	if (((ids->port_name != -1) || (ids->port_id != -1)) &&
+	    ids->port_id != fc_host_port_id(lport->host) &&
+	    ids->port_name != lport->wwpn) {
+		if (!rport) {
+			rport = lport->tt.rport_lookup(lport, ids->port_id);
+			if (!rport) {
+				struct fc_disc_port dp;
+				dp.lp = lport;
+				dp.ids.port_id = ids->port_id;
+				dp.ids.port_name = ids->port_name;
+				dp.ids.node_name = ids->node_name;
+				dp.ids.roles = ids->roles;
+				rport = fc_rport_rogue_create(&dp);
+			}
+			if (!rport)
+				error = -ENOMEM;
+		}
+		if (rport) {
+			rp = rport->dd_data;
+			rp->ops = &fc_disc_rport_ops;
+			rp->rp_state = RPORT_ST_INIT;
+			lport->tt.rport_login(rport);
+		}
+	}
+	return error;
+}
+
+/**
+ * fc_disc_del_target - Delete a target
+ * @disc: FC discovery context
+ * @rport: The remote port to be removed
+ */
+static void fc_disc_del_target(struct fc_disc *disc, struct fc_rport *rport)
+{
+	struct fc_lport *lport = disc->lport;
+	struct fc_rport_libfc_priv *rdata = RPORT_TO_PRIV(rport);
+	list_del(&rdata->peers);
+	lport->tt.rport_logoff(rport);
+}
+
+/**
+ * fc_disc_done - Discovery has been completed
+ * @disc: FC discovery context
+ */
+static void fc_disc_done(struct fc_disc *disc)
+{
+	struct fc_lport *lport = disc->lport;
+
+	FC_DEBUG_DISC("Discovery complete for port (%6x)\n",
+		      fc_host_port_id(lport->host));
+
+	disc->disc_callback(lport, disc->event);
+	disc->event = DISC_EV_NONE;
+
+	if (disc->requested)
+		fc_disc_gpn_ft_req(disc);
+	else
+		disc->pending = 0;
+}
+
+/**
+ * fc_disc_error - Handle error on dNS request
+ * @disc: FC discovery context
+ * @fp: The frame pointer
+ */
+static void fc_disc_error(struct fc_disc *disc, struct fc_frame *fp)
+{
+	struct fc_lport *lport = disc->lport;
+	unsigned long delay = 0;
+	if (fc_disc_debug)
+		FC_DBG("Error %ld, retries %d/%d\n",
+		       PTR_ERR(fp), disc->retry_count,
+		       FC_DISC_RETRY_LIMIT);
+
+	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
+		/*
+		 * Memory allocation failure, or the exchange timed out,
+		 * retry after delay.
+		 */
+		if (disc->retry_count < FC_DISC_RETRY_LIMIT) {
+			/* go ahead and retry */
+			if (!fp)
+				delay = msecs_to_jiffies(FC_DISC_RETRY_DELAY);
+			else {
+				delay = msecs_to_jiffies(lport->e_d_tov);
+
+				/* timeout faster first time */
+				if (!disc->retry_count)
+					delay /= 4;
+			}
+			disc->retry_count++;
+			schedule_delayed_work(&disc->disc_work, delay);
+		} else {
+			/* exceeded retries */
+			disc->event = DISC_EV_FAILED;
+			fc_disc_done(disc);
+		}
+	}
+}
+
+/**
+ * fc_disc_gpn_ft_req - Send Get Port Names by FC-4 type (GPN_FT) request
+ * @lport: FC discovery context
+ *
+ * Locking Note: This function expects that the disc_mutex is locked
+ *		 before it is called.
+ */
+static void fc_disc_gpn_ft_req(struct fc_disc *disc)
+{
+	struct fc_frame *fp;
+	struct fc_lport *lport = disc->lport;
+
+	WARN_ON(!fc_lport_test_ready(lport));
+
+	disc->pending = 1;
+	disc->requested = 0;
+
+	disc->buf_len = 0;
+	disc->seq_count = 0;
+	fp = fc_frame_alloc(lport,
+			    sizeof(struct fc_ct_hdr) +
+			    sizeof(struct fc_ns_gid_ft));
+	if (!fp)
+		goto err;
+
+	if (lport->tt.elsct_send(lport, NULL, fp,
+				 FC_NS_GPN_FT,
+				 fc_disc_gpn_ft_resp,
+				 disc, lport->e_d_tov))
+		return;
+err:
+	fc_disc_error(disc, fp);
+}
+
+/**
+ * fc_disc_gpn_ft_parse - Parse the list of IDs and names resulting from a request
+ * @lport: Fibre Channel host port instance
+ * @buf: GPN_FT response buffer
+ * @len: size of response buffer
+ */
+static int fc_disc_gpn_ft_parse(struct fc_disc *disc, void *buf, size_t len)
+{
+	struct fc_lport *lport;
+	struct fc_gpn_ft_resp *np;
+	char *bp;
+	size_t plen;
+	size_t tlen;
+	int error = 0;
+	struct fc_disc_port dp;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rdata;
+
+	lport = disc->lport;
+
+	/*
+	 * Handle partial name record left over from previous call.
+	 */
+	bp = buf;
+	plen = len;
+	np = (struct fc_gpn_ft_resp *)bp;
+	tlen = disc->buf_len;
+	if (tlen) {
+		WARN_ON(tlen >= sizeof(*np));
+		plen = sizeof(*np) - tlen;
+		WARN_ON(plen <= 0);
+		WARN_ON(plen >= sizeof(*np));
+		if (plen > len)
+			plen = len;
+		np = &disc->partial_buf;
+		memcpy((char *)np + tlen, bp, plen);
+
+		/*
+		 * Set bp so that the loop below will advance it to the
+		 * first valid full name element.
+		 */
+		bp -= tlen;
+		len += tlen;
+		plen += tlen;
+		disc->buf_len = (unsigned char) plen;
+		if (plen == sizeof(*np))
+			disc->buf_len = 0;
+	}
+
+	/*
+	 * Handle full name records, including the one filled from above.
+	 * Normally, np == bp and plen == len, but from the partial case above,
+	 * bp, len describe the overall buffer, and np, plen describe the
+	 * partial buffer, which if would usually be full now.
+	 * After the first time through the loop, things return to "normal".
+	 */
+	while (plen >= sizeof(*np)) {
+		dp.lp = lport;
+		dp.ids.port_id = ntoh24(np->fp_fid);
+		dp.ids.port_name = ntohll(np->fp_wwpn);
+		dp.ids.node_name = -1;
+		dp.ids.roles = FC_RPORT_ROLE_UNKNOWN;
+
+		if ((dp.ids.port_id != fc_host_port_id(lport->host)) &&
+		    (dp.ids.port_name != lport->wwpn)) {
+			rport = fc_rport_rogue_create(&dp);
+			if (rport) {
+				rdata = rport->dd_data;
+				rdata->ops = &fc_disc_rport_ops;
+				rdata->local_port = lport;
+				lport->tt.rport_login(rport);
+			} else
+				FC_DBG("Failed to allocate memory for "
+				       "the newly discovered port (%6x)\n",
+				       dp.ids.port_id);
+		}
+
+		if (np->fp_flags & FC_NS_FID_LAST) {
+			disc->event = DISC_EV_SUCCESS;
+			fc_disc_done(disc);
+			len = 0;
+			break;
+		}
+		len -= sizeof(*np);
+		bp += sizeof(*np);
+		np = (struct fc_gpn_ft_resp *)bp;
+		plen = len;
+	}
+
+	/*
+	 * Save any partial record at the end of the buffer for next time.
+	 */
+	if (error == 0 && len > 0 && len < sizeof(*np)) {
+		if (np != &disc->partial_buf) {
+			FC_DEBUG_DISC("Partial buffer remains "
+				      "for discovery by (%6x)\n",
+				      fc_host_port_id(lport->host));
+			memcpy(&disc->partial_buf, np, len);
+		}
+		disc->buf_len = (unsigned char) len;
+	} else {
+		disc->buf_len = 0;
+	}
+	return error;
+}
+
+/*
+ * Handle retry of memory allocation for remote ports.
+ */
+static void fc_disc_timeout(struct work_struct *work)
+{
+	struct fc_disc *disc = container_of(work,
+					    struct fc_disc,
+					    disc_work.work);
+	mutex_lock(&disc->disc_mutex);
+	if (disc->requested && !disc->pending)
+		fc_disc_gpn_ft_req(disc);
+	mutex_unlock(&disc->disc_mutex);
+}
+
+/**
+ * fc_disc_gpn_ft_resp - Handle a response frame from Get Port Names (GPN_FT)
+ * @sp: Current sequence of GPN_FT exchange
+ * @fp: response frame
+ * @lp_arg: Fibre Channel host port instance
+ *
+ * Locking Note: This function expects that the disc_mutex is locked
+ *		 before it is called.
+ */
+static void fc_disc_gpn_ft_resp(struct fc_seq *sp, struct fc_frame *fp,
+				void *disc_arg)
+{
+	struct fc_disc *disc = disc_arg;
+	struct fc_ct_hdr *cp;
+	struct fc_frame_header *fh;
+	unsigned int seq_cnt;
+	void *buf = NULL;
+	unsigned int len;
+	int error;
+
+	FC_DEBUG_DISC("Received a GPN_FT response on port (%6x)\n",
+		      fc_host_port_id(disc->lport->host));
+
+	if (IS_ERR(fp)) {
+		fc_disc_error(disc, fp);
+		return;
+	}
+
+	WARN_ON(!fc_frame_is_linear(fp));	/* buffer must be contiguous */
+	fh = fc_frame_header_get(fp);
+	len = fr_len(fp) - sizeof(*fh);
+	seq_cnt = ntohs(fh->fh_seq_cnt);
+	if (fr_sof(fp) == FC_SOF_I3 && seq_cnt == 0 &&
+	    disc->seq_count == 0) {
+		cp = fc_frame_payload_get(fp, sizeof(*cp));
+		if (!cp) {
+			FC_DBG("GPN_FT response too short, len %d\n",
+			       fr_len(fp));
+		} else if (ntohs(cp->ct_cmd) == FC_FS_ACC) {
+
+			/*
+			 * Accepted.  Parse response.
+			 */
+			buf = cp + 1;
+			len -= sizeof(*cp);
+		} else if (ntohs(cp->ct_cmd) == FC_FS_RJT) {
+			FC_DBG("GPN_FT rejected reason %x exp %x "
+			       "(check zoning)\n", cp->ct_reason,
+			       cp->ct_explan);
+			disc->event = DISC_EV_FAILED;
+			fc_disc_done(disc);
+		} else {
+			FC_DBG("GPN_FT unexpected response code %x\n",
+			       ntohs(cp->ct_cmd));
+		}
+	} else if (fr_sof(fp) == FC_SOF_N3 &&
+		   seq_cnt == disc->seq_count) {
+		buf = fh + 1;
+	} else {
+		FC_DBG("GPN_FT unexpected frame - out of sequence? "
+		       "seq_cnt %x expected %x sof %x eof %x\n",
+		       seq_cnt, disc->seq_count, fr_sof(fp), fr_eof(fp));
+	}
+	if (buf) {
+		error = fc_disc_gpn_ft_parse(disc, buf, len);
+		if (error)
+			fc_disc_error(disc, fp);
+		else
+			disc->seq_count++;
+	}
+	fc_frame_free(fp);
+}
+
+/**
+ * fc_disc_single - Discover the directory information for a single target
+ * @lport: FC local port
+ * @dp: The port to rediscover
+ *
+ * Locking Note: This function expects that the disc_mutex is locked
+ *		 before it is called.
+ */
+static void fc_disc_single(struct fc_disc *disc, struct fc_disc_port *dp)
+{
+	struct fc_lport *lport;
+	struct fc_rport *rport;
+	struct fc_rport *new_rport;
+	struct fc_rport_libfc_priv *rdata;
+
+	lport = disc->lport;
+
+	if (dp->ids.port_id == fc_host_port_id(lport->host))
+		goto out;
+
+	rport = lport->tt.rport_lookup(lport, dp->ids.port_id);
+	if (rport)
+		fc_disc_del_target(disc, rport);
+
+	new_rport = fc_rport_rogue_create(dp);
+	if (new_rport) {
+		rdata = new_rport->dd_data;
+		rdata->ops = &fc_disc_rport_ops;
+		kfree(dp);
+		lport->tt.rport_login(new_rport);
+	}
+	return;
+out:
+	kfree(dp);
+}
+
+/**
+ * fc_disc_stop - Stop discovery for a given lport
+ * @lport: The lport that discovery should stop for
+ */
+void fc_disc_stop(struct fc_lport *lport)
+{
+	struct fc_disc *disc = &lport->disc;
+
+	if (disc) {
+		cancel_delayed_work_sync(&disc->disc_work);
+		fc_disc_stop_rports(disc);
+	}
+}
+
+/**
+ * fc_disc_stop_final - Stop discovery for a given lport
+ * @lport: The lport that discovery should stop for
+ *
+ * This function will block until discovery has been
+ * completely stopped and all rports have been deleted.
+ */
+void fc_disc_stop_final(struct fc_lport *lport)
+{
+	fc_disc_stop(lport);
+	lport->tt.rport_flush_queue();
+}
+
+/**
+ * fc_disc_init - Initialize the discovery block
+ * @lport: FC local port
+ */
+int fc_disc_init(struct fc_lport *lport)
+{
+	struct fc_disc *disc;
+
+	if (!lport->tt.disc_start)
+		lport->tt.disc_start = fc_disc_start;
+
+	if (!lport->tt.disc_stop)
+		lport->tt.disc_stop = fc_disc_stop;
+
+	if (!lport->tt.disc_stop_final)
+		lport->tt.disc_stop_final = fc_disc_stop_final;
+
+	if (!lport->tt.disc_recv_req)
+		lport->tt.disc_recv_req = fc_disc_recv_req;
+
+	if (!lport->tt.rport_lookup)
+		lport->tt.rport_lookup = fc_disc_lookup_rport;
+
+	disc = &lport->disc;
+	INIT_DELAYED_WORK(&disc->disc_work, fc_disc_timeout);
+	mutex_init(&disc->disc_mutex);
+	INIT_LIST_HEAD(&disc->rports);
+
+	disc->lport = lport;
+	disc->delay = FC_DISC_DELAY;
+	disc->event = DISC_EV_NONE;
+
+	return 0;
+}
+EXPORT_SYMBOL(fc_disc_init);
diff --git a/drivers/scsi/libfc/fc_elsct.c b/drivers/scsi/libfc/fc_elsct.c
new file mode 100644
index 000000000000..dd47fe619d1e
--- /dev/null
+++ b/drivers/scsi/libfc/fc_elsct.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright(c) 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+/*
+ * Provide interface to send ELS/CT FC frames
+ */
+
+#include <asm/unaligned.h>
+#include <scsi/fc/fc_gs.h>
+#include <scsi/fc/fc_ns.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+/*
+ * fc_elsct_send - sends ELS/CT frame
+ */
+static struct fc_seq *fc_elsct_send(struct fc_lport *lport,
+				    struct fc_rport *rport,
+				    struct fc_frame *fp,
+				    unsigned int op,
+				    void (*resp)(struct fc_seq *,
+						 struct fc_frame *fp,
+						 void *arg),
+				    void *arg, u32 timer_msec)
+{
+	enum fc_rctl r_ctl;
+	u32 did;
+	enum fc_fh_type fh_type;
+	int rc;
+
+	/* ELS requests */
+	if ((op >= ELS_LS_RJT) && (op <= ELS_AUTH_ELS))
+		rc = fc_els_fill(lport, rport, fp, op, &r_ctl, &did, &fh_type);
+	else
+		/* CT requests */
+		rc = fc_ct_fill(lport, fp, op, &r_ctl, &did, &fh_type);
+
+	if (rc)
+		return NULL;
+
+	fc_fill_fc_hdr(fp, r_ctl, did, fc_host_port_id(lport->host), fh_type,
+		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+
+	return lport->tt.exch_seq_send(lport, fp, resp, NULL, arg, timer_msec);
+}
+
+int fc_elsct_init(struct fc_lport *lport)
+{
+	if (!lport->tt.elsct_send)
+		lport->tt.elsct_send = fc_elsct_send;
+
+	return 0;
+}
+EXPORT_SYMBOL(fc_elsct_init);
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
new file mode 100644
index 000000000000..66db08a5f27f
--- /dev/null
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -0,0 +1,1970 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ * Copyright(c) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright(c) 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+/*
+ * Fibre Channel exchange and sequence handling.
+ */
+
+#include <linux/timer.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+
+#include <scsi/fc/fc_fc2.h>
+
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+#define	  FC_DEF_R_A_TOV      (10 * 1000) /* resource allocation timeout */
+
+/*
+ * fc_exch_debug can be set in debugger or at compile time to get more logs.
+ */
+static int fc_exch_debug;
+
+#define FC_DEBUG_EXCH(fmt...)			\
+	do {					\
+		if (fc_exch_debug)		\
+			FC_DBG(fmt);		\
+	} while (0)
+
+static struct kmem_cache *fc_em_cachep;	/* cache for exchanges */
+
+/*
+ * Structure and function definitions for managing Fibre Channel Exchanges
+ * and Sequences.
+ *
+ * The three primary structures used here are fc_exch_mgr, fc_exch, and fc_seq.
+ *
+ * fc_exch_mgr holds the exchange state for an N port
+ *
+ * fc_exch holds state for one exchange and links to its active sequence.
+ *
+ * fc_seq holds the state for an individual sequence.
+ */
+
+/*
+ * Exchange manager.
+ *
+ * This structure is the center for creating exchanges and sequences.
+ * It manages the allocation of exchange IDs.
+ */
+struct fc_exch_mgr {
+	enum fc_class	class;		/* default class for sequences */
+	spinlock_t	em_lock;	/* exchange manager lock,
+					   must be taken before ex_lock */
+	u16		last_xid;	/* last allocated exchange ID */
+	u16		min_xid;	/* min exchange ID */
+	u16		max_xid;	/* max exchange ID */
+	u16		max_read;	/* max exchange ID for read */
+	u16		last_read;	/* last xid allocated for read */
+	u32	total_exches;		/* total allocated exchanges */
+	struct list_head	ex_list;	/* allocated exchanges list */
+	struct fc_lport	*lp;		/* fc device instance */
+	mempool_t	*ep_pool;	/* reserve ep's */
+
+	/*
+	 * currently exchange mgr stats are updated but not used.
+	 * either stats can be expose via sysfs or remove them
+	 * all together if not used XXX
+	 */
+	struct {
+		atomic_t no_free_exch;
+		atomic_t no_free_exch_xid;
+		atomic_t xid_not_found;
+		atomic_t xid_busy;
+		atomic_t seq_not_found;
+		atomic_t non_bls_resp;
+	} stats;
+	struct fc_exch **exches;	/* for exch pointers indexed by xid */
+};
+#define	fc_seq_exch(sp) container_of(sp, struct fc_exch, seq)
+
+static void fc_exch_rrq(struct fc_exch *);
+static void fc_seq_ls_acc(struct fc_seq *);
+static void fc_seq_ls_rjt(struct fc_seq *, enum fc_els_rjt_reason,
+			  enum fc_els_rjt_explan);
+static void fc_exch_els_rec(struct fc_seq *, struct fc_frame *);
+static void fc_exch_els_rrq(struct fc_seq *, struct fc_frame *);
+static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp);
+
+/*
+ * Internal implementation notes.
+ *
+ * The exchange manager is one by default in libfc but LLD may choose
+ * to have one per CPU. The sequence manager is one per exchange manager
+ * and currently never separated.
+ *
+ * Section 9.8 in FC-FS-2 specifies:  "The SEQ_ID is a one-byte field
+ * assigned by the Sequence Initiator that shall be unique for a specific
+ * D_ID and S_ID pair while the Sequence is open."   Note that it isn't
+ * qualified by exchange ID, which one might think it would be.
+ * In practice this limits the number of open sequences and exchanges to 256
+ * per session.	 For most targets we could treat this limit as per exchange.
+ *
+ * The exchange and its sequence are freed when the last sequence is received.
+ * It's possible for the remote port to leave an exchange open without
+ * sending any sequences.
+ *
+ * Notes on reference counts:
+ *
+ * Exchanges are reference counted and exchange gets freed when the reference
+ * count becomes zero.
+ *
+ * Timeouts:
+ * Sequences are timed out for E_D_TOV and R_A_TOV.
+ *
+ * Sequence event handling:
+ *
+ * The following events may occur on initiator sequences:
+ *
+ *	Send.
+ *	    For now, the whole thing is sent.
+ *	Receive ACK
+ *	    This applies only to class F.
+ *	    The sequence is marked complete.
+ *	ULP completion.
+ *	    The upper layer calls fc_exch_done() when done
+ *	    with exchange and sequence tuple.
+ *	RX-inferred completion.
+ *	    When we receive the next sequence on the same exchange, we can
+ *	    retire the previous sequence ID.  (XXX not implemented).
+ *	Timeout.
+ *	    R_A_TOV frees the sequence ID.  If we're waiting for ACK,
+ *	    E_D_TOV causes abort and calls upper layer response handler
+ *	    with FC_EX_TIMEOUT error.
+ *	Receive RJT
+ *	    XXX defer.
+ *	Send ABTS
+ *	    On timeout.
+ *
+ * The following events may occur on recipient sequences:
+ *
+ *	Receive
+ *	    Allocate sequence for first frame received.
+ *	    Hold during receive handler.
+ *	    Release when final frame received.
+ *	    Keep status of last N of these for the ELS RES command.  XXX TBD.
+ *	Receive ABTS
+ *	    Deallocate sequence
+ *	Send RJT
+ *	    Deallocate
+ *
+ * For now, we neglect conditions where only part of a sequence was
+ * received or transmitted, or where out-of-order receipt is detected.
+ */
+
+/*
+ * Locking notes:
+ *
+ * The EM code run in a per-CPU worker thread.
+ *
+ * To protect against concurrency between a worker thread code and timers,
+ * sequence allocation and deallocation must be locked.
+ *  - exchange refcnt can be done atomicly without locks.
+ *  - sequence allocation must be locked by exch lock.
+ *  - If the em_lock and ex_lock must be taken at the same time, then the
+ *    em_lock must be taken before the ex_lock.
+ */
+
+/*
+ * opcode names for debugging.
+ */
+static char *fc_exch_rctl_names[] = FC_RCTL_NAMES_INIT;
+
+#define FC_TABLE_SIZE(x)   (sizeof(x) / sizeof(x[0]))
+
+static inline const char *fc_exch_name_lookup(unsigned int op, char **table,
+					      unsigned int max_index)
+{
+	const char *name = NULL;
+
+	if (op < max_index)
+		name = table[op];
+	if (!name)
+		name = "unknown";
+	return name;
+}
+
+static const char *fc_exch_rctl_name(unsigned int op)
+{
+	return fc_exch_name_lookup(op, fc_exch_rctl_names,
+				   FC_TABLE_SIZE(fc_exch_rctl_names));
+}
+
+/*
+ * Hold an exchange - keep it from being freed.
+ */
+static void fc_exch_hold(struct fc_exch *ep)
+{
+	atomic_inc(&ep->ex_refcnt);
+}
+
+/*
+ * setup fc hdr by initializing few more FC header fields and sof/eof.
+ * Initialized fields by this func:
+ *	- fh_ox_id, fh_rx_id, fh_seq_id, fh_seq_cnt
+ *	- sof and eof
+ */
+static void fc_exch_setup_hdr(struct fc_exch *ep, struct fc_frame *fp,
+			      u32 f_ctl)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	u16 fill;
+
+	fr_sof(fp) = ep->class;
+	if (ep->seq.cnt)
+		fr_sof(fp) = fc_sof_normal(ep->class);
+
+	if (f_ctl & FC_FC_END_SEQ) {
+		fr_eof(fp) = FC_EOF_T;
+		if (fc_sof_needs_ack(ep->class))
+			fr_eof(fp) = FC_EOF_N;
+		/*
+		 * Form f_ctl.
+		 * The number of fill bytes to make the length a 4-byte
+		 * multiple is the low order 2-bits of the f_ctl.
+		 * The fill itself will have been cleared by the frame
+		 * allocation.
+		 * After this, the length will be even, as expected by
+		 * the transport.
+		 */
+		fill = fr_len(fp) & 3;
+		if (fill) {
+			fill = 4 - fill;
+			/* TODO, this may be a problem with fragmented skb */
+			skb_put(fp_skb(fp), fill);
+			hton24(fh->fh_f_ctl, f_ctl | fill);
+		}
+	} else {
+		WARN_ON(fr_len(fp) % 4 != 0);	/* no pad to non last frame */
+		fr_eof(fp) = FC_EOF_N;
+	}
+
+	/*
+	 * Initialize remainig fh fields
+	 * from fc_fill_fc_hdr
+	 */
+	fh->fh_ox_id = htons(ep->oxid);
+	fh->fh_rx_id = htons(ep->rxid);
+	fh->fh_seq_id = ep->seq.id;
+	fh->fh_seq_cnt = htons(ep->seq.cnt);
+}
+
+
+/*
+ * Release a reference to an exchange.
+ * If the refcnt goes to zero and the exchange is complete, it is freed.
+ */
+static void fc_exch_release(struct fc_exch *ep)
+{
+	struct fc_exch_mgr *mp;
+
+	if (atomic_dec_and_test(&ep->ex_refcnt)) {
+		mp = ep->em;
+		if (ep->destructor)
+			ep->destructor(&ep->seq, ep->arg);
+		if (ep->lp->tt.exch_put)
+			ep->lp->tt.exch_put(ep->lp, mp, ep->xid);
+		WARN_ON(!ep->esb_stat & ESB_ST_COMPLETE);
+		mempool_free(ep, mp->ep_pool);
+	}
+}
+
+static int fc_exch_done_locked(struct fc_exch *ep)
+{
+	int rc = 1;
+
+	/*
+	 * We must check for completion in case there are two threads
+	 * tyring to complete this. But the rrq code will reuse the
+	 * ep, and in that case we only clear the resp and set it as
+	 * complete, so it can be reused by the timer to send the rrq.
+	 */
+	ep->resp = NULL;
+	if (ep->state & FC_EX_DONE)
+		return rc;
+	ep->esb_stat |= ESB_ST_COMPLETE;
+
+	if (!(ep->esb_stat & ESB_ST_REC_QUAL)) {
+		ep->state |= FC_EX_DONE;
+		if (cancel_delayed_work(&ep->timeout_work))
+			atomic_dec(&ep->ex_refcnt); /* drop hold for timer */
+		rc = 0;
+	}
+	return rc;
+}
+
+static void fc_exch_mgr_delete_ep(struct fc_exch *ep)
+{
+	struct fc_exch_mgr *mp;
+
+	mp = ep->em;
+	spin_lock_bh(&mp->em_lock);
+	WARN_ON(mp->total_exches <= 0);
+	mp->total_exches--;
+	mp->exches[ep->xid - mp->min_xid] = NULL;
+	list_del(&ep->ex_list);
+	spin_unlock_bh(&mp->em_lock);
+	fc_exch_release(ep);	/* drop hold for exch in mp */
+}
+
+/*
+ * Internal version of fc_exch_timer_set - used with lock held.
+ */
+static inline void fc_exch_timer_set_locked(struct fc_exch *ep,
+					    unsigned int timer_msec)
+{
+	if (ep->state & (FC_EX_RST_CLEANUP | FC_EX_DONE))
+		return;
+
+	FC_DEBUG_EXCH("Exchange (%4x) timed out, notifying the upper layer\n",
+		      ep->xid);
+	if (schedule_delayed_work(&ep->timeout_work,
+				  msecs_to_jiffies(timer_msec)))
+		fc_exch_hold(ep);		/* hold for timer */
+}
+
+/*
+ * Set timer for an exchange.
+ * The time is a minimum delay in milliseconds until the timer fires.
+ * Used for upper level protocols to time out the exchange.
+ * The timer is cancelled when it fires or when the exchange completes.
+ * Returns non-zero if a timer couldn't be allocated.
+ */
+static void fc_exch_timer_set(struct fc_exch *ep, unsigned int timer_msec)
+{
+	spin_lock_bh(&ep->ex_lock);
+	fc_exch_timer_set_locked(ep, timer_msec);
+	spin_unlock_bh(&ep->ex_lock);
+}
+
+int fc_seq_exch_abort(const struct fc_seq *req_sp, unsigned int timer_msec)
+{
+	struct fc_seq *sp;
+	struct fc_exch *ep;
+	struct fc_frame *fp;
+	int error;
+
+	ep = fc_seq_exch(req_sp);
+
+	spin_lock_bh(&ep->ex_lock);
+	if (ep->esb_stat & (ESB_ST_COMPLETE | ESB_ST_ABNORMAL) ||
+	    ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP)) {
+		spin_unlock_bh(&ep->ex_lock);
+		return -ENXIO;
+	}
+
+	/*
+	 * Send the abort on a new sequence if possible.
+	 */
+	sp = fc_seq_start_next_locked(&ep->seq);
+	if (!sp) {
+		spin_unlock_bh(&ep->ex_lock);
+		return -ENOMEM;
+	}
+
+	ep->esb_stat |= ESB_ST_SEQ_INIT | ESB_ST_ABNORMAL;
+	if (timer_msec)
+		fc_exch_timer_set_locked(ep, timer_msec);
+	spin_unlock_bh(&ep->ex_lock);
+
+	/*
+	 * If not logged into the fabric, don't send ABTS but leave
+	 * sequence active until next timeout.
+	 */
+	if (!ep->sid)
+		return 0;
+
+	/*
+	 * Send an abort for the sequence that timed out.
+	 */
+	fp = fc_frame_alloc(ep->lp, 0);
+	if (fp) {
+		fc_fill_fc_hdr(fp, FC_RCTL_BA_ABTS, ep->did, ep->sid,
+			       FC_TYPE_BLS, FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+		error = fc_seq_send(ep->lp, sp, fp);
+	} else
+		error = -ENOBUFS;
+	return error;
+}
+EXPORT_SYMBOL(fc_seq_exch_abort);
+
+/*
+ * Exchange timeout - handle exchange timer expiration.
+ * The timer will have been cancelled before this is called.
+ */
+static void fc_exch_timeout(struct work_struct *work)
+{
+	struct fc_exch *ep = container_of(work, struct fc_exch,
+					  timeout_work.work);
+	struct fc_seq *sp = &ep->seq;
+	void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg);
+	void *arg;
+	u32 e_stat;
+	int rc = 1;
+
+	spin_lock_bh(&ep->ex_lock);
+	if (ep->state & (FC_EX_RST_CLEANUP | FC_EX_DONE))
+		goto unlock;
+
+	e_stat = ep->esb_stat;
+	if (e_stat & ESB_ST_COMPLETE) {
+		ep->esb_stat = e_stat & ~ESB_ST_REC_QUAL;
+		if (e_stat & ESB_ST_REC_QUAL)
+			fc_exch_rrq(ep);
+		spin_unlock_bh(&ep->ex_lock);
+		goto done;
+	} else {
+		resp = ep->resp;
+		arg = ep->arg;
+		ep->resp = NULL;
+		if (e_stat & ESB_ST_ABNORMAL)
+			rc = fc_exch_done_locked(ep);
+		spin_unlock_bh(&ep->ex_lock);
+		if (!rc)
+			fc_exch_mgr_delete_ep(ep);
+		if (resp)
+			resp(sp, ERR_PTR(-FC_EX_TIMEOUT), arg);
+		fc_seq_exch_abort(sp, 2 * ep->r_a_tov);
+		goto done;
+	}
+unlock:
+	spin_unlock_bh(&ep->ex_lock);
+done:
+	/*
+	 * This release matches the hold taken when the timer was set.
+	 */
+	fc_exch_release(ep);
+}
+
+/*
+ * Allocate a sequence.
+ *
+ * We don't support multiple originated sequences on the same exchange.
+ * By implication, any previously originated sequence on this exchange
+ * is complete, and we reallocate the same sequence.
+ */
+static struct fc_seq *fc_seq_alloc(struct fc_exch *ep, u8 seq_id)
+{
+	struct fc_seq *sp;
+
+	sp = &ep->seq;
+	sp->ssb_stat = 0;
+	sp->cnt = 0;
+	sp->id = seq_id;
+	return sp;
+}
+
+/*
+ * fc_em_alloc_xid - returns an xid based on request type
+ * @lp : ptr to associated lport
+ * @fp : ptr to the assocated frame
+ *
+ * check the associated fc_fsp_pkt to get scsi command type and
+ * command direction to decide from which range this exch id
+ * will be allocated from.
+ *
+ * Returns : 0 or an valid xid
+ */
+static u16 fc_em_alloc_xid(struct fc_exch_mgr *mp, const struct fc_frame *fp)
+{
+	u16 xid, min, max;
+	u16 *plast;
+	struct fc_exch *ep = NULL;
+
+	if (mp->max_read) {
+		if (fc_frame_is_read(fp)) {
+			min = mp->min_xid;
+			max = mp->max_read;
+			plast = &mp->last_read;
+		} else {
+			min = mp->max_read + 1;
+			max = mp->max_xid;
+			plast = &mp->last_xid;
+		}
+	} else {
+		min = mp->min_xid;
+		max = mp->max_xid;
+		plast = &mp->last_xid;
+	}
+	xid = *plast;
+	do {
+		xid = (xid == max) ? min : xid + 1;
+		ep = mp->exches[xid - mp->min_xid];
+	} while ((ep != NULL) && (xid != *plast));
+
+	if (unlikely(ep))
+		xid = 0;
+	else
+		*plast = xid;
+
+	return xid;
+}
+
+/*
+ * fc_exch_alloc - allocate an exchange.
+ * @mp : ptr to the exchange manager
+ * @xid: input xid
+ *
+ * if xid is supplied zero then assign next free exchange ID
+ * from exchange manager, otherwise use supplied xid.
+ * Returns with exch lock held.
+ */
+struct fc_exch *fc_exch_alloc(struct fc_exch_mgr *mp,
+			      struct fc_frame *fp, u16 xid)
+{
+	struct fc_exch *ep;
+
+	/* allocate memory for exchange */
+	ep = mempool_alloc(mp->ep_pool, GFP_ATOMIC);
+	if (!ep) {
+		atomic_inc(&mp->stats.no_free_exch);
+		goto out;
+	}
+	memset(ep, 0, sizeof(*ep));
+
+	spin_lock_bh(&mp->em_lock);
+	/* alloc xid if input xid 0 */
+	if (!xid) {
+		/* alloc a new xid */
+		xid = fc_em_alloc_xid(mp, fp);
+		if (!xid) {
+			printk(KERN_ERR "fc_em_alloc_xid() failed\n");
+			goto err;
+		}
+	}
+
+	fc_exch_hold(ep);	/* hold for exch in mp */
+	spin_lock_init(&ep->ex_lock);
+	/*
+	 * Hold exch lock for caller to prevent fc_exch_reset()
+	 * from releasing exch	while fc_exch_alloc() caller is
+	 * still working on exch.
+	 */
+	spin_lock_bh(&ep->ex_lock);
+
+	mp->exches[xid - mp->min_xid] = ep;
+	list_add_tail(&ep->ex_list, &mp->ex_list);
+	fc_seq_alloc(ep, ep->seq_id++);
+	mp->total_exches++;
+	spin_unlock_bh(&mp->em_lock);
+
+	/*
+	 *  update exchange
+	 */
+	ep->oxid = ep->xid = xid;
+	ep->em = mp;
+	ep->lp = mp->lp;
+	ep->f_ctl = FC_FC_FIRST_SEQ;	/* next seq is first seq */
+	ep->rxid = FC_XID_UNKNOWN;
+	ep->class = mp->class;
+	INIT_DELAYED_WORK(&ep->timeout_work, fc_exch_timeout);
+out:
+	return ep;
+err:
+	spin_unlock_bh(&mp->em_lock);
+	atomic_inc(&mp->stats.no_free_exch_xid);
+	mempool_free(ep, mp->ep_pool);
+	return NULL;
+}
+EXPORT_SYMBOL(fc_exch_alloc);
+
+/*
+ * Lookup and hold an exchange.
+ */
+static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
+{
+	struct fc_exch *ep = NULL;
+
+	if ((xid >= mp->min_xid) && (xid <= mp->max_xid)) {
+		spin_lock_bh(&mp->em_lock);
+		ep = mp->exches[xid - mp->min_xid];
+		if (ep) {
+			fc_exch_hold(ep);
+			WARN_ON(ep->xid != xid);
+		}
+		spin_unlock_bh(&mp->em_lock);
+	}
+	return ep;
+}
+
+void fc_exch_done(struct fc_seq *sp)
+{
+	struct fc_exch *ep = fc_seq_exch(sp);
+	int rc;
+
+	spin_lock_bh(&ep->ex_lock);
+	rc = fc_exch_done_locked(ep);
+	spin_unlock_bh(&ep->ex_lock);
+	if (!rc)
+		fc_exch_mgr_delete_ep(ep);
+}
+EXPORT_SYMBOL(fc_exch_done);
+
+/*
+ * Allocate a new exchange as responder.
+ * Sets the responder ID in the frame header.
+ */
+static struct fc_exch *fc_exch_resp(struct fc_exch_mgr *mp, struct fc_frame *fp)
+{
+	struct fc_exch *ep;
+	struct fc_frame_header *fh;
+	u16 rxid;
+
+	ep = mp->lp->tt.exch_get(mp->lp, fp);
+	if (ep) {
+		ep->class = fc_frame_class(fp);
+
+		/*
+		 * Set EX_CTX indicating we're responding on this exchange.
+		 */
+		ep->f_ctl |= FC_FC_EX_CTX;	/* we're responding */
+		ep->f_ctl &= ~FC_FC_FIRST_SEQ;	/* not new */
+		fh = fc_frame_header_get(fp);
+		ep->sid = ntoh24(fh->fh_d_id);
+		ep->did = ntoh24(fh->fh_s_id);
+		ep->oid = ep->did;
+
+		/*
+		 * Allocated exchange has placed the XID in the
+		 * originator field. Move it to the responder field,
+		 * and set the originator XID from the frame.
+		 */
+		ep->rxid = ep->xid;
+		ep->oxid = ntohs(fh->fh_ox_id);
+		ep->esb_stat |= ESB_ST_RESP | ESB_ST_SEQ_INIT;
+		if ((ntoh24(fh->fh_f_ctl) & FC_FC_SEQ_INIT) == 0)
+			ep->esb_stat &= ~ESB_ST_SEQ_INIT;
+
+		/*
+		 * Set the responder ID in the frame header.
+		 * The old one should've been 0xffff.
+		 * If it isn't, don't assign one.
+		 * Incoming basic link service frames may specify
+		 * a referenced RX_ID.
+		 */
+		if (fh->fh_type != FC_TYPE_BLS) {
+			rxid = ntohs(fh->fh_rx_id);
+			WARN_ON(rxid != FC_XID_UNKNOWN);
+			fh->fh_rx_id = htons(ep->rxid);
+		}
+		fc_exch_hold(ep);	/* hold for caller */
+		spin_unlock_bh(&ep->ex_lock);	/* lock from exch_get */
+	}
+	return ep;
+}
+
+/*
+ * Find a sequence for receive where the other end is originating the sequence.
+ * If fc_pf_rjt_reason is FC_RJT_NONE then this function will have a hold
+ * on the ep that should be released by the caller.
+ */
+static enum fc_pf_rjt_reason
+fc_seq_lookup_recip(struct fc_exch_mgr *mp, struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	struct fc_exch *ep = NULL;
+	struct fc_seq *sp = NULL;
+	enum fc_pf_rjt_reason reject = FC_RJT_NONE;
+	u32 f_ctl;
+	u16 xid;
+
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	WARN_ON((f_ctl & FC_FC_SEQ_CTX) != 0);
+
+	/*
+	 * Lookup or create the exchange if we will be creating the sequence.
+	 */
+	if (f_ctl & FC_FC_EX_CTX) {
+		xid = ntohs(fh->fh_ox_id);	/* we originated exch */
+		ep = fc_exch_find(mp, xid);
+		if (!ep) {
+			atomic_inc(&mp->stats.xid_not_found);
+			reject = FC_RJT_OX_ID;
+			goto out;
+		}
+		if (ep->rxid == FC_XID_UNKNOWN)
+			ep->rxid = ntohs(fh->fh_rx_id);
+		else if (ep->rxid != ntohs(fh->fh_rx_id)) {
+			reject = FC_RJT_OX_ID;
+			goto rel;
+		}
+	} else {
+		xid = ntohs(fh->fh_rx_id);	/* we are the responder */
+
+		/*
+		 * Special case for MDS issuing an ELS TEST with a
+		 * bad rxid of 0.
+		 * XXX take this out once we do the proper reject.
+		 */
+		if (xid == 0 && fh->fh_r_ctl == FC_RCTL_ELS_REQ &&
+		    fc_frame_payload_op(fp) == ELS_TEST) {
+			fh->fh_rx_id = htons(FC_XID_UNKNOWN);
+			xid = FC_XID_UNKNOWN;
+		}
+
+		/*
+		 * new sequence - find the exchange
+		 */
+		ep = fc_exch_find(mp, xid);
+		if ((f_ctl & FC_FC_FIRST_SEQ) && fc_sof_is_init(fr_sof(fp))) {
+			if (ep) {
+				atomic_inc(&mp->stats.xid_busy);
+				reject = FC_RJT_RX_ID;
+				goto rel;
+			}
+			ep = fc_exch_resp(mp, fp);
+			if (!ep) {
+				reject = FC_RJT_EXCH_EST;	/* XXX */
+				goto out;
+			}
+			xid = ep->xid;	/* get our XID */
+		} else if (!ep) {
+			atomic_inc(&mp->stats.xid_not_found);
+			reject = FC_RJT_RX_ID;	/* XID not found */
+			goto out;
+		}
+	}
+
+	/*
+	 * At this point, we have the exchange held.
+	 * Find or create the sequence.
+	 */
+	if (fc_sof_is_init(fr_sof(fp))) {
+		sp = fc_seq_start_next(&ep->seq);
+		if (!sp) {
+			reject = FC_RJT_SEQ_XS;	/* exchange shortage */
+			goto rel;
+		}
+		sp->id = fh->fh_seq_id;
+		sp->ssb_stat |= SSB_ST_RESP;
+	} else {
+		sp = &ep->seq;
+		if (sp->id != fh->fh_seq_id) {
+			atomic_inc(&mp->stats.seq_not_found);
+			reject = FC_RJT_SEQ_ID;	/* sequence/exch should exist */
+			goto rel;
+		}
+	}
+	WARN_ON(ep != fc_seq_exch(sp));
+
+	if (f_ctl & FC_FC_SEQ_INIT)
+		ep->esb_stat |= ESB_ST_SEQ_INIT;
+
+	fr_seq(fp) = sp;
+out:
+	return reject;
+rel:
+	fc_exch_done(&ep->seq);
+	fc_exch_release(ep);	/* hold from fc_exch_find/fc_exch_resp */
+	return reject;
+}
+
+/*
+ * Find the sequence for a frame being received.
+ * We originated the sequence, so it should be found.
+ * We may or may not have originated the exchange.
+ * Does not hold the sequence for the caller.
+ */
+static struct fc_seq *fc_seq_lookup_orig(struct fc_exch_mgr *mp,
+					 struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	struct fc_exch *ep;
+	struct fc_seq *sp = NULL;
+	u32 f_ctl;
+	u16 xid;
+
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	WARN_ON((f_ctl & FC_FC_SEQ_CTX) != FC_FC_SEQ_CTX);
+	xid = ntohs((f_ctl & FC_FC_EX_CTX) ? fh->fh_ox_id : fh->fh_rx_id);
+	ep = fc_exch_find(mp, xid);
+	if (!ep)
+		return NULL;
+	if (ep->seq.id == fh->fh_seq_id) {
+		/*
+		 * Save the RX_ID if we didn't previously know it.
+		 */
+		sp = &ep->seq;
+		if ((f_ctl & FC_FC_EX_CTX) != 0 &&
+		    ep->rxid == FC_XID_UNKNOWN) {
+			ep->rxid = ntohs(fh->fh_rx_id);
+		}
+	}
+	fc_exch_release(ep);
+	return sp;
+}
+
+/*
+ * Set addresses for an exchange.
+ * Note this must be done before the first sequence of the exchange is sent.
+ */
+static void fc_exch_set_addr(struct fc_exch *ep,
+			     u32 orig_id, u32 resp_id)
+{
+	ep->oid = orig_id;
+	if (ep->esb_stat & ESB_ST_RESP) {
+		ep->sid = resp_id;
+		ep->did = orig_id;
+	} else {
+		ep->sid = orig_id;
+		ep->did = resp_id;
+	}
+}
+
+static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp)
+{
+	struct fc_exch *ep = fc_seq_exch(sp);
+
+	sp = fc_seq_alloc(ep, ep->seq_id++);
+	FC_DEBUG_EXCH("exch %4x f_ctl %6x seq %2x\n",
+		      ep->xid, ep->f_ctl, sp->id);
+	return sp;
+}
+/*
+ * Allocate a new sequence on the same exchange as the supplied sequence.
+ * This will never return NULL.
+ */
+struct fc_seq *fc_seq_start_next(struct fc_seq *sp)
+{
+	struct fc_exch *ep = fc_seq_exch(sp);
+
+	spin_lock_bh(&ep->ex_lock);
+	WARN_ON((ep->esb_stat & ESB_ST_COMPLETE) != 0);
+	sp = fc_seq_start_next_locked(sp);
+	spin_unlock_bh(&ep->ex_lock);
+
+	return sp;
+}
+EXPORT_SYMBOL(fc_seq_start_next);
+
+int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, struct fc_frame *fp)
+{
+	struct fc_exch *ep;
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	int error;
+	u32	f_ctl;
+
+	ep = fc_seq_exch(sp);
+	WARN_ON((ep->esb_stat & ESB_ST_SEQ_INIT) != ESB_ST_SEQ_INIT);
+
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	fc_exch_setup_hdr(ep, fp, f_ctl);
+
+	/*
+	 * update sequence count if this frame is carrying
+	 * multiple FC frames when sequence offload is enabled
+	 * by LLD.
+	 */
+	if (fr_max_payload(fp))
+		sp->cnt += DIV_ROUND_UP((fr_len(fp) - sizeof(*fh)),
+					fr_max_payload(fp));
+	else
+		sp->cnt++;
+
+	/*
+	 * Send the frame.
+	 */
+	error = lp->tt.frame_send(lp, fp);
+
+	/*
+	 * Update the exchange and sequence flags,
+	 * assuming all frames for the sequence have been sent.
+	 * We can only be called to send once for each sequence.
+	 */
+	spin_lock_bh(&ep->ex_lock);
+	ep->f_ctl = f_ctl & ~FC_FC_FIRST_SEQ;	/* not first seq */
+	if (f_ctl & (FC_FC_END_SEQ | FC_FC_SEQ_INIT))
+		ep->esb_stat &= ~ESB_ST_SEQ_INIT;
+	spin_unlock_bh(&ep->ex_lock);
+	return error;
+}
+EXPORT_SYMBOL(fc_seq_send);
+
+void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd,
+			 struct fc_seq_els_data *els_data)
+{
+	switch (els_cmd) {
+	case ELS_LS_RJT:
+		fc_seq_ls_rjt(sp, els_data->reason, els_data->explan);
+		break;
+	case ELS_LS_ACC:
+		fc_seq_ls_acc(sp);
+		break;
+	case ELS_RRQ:
+		fc_exch_els_rrq(sp, els_data->fp);
+		break;
+	case ELS_REC:
+		fc_exch_els_rec(sp, els_data->fp);
+		break;
+	default:
+		FC_DBG("Invalid ELS CMD:%x\n", els_cmd);
+	}
+}
+EXPORT_SYMBOL(fc_seq_els_rsp_send);
+
+/*
+ * Send a sequence, which is also the last sequence in the exchange.
+ */
+static void fc_seq_send_last(struct fc_seq *sp, struct fc_frame *fp,
+			     enum fc_rctl rctl, enum fc_fh_type fh_type)
+{
+	u32 f_ctl;
+	struct fc_exch *ep = fc_seq_exch(sp);
+
+	f_ctl = FC_FC_LAST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+	f_ctl |= ep->f_ctl;
+	fc_fill_fc_hdr(fp, rctl, ep->did, ep->sid, fh_type, f_ctl, 0);
+	fc_seq_send(ep->lp, sp, fp);
+}
+
+/*
+ * Send ACK_1 (or equiv.) indicating we received something.
+ * The frame we're acking is supplied.
+ */
+static void fc_seq_send_ack(struct fc_seq *sp, const struct fc_frame *rx_fp)
+{
+	struct fc_frame *fp;
+	struct fc_frame_header *rx_fh;
+	struct fc_frame_header *fh;
+	struct fc_exch *ep = fc_seq_exch(sp);
+	struct fc_lport *lp = ep->lp;
+	unsigned int f_ctl;
+
+	/*
+	 * Don't send ACKs for class 3.
+	 */
+	if (fc_sof_needs_ack(fr_sof(rx_fp))) {
+		fp = fc_frame_alloc(lp, 0);
+		if (!fp)
+			return;
+
+		fh = fc_frame_header_get(fp);
+		fh->fh_r_ctl = FC_RCTL_ACK_1;
+		fh->fh_type = FC_TYPE_BLS;
+
+		/*
+		 * Form f_ctl by inverting EX_CTX and SEQ_CTX (bits 23, 22).
+		 * Echo FIRST_SEQ, LAST_SEQ, END_SEQ, END_CONN, SEQ_INIT.
+		 * Bits 9-8 are meaningful (retransmitted or unidirectional).
+		 * Last ACK uses bits 7-6 (continue sequence),
+		 * bits 5-4 are meaningful (what kind of ACK to use).
+		 */
+		rx_fh = fc_frame_header_get(rx_fp);
+		f_ctl = ntoh24(rx_fh->fh_f_ctl);
+		f_ctl &= FC_FC_EX_CTX | FC_FC_SEQ_CTX |
+			FC_FC_FIRST_SEQ | FC_FC_LAST_SEQ |
+			FC_FC_END_SEQ | FC_FC_END_CONN | FC_FC_SEQ_INIT |
+			FC_FC_RETX_SEQ | FC_FC_UNI_TX;
+		f_ctl ^= FC_FC_EX_CTX | FC_FC_SEQ_CTX;
+		hton24(fh->fh_f_ctl, f_ctl);
+
+		fc_exch_setup_hdr(ep, fp, f_ctl);
+		fh->fh_seq_id = rx_fh->fh_seq_id;
+		fh->fh_seq_cnt = rx_fh->fh_seq_cnt;
+		fh->fh_parm_offset = htonl(1);	/* ack single frame */
+
+		fr_sof(fp) = fr_sof(rx_fp);
+		if (f_ctl & FC_FC_END_SEQ)
+			fr_eof(fp) = FC_EOF_T;
+		else
+			fr_eof(fp) = FC_EOF_N;
+
+		(void) lp->tt.frame_send(lp, fp);
+	}
+}
+
+/*
+ * Send BLS Reject.
+ * This is for rejecting BA_ABTS only.
+ */
+static void
+fc_exch_send_ba_rjt(struct fc_frame *rx_fp, enum fc_ba_rjt_reason reason,
+		    enum fc_ba_rjt_explan explan)
+{
+	struct fc_frame *fp;
+	struct fc_frame_header *rx_fh;
+	struct fc_frame_header *fh;
+	struct fc_ba_rjt *rp;
+	struct fc_lport *lp;
+	unsigned int f_ctl;
+
+	lp = fr_dev(rx_fp);
+	fp = fc_frame_alloc(lp, sizeof(*rp));
+	if (!fp)
+		return;
+	fh = fc_frame_header_get(fp);
+	rx_fh = fc_frame_header_get(rx_fp);
+
+	memset(fh, 0, sizeof(*fh) + sizeof(*rp));
+
+	rp = fc_frame_payload_get(fp, sizeof(*rp));
+	rp->br_reason = reason;
+	rp->br_explan = explan;
+
+	/*
+	 * seq_id, cs_ctl, df_ctl and param/offset are zero.
+	 */
+	memcpy(fh->fh_s_id, rx_fh->fh_d_id, 3);
+	memcpy(fh->fh_d_id, rx_fh->fh_s_id, 3);
+	fh->fh_ox_id = rx_fh->fh_rx_id;
+	fh->fh_rx_id = rx_fh->fh_ox_id;
+	fh->fh_seq_cnt = rx_fh->fh_seq_cnt;
+	fh->fh_r_ctl = FC_RCTL_BA_RJT;
+	fh->fh_type = FC_TYPE_BLS;
+
+	/*
+	 * Form f_ctl by inverting EX_CTX and SEQ_CTX (bits 23, 22).
+	 * Echo FIRST_SEQ, LAST_SEQ, END_SEQ, END_CONN, SEQ_INIT.
+	 * Bits 9-8 are meaningful (retransmitted or unidirectional).
+	 * Last ACK uses bits 7-6 (continue sequence),
+	 * bits 5-4 are meaningful (what kind of ACK to use).
+	 * Always set LAST_SEQ, END_SEQ.
+	 */
+	f_ctl = ntoh24(rx_fh->fh_f_ctl);
+	f_ctl &= FC_FC_EX_CTX | FC_FC_SEQ_CTX |
+		FC_FC_END_CONN | FC_FC_SEQ_INIT |
+		FC_FC_RETX_SEQ | FC_FC_UNI_TX;
+	f_ctl ^= FC_FC_EX_CTX | FC_FC_SEQ_CTX;
+	f_ctl |= FC_FC_LAST_SEQ | FC_FC_END_SEQ;
+	f_ctl &= ~FC_FC_FIRST_SEQ;
+	hton24(fh->fh_f_ctl, f_ctl);
+
+	fr_sof(fp) = fc_sof_class(fr_sof(rx_fp));
+	fr_eof(fp) = FC_EOF_T;
+	if (fc_sof_needs_ack(fr_sof(fp)))
+		fr_eof(fp) = FC_EOF_N;
+
+	(void) lp->tt.frame_send(lp, fp);
+}
+
+/*
+ * Handle an incoming ABTS.  This would be for target mode usually,
+ * but could be due to lost FCP transfer ready, confirm or RRQ.
+ * We always handle this as an exchange abort, ignoring the parameter.
+ */
+static void fc_exch_recv_abts(struct fc_exch *ep, struct fc_frame *rx_fp)
+{
+	struct fc_frame *fp;
+	struct fc_ba_acc *ap;
+	struct fc_frame_header *fh;
+	struct fc_seq *sp;
+
+	if (!ep)
+		goto reject;
+	spin_lock_bh(&ep->ex_lock);
+	if (ep->esb_stat & ESB_ST_COMPLETE) {
+		spin_unlock_bh(&ep->ex_lock);
+		goto reject;
+	}
+	if (!(ep->esb_stat & ESB_ST_REC_QUAL))
+		fc_exch_hold(ep);		/* hold for REC_QUAL */
+	ep->esb_stat |= ESB_ST_ABNORMAL | ESB_ST_REC_QUAL;
+	fc_exch_timer_set_locked(ep, ep->r_a_tov);
+
+	fp = fc_frame_alloc(ep->lp, sizeof(*ap));
+	if (!fp) {
+		spin_unlock_bh(&ep->ex_lock);
+		goto free;
+	}
+	fh = fc_frame_header_get(fp);
+	ap = fc_frame_payload_get(fp, sizeof(*ap));
+	memset(ap, 0, sizeof(*ap));
+	sp = &ep->seq;
+	ap->ba_high_seq_cnt = htons(0xffff);
+	if (sp->ssb_stat & SSB_ST_RESP) {
+		ap->ba_seq_id = sp->id;
+		ap->ba_seq_id_val = FC_BA_SEQ_ID_VAL;
+		ap->ba_high_seq_cnt = fh->fh_seq_cnt;
+		ap->ba_low_seq_cnt = htons(sp->cnt);
+	}
+	sp = fc_seq_start_next(sp);
+	spin_unlock_bh(&ep->ex_lock);
+	fc_seq_send_last(sp, fp, FC_RCTL_BA_ACC, FC_TYPE_BLS);
+	fc_frame_free(rx_fp);
+	return;
+
+reject:
+	fc_exch_send_ba_rjt(rx_fp, FC_BA_RJT_UNABLE, FC_BA_RJT_INV_XID);
+free:
+	fc_frame_free(rx_fp);
+}
+
+/*
+ * Handle receive where the other end is originating the sequence.
+ */
+static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp,
+			     struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	struct fc_seq *sp = NULL;
+	struct fc_exch *ep = NULL;
+	enum fc_sof sof;
+	enum fc_eof eof;
+	u32 f_ctl;
+	enum fc_pf_rjt_reason reject;
+
+	fr_seq(fp) = NULL;
+	reject = fc_seq_lookup_recip(mp, fp);
+	if (reject == FC_RJT_NONE) {
+		sp = fr_seq(fp);	/* sequence will be held */
+		ep = fc_seq_exch(sp);
+		sof = fr_sof(fp);
+		eof = fr_eof(fp);
+		f_ctl = ntoh24(fh->fh_f_ctl);
+		fc_seq_send_ack(sp, fp);
+
+		/*
+		 * Call the receive function.
+		 *
+		 * The receive function may allocate a new sequence
+		 * over the old one, so we shouldn't change the
+		 * sequence after this.
+		 *
+		 * The frame will be freed by the receive function.
+		 * If new exch resp handler is valid then call that
+		 * first.
+		 */
+		if (ep->resp)
+			ep->resp(sp, fp, ep->arg);
+		else
+			lp->tt.lport_recv(lp, sp, fp);
+		fc_exch_release(ep);	/* release from lookup */
+	} else {
+		FC_DEBUG_EXCH("exch/seq lookup failed: reject %x\n", reject);
+		fc_frame_free(fp);
+	}
+}
+
+/*
+ * Handle receive where the other end is originating the sequence in
+ * response to our exchange.
+ */
+static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	struct fc_seq *sp;
+	struct fc_exch *ep;
+	enum fc_sof sof;
+	u32 f_ctl;
+	void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg);
+	void *ex_resp_arg;
+	int rc;
+
+	ep = fc_exch_find(mp, ntohs(fh->fh_ox_id));
+	if (!ep) {
+		atomic_inc(&mp->stats.xid_not_found);
+		goto out;
+	}
+	if (ep->rxid == FC_XID_UNKNOWN)
+		ep->rxid = ntohs(fh->fh_rx_id);
+	if (ep->sid != 0 && ep->sid != ntoh24(fh->fh_d_id)) {
+		atomic_inc(&mp->stats.xid_not_found);
+		goto rel;
+	}
+	if (ep->did != ntoh24(fh->fh_s_id) &&
+	    ep->did != FC_FID_FLOGI) {
+		atomic_inc(&mp->stats.xid_not_found);
+		goto rel;
+	}
+	sof = fr_sof(fp);
+	if (fc_sof_is_init(sof)) {
+		sp = fc_seq_start_next(&ep->seq);
+		sp->id = fh->fh_seq_id;
+		sp->ssb_stat |= SSB_ST_RESP;
+	} else {
+		sp = &ep->seq;
+		if (sp->id != fh->fh_seq_id) {
+			atomic_inc(&mp->stats.seq_not_found);
+			goto rel;
+		}
+	}
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	fr_seq(fp) = sp;
+	if (f_ctl & FC_FC_SEQ_INIT)
+		ep->esb_stat |= ESB_ST_SEQ_INIT;
+
+	if (fc_sof_needs_ack(sof))
+		fc_seq_send_ack(sp, fp);
+	resp = ep->resp;
+	ex_resp_arg = ep->arg;
+
+	if (fh->fh_type != FC_TYPE_FCP && fr_eof(fp) == FC_EOF_T &&
+	    (f_ctl & (FC_FC_LAST_SEQ | FC_FC_END_SEQ)) ==
+	    (FC_FC_LAST_SEQ | FC_FC_END_SEQ)) {
+		spin_lock_bh(&ep->ex_lock);
+		rc = fc_exch_done_locked(ep);
+		WARN_ON(fc_seq_exch(sp) != ep);
+		spin_unlock_bh(&ep->ex_lock);
+		if (!rc)
+			fc_exch_mgr_delete_ep(ep);
+	}
+
+	/*
+	 * Call the receive function.
+	 * The sequence is held (has a refcnt) for us,
+	 * but not for the receive function.
+	 *
+	 * The receive function may allocate a new sequence
+	 * over the old one, so we shouldn't change the
+	 * sequence after this.
+	 *
+	 * The frame will be freed by the receive function.
+	 * If new exch resp handler is valid then call that
+	 * first.
+	 */
+	if (resp)
+		resp(sp, fp, ex_resp_arg);
+	else
+		fc_frame_free(fp);
+	fc_exch_release(ep);
+	return;
+rel:
+	fc_exch_release(ep);
+out:
+	fc_frame_free(fp);
+}
+
+/*
+ * Handle receive for a sequence where other end is responding to our sequence.
+ */
+static void fc_exch_recv_resp(struct fc_exch_mgr *mp, struct fc_frame *fp)
+{
+	struct fc_seq *sp;
+
+	sp = fc_seq_lookup_orig(mp, fp);	/* doesn't hold sequence */
+	if (!sp) {
+		atomic_inc(&mp->stats.xid_not_found);
+		FC_DEBUG_EXCH("seq lookup failed\n");
+	} else {
+		atomic_inc(&mp->stats.non_bls_resp);
+		FC_DEBUG_EXCH("non-BLS response to sequence");
+	}
+	fc_frame_free(fp);
+}
+
+/*
+ * Handle the response to an ABTS for exchange or sequence.
+ * This can be BA_ACC or BA_RJT.
+ */
+static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp)
+{
+	void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg);
+	void *ex_resp_arg;
+	struct fc_frame_header *fh;
+	struct fc_ba_acc *ap;
+	struct fc_seq *sp;
+	u16 low;
+	u16 high;
+	int rc = 1, has_rec = 0;
+
+	fh = fc_frame_header_get(fp);
+	FC_DEBUG_EXCH("exch: BLS rctl %x - %s\n",
+		      fh->fh_r_ctl, fc_exch_rctl_name(fh->fh_r_ctl));
+
+	if (cancel_delayed_work_sync(&ep->timeout_work))
+		fc_exch_release(ep);	/* release from pending timer hold */
+
+	spin_lock_bh(&ep->ex_lock);
+	switch (fh->fh_r_ctl) {
+	case FC_RCTL_BA_ACC:
+		ap = fc_frame_payload_get(fp, sizeof(*ap));
+		if (!ap)
+			break;
+
+		/*
+		 * Decide whether to establish a Recovery Qualifier.
+		 * We do this if there is a non-empty SEQ_CNT range and
+		 * SEQ_ID is the same as the one we aborted.
+		 */
+		low = ntohs(ap->ba_low_seq_cnt);
+		high = ntohs(ap->ba_high_seq_cnt);
+		if ((ep->esb_stat & ESB_ST_REC_QUAL) == 0 &&
+		    (ap->ba_seq_id_val != FC_BA_SEQ_ID_VAL ||
+		     ap->ba_seq_id == ep->seq_id) && low != high) {
+			ep->esb_stat |= ESB_ST_REC_QUAL;
+			fc_exch_hold(ep);  /* hold for recovery qualifier */
+			has_rec = 1;
+		}
+		break;
+	case FC_RCTL_BA_RJT:
+		break;
+	default:
+		break;
+	}
+
+	resp = ep->resp;
+	ex_resp_arg = ep->arg;
+
+	/* do we need to do some other checks here. Can we reuse more of
+	 * fc_exch_recv_seq_resp
+	 */
+	sp = &ep->seq;
+	/*
+	 * do we want to check END_SEQ as well as LAST_SEQ here?
+	 */
+	if (ep->fh_type != FC_TYPE_FCP &&
+	    ntoh24(fh->fh_f_ctl) & FC_FC_LAST_SEQ)
+		rc = fc_exch_done_locked(ep);
+	spin_unlock_bh(&ep->ex_lock);
+	if (!rc)
+		fc_exch_mgr_delete_ep(ep);
+
+	if (resp)
+		resp(sp, fp, ex_resp_arg);
+	else
+		fc_frame_free(fp);
+
+	if (has_rec)
+		fc_exch_timer_set(ep, ep->r_a_tov);
+
+}
+
+/*
+ * Receive BLS sequence.
+ * This is always a sequence initiated by the remote side.
+ * We may be either the originator or recipient of the exchange.
+ */
+static void fc_exch_recv_bls(struct fc_exch_mgr *mp, struct fc_frame *fp)
+{
+	struct fc_frame_header *fh;
+	struct fc_exch *ep;
+	u32 f_ctl;
+
+	fh = fc_frame_header_get(fp);
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	fr_seq(fp) = NULL;
+
+	ep = fc_exch_find(mp, (f_ctl & FC_FC_EX_CTX) ?
+			  ntohs(fh->fh_ox_id) : ntohs(fh->fh_rx_id));
+	if (ep && (f_ctl & FC_FC_SEQ_INIT)) {
+		spin_lock_bh(&ep->ex_lock);
+		ep->esb_stat |= ESB_ST_SEQ_INIT;
+		spin_unlock_bh(&ep->ex_lock);
+	}
+	if (f_ctl & FC_FC_SEQ_CTX) {
+		/*
+		 * A response to a sequence we initiated.
+		 * This should only be ACKs for class 2 or F.
+		 */
+		switch (fh->fh_r_ctl) {
+		case FC_RCTL_ACK_1:
+		case FC_RCTL_ACK_0:
+			break;
+		default:
+			FC_DEBUG_EXCH("BLS rctl %x - %s received",
+				      fh->fh_r_ctl,
+				      fc_exch_rctl_name(fh->fh_r_ctl));
+			break;
+		}
+		fc_frame_free(fp);
+	} else {
+		switch (fh->fh_r_ctl) {
+		case FC_RCTL_BA_RJT:
+		case FC_RCTL_BA_ACC:
+			if (ep)
+				fc_exch_abts_resp(ep, fp);
+			else
+				fc_frame_free(fp);
+			break;
+		case FC_RCTL_BA_ABTS:
+			fc_exch_recv_abts(ep, fp);
+			break;
+		default:			/* ignore junk */
+			fc_frame_free(fp);
+			break;
+		}
+	}
+	if (ep)
+		fc_exch_release(ep);	/* release hold taken by fc_exch_find */
+}
+
+/*
+ * Accept sequence with LS_ACC.
+ * If this fails due to allocation or transmit congestion, assume the
+ * originator will repeat the sequence.
+ */
+static void fc_seq_ls_acc(struct fc_seq *req_sp)
+{
+	struct fc_seq *sp;
+	struct fc_els_ls_acc *acc;
+	struct fc_frame *fp;
+
+	sp = fc_seq_start_next(req_sp);
+	fp = fc_frame_alloc(fc_seq_exch(sp)->lp, sizeof(*acc));
+	if (fp) {
+		acc = fc_frame_payload_get(fp, sizeof(*acc));
+		memset(acc, 0, sizeof(*acc));
+		acc->la_cmd = ELS_LS_ACC;
+		fc_seq_send_last(sp, fp, FC_RCTL_ELS_REP, FC_TYPE_ELS);
+	}
+}
+
+/*
+ * Reject sequence with ELS LS_RJT.
+ * If this fails due to allocation or transmit congestion, assume the
+ * originator will repeat the sequence.
+ */
+static void fc_seq_ls_rjt(struct fc_seq *req_sp, enum fc_els_rjt_reason reason,
+			  enum fc_els_rjt_explan explan)
+{
+	struct fc_seq *sp;
+	struct fc_els_ls_rjt *rjt;
+	struct fc_frame *fp;
+
+	sp = fc_seq_start_next(req_sp);
+	fp = fc_frame_alloc(fc_seq_exch(sp)->lp, sizeof(*rjt));
+	if (fp) {
+		rjt = fc_frame_payload_get(fp, sizeof(*rjt));
+		memset(rjt, 0, sizeof(*rjt));
+		rjt->er_cmd = ELS_LS_RJT;
+		rjt->er_reason = reason;
+		rjt->er_explan = explan;
+		fc_seq_send_last(sp, fp, FC_RCTL_ELS_REP, FC_TYPE_ELS);
+	}
+}
+
+static void fc_exch_reset(struct fc_exch *ep)
+{
+	struct fc_seq *sp;
+	void (*resp)(struct fc_seq *, struct fc_frame *, void *);
+	void *arg;
+	int rc = 1;
+
+	spin_lock_bh(&ep->ex_lock);
+	ep->state |= FC_EX_RST_CLEANUP;
+	/*
+	 * we really want to call del_timer_sync, but cannot due
+	 * to the lport calling with the lport lock held (some resp
+	 * functions can also grab the lport lock which could cause
+	 * a deadlock).
+	 */
+	if (cancel_delayed_work(&ep->timeout_work))
+		atomic_dec(&ep->ex_refcnt);	/* drop hold for timer */
+	resp = ep->resp;
+	ep->resp = NULL;
+	if (ep->esb_stat & ESB_ST_REC_QUAL)
+		atomic_dec(&ep->ex_refcnt);	/* drop hold for rec_qual */
+	ep->esb_stat &= ~ESB_ST_REC_QUAL;
+	arg = ep->arg;
+	sp = &ep->seq;
+	rc = fc_exch_done_locked(ep);
+	spin_unlock_bh(&ep->ex_lock);
+	if (!rc)
+		fc_exch_mgr_delete_ep(ep);
+
+	if (resp)
+		resp(sp, ERR_PTR(-FC_EX_CLOSED), arg);
+}
+
+/*
+ * Reset an exchange manager, releasing all sequences and exchanges.
+ * If sid is non-zero, reset only exchanges we source from that FID.
+ * If did is non-zero, reset only exchanges destined to that FID.
+ */
+void fc_exch_mgr_reset(struct fc_exch_mgr *mp, u32 sid, u32 did)
+{
+	struct fc_exch *ep;
+	struct fc_exch *next;
+
+	spin_lock_bh(&mp->em_lock);
+restart:
+	list_for_each_entry_safe(ep, next, &mp->ex_list, ex_list) {
+		if ((sid == 0 || sid == ep->sid) &&
+		    (did == 0 || did == ep->did)) {
+			fc_exch_hold(ep);
+			spin_unlock_bh(&mp->em_lock);
+
+			fc_exch_reset(ep);
+
+			fc_exch_release(ep);
+			spin_lock_bh(&mp->em_lock);
+
+			/*
+			 * must restart loop incase while lock was down
+			 * multiple eps were released.
+			 */
+			goto restart;
+		}
+	}
+	spin_unlock_bh(&mp->em_lock);
+}
+EXPORT_SYMBOL(fc_exch_mgr_reset);
+
+/*
+ * Handle incoming ELS REC - Read Exchange Concise.
+ * Note that the requesting port may be different than the S_ID in the request.
+ */
+static void fc_exch_els_rec(struct fc_seq *sp, struct fc_frame *rfp)
+{
+	struct fc_frame *fp;
+	struct fc_exch *ep;
+	struct fc_exch_mgr *em;
+	struct fc_els_rec *rp;
+	struct fc_els_rec_acc *acc;
+	enum fc_els_rjt_reason reason = ELS_RJT_LOGIC;
+	enum fc_els_rjt_explan explan;
+	u32 sid;
+	u16 rxid;
+	u16 oxid;
+
+	rp = fc_frame_payload_get(rfp, sizeof(*rp));
+	explan = ELS_EXPL_INV_LEN;
+	if (!rp)
+		goto reject;
+	sid = ntoh24(rp->rec_s_id);
+	rxid = ntohs(rp->rec_rx_id);
+	oxid = ntohs(rp->rec_ox_id);
+
+	/*
+	 * Currently it's hard to find the local S_ID from the exchange
+	 * manager.  This will eventually be fixed, but for now it's easier
+	 * to lookup the subject exchange twice, once as if we were
+	 * the initiator, and then again if we weren't.
+	 */
+	em = fc_seq_exch(sp)->em;
+	ep = fc_exch_find(em, oxid);
+	explan = ELS_EXPL_OXID_RXID;
+	if (ep && ep->oid == sid) {
+		if (ep->rxid != FC_XID_UNKNOWN &&
+		    rxid != FC_XID_UNKNOWN &&
+		    ep->rxid != rxid)
+			goto rel;
+	} else {
+		if (ep)
+			fc_exch_release(ep);
+		ep = NULL;
+		if (rxid != FC_XID_UNKNOWN)
+			ep = fc_exch_find(em, rxid);
+		if (!ep)
+			goto reject;
+	}
+
+	fp = fc_frame_alloc(fc_seq_exch(sp)->lp, sizeof(*acc));
+	if (!fp) {
+		fc_exch_done(sp);
+		goto out;
+	}
+	sp = fc_seq_start_next(sp);
+	acc = fc_frame_payload_get(fp, sizeof(*acc));
+	memset(acc, 0, sizeof(*acc));
+	acc->reca_cmd = ELS_LS_ACC;
+	acc->reca_ox_id = rp->rec_ox_id;
+	memcpy(acc->reca_ofid, rp->rec_s_id, 3);
+	acc->reca_rx_id = htons(ep->rxid);
+	if (ep->sid == ep->oid)
+		hton24(acc->reca_rfid, ep->did);
+	else
+		hton24(acc->reca_rfid, ep->sid);
+	acc->reca_fc4value = htonl(ep->seq.rec_data);
+	acc->reca_e_stat = htonl(ep->esb_stat & (ESB_ST_RESP |
+						 ESB_ST_SEQ_INIT |
+						 ESB_ST_COMPLETE));
+	sp = fc_seq_start_next(sp);
+	fc_seq_send_last(sp, fp, FC_RCTL_ELS_REP, FC_TYPE_ELS);
+out:
+	fc_exch_release(ep);
+	fc_frame_free(rfp);
+	return;
+
+rel:
+	fc_exch_release(ep);
+reject:
+	fc_seq_ls_rjt(sp, reason, explan);
+	fc_frame_free(rfp);
+}
+
+/*
+ * Handle response from RRQ.
+ * Not much to do here, really.
+ * Should report errors.
+ *
+ * TODO: fix error handler.
+ */
+static void fc_exch_rrq_resp(struct fc_seq *sp, struct fc_frame *fp, void *arg)
+{
+	struct fc_exch *aborted_ep = arg;
+	unsigned int op;
+
+	if (IS_ERR(fp)) {
+		int err = PTR_ERR(fp);
+
+		if (err == -FC_EX_CLOSED)
+			goto cleanup;
+		FC_DBG("Cannot process RRQ, because of frame error %d\n", err);
+		return;
+	}
+
+	op = fc_frame_payload_op(fp);
+	fc_frame_free(fp);
+
+	switch (op) {
+	case ELS_LS_RJT:
+		FC_DBG("LS_RJT for RRQ");
+		/* fall through */
+	case ELS_LS_ACC:
+		goto cleanup;
+	default:
+		FC_DBG("unexpected response op %x for RRQ", op);
+		return;
+	}
+
+cleanup:
+	fc_exch_done(&aborted_ep->seq);
+	/* drop hold for rec qual */
+	fc_exch_release(aborted_ep);
+}
+
+/*
+ * Send ELS RRQ - Reinstate Recovery Qualifier.
+ * This tells the remote port to stop blocking the use of
+ * the exchange and the seq_cnt range.
+ */
+static void fc_exch_rrq(struct fc_exch *ep)
+{
+	struct fc_lport *lp;
+	struct fc_els_rrq *rrq;
+	struct fc_frame *fp;
+	struct fc_seq *rrq_sp;
+	u32 did;
+
+	lp = ep->lp;
+
+	fp = fc_frame_alloc(lp, sizeof(*rrq));
+	if (!fp)
+		return;
+	rrq = fc_frame_payload_get(fp, sizeof(*rrq));
+	memset(rrq, 0, sizeof(*rrq));
+	rrq->rrq_cmd = ELS_RRQ;
+	hton24(rrq->rrq_s_id, ep->sid);
+	rrq->rrq_ox_id = htons(ep->oxid);
+	rrq->rrq_rx_id = htons(ep->rxid);
+
+	did = ep->did;
+	if (ep->esb_stat & ESB_ST_RESP)
+		did = ep->sid;
+
+	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REQ, did,
+		       fc_host_port_id(lp->host), FC_TYPE_ELS,
+		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+
+	rrq_sp = fc_exch_seq_send(lp, fp, fc_exch_rrq_resp, NULL, ep,
+				  lp->e_d_tov);
+	if (!rrq_sp) {
+		ep->esb_stat |= ESB_ST_REC_QUAL;
+		fc_exch_timer_set_locked(ep, ep->r_a_tov);
+		return;
+	}
+}
+
+
+/*
+ * Handle incoming ELS RRQ - Reset Recovery Qualifier.
+ */
+static void fc_exch_els_rrq(struct fc_seq *sp, struct fc_frame *fp)
+{
+	struct fc_exch *ep;		/* request or subject exchange */
+	struct fc_els_rrq *rp;
+	u32 sid;
+	u16 xid;
+	enum fc_els_rjt_explan explan;
+
+	rp = fc_frame_payload_get(fp, sizeof(*rp));
+	explan = ELS_EXPL_INV_LEN;
+	if (!rp)
+		goto reject;
+
+	/*
+	 * lookup subject exchange.
+	 */
+	ep = fc_seq_exch(sp);
+	sid = ntoh24(rp->rrq_s_id);		/* subject source */
+	xid = ep->did == sid ? ntohs(rp->rrq_ox_id) : ntohs(rp->rrq_rx_id);
+	ep = fc_exch_find(ep->em, xid);
+
+	explan = ELS_EXPL_OXID_RXID;
+	if (!ep)
+		goto reject;
+	spin_lock_bh(&ep->ex_lock);
+	if (ep->oxid != ntohs(rp->rrq_ox_id))
+		goto unlock_reject;
+	if (ep->rxid != ntohs(rp->rrq_rx_id) &&
+	    ep->rxid != FC_XID_UNKNOWN)
+		goto unlock_reject;
+	explan = ELS_EXPL_SID;
+	if (ep->sid != sid)
+		goto unlock_reject;
+
+	/*
+	 * Clear Recovery Qualifier state, and cancel timer if complete.
+	 */
+	if (ep->esb_stat & ESB_ST_REC_QUAL) {
+		ep->esb_stat &= ~ESB_ST_REC_QUAL;
+		atomic_dec(&ep->ex_refcnt);	/* drop hold for rec qual */
+	}
+	if (ep->esb_stat & ESB_ST_COMPLETE) {
+		if (cancel_delayed_work(&ep->timeout_work))
+			atomic_dec(&ep->ex_refcnt);	/* drop timer hold */
+	}
+
+	spin_unlock_bh(&ep->ex_lock);
+
+	/*
+	 * Send LS_ACC.
+	 */
+	fc_seq_ls_acc(sp);
+	fc_frame_free(fp);
+	return;
+
+unlock_reject:
+	spin_unlock_bh(&ep->ex_lock);
+	fc_exch_release(ep);	/* drop hold from fc_exch_find */
+reject:
+	fc_seq_ls_rjt(sp, ELS_RJT_LOGIC, explan);
+	fc_frame_free(fp);
+}
+
+struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp,
+				      enum fc_class class,
+				      u16 min_xid, u16 max_xid)
+{
+	struct fc_exch_mgr *mp;
+	size_t len;
+
+	if (max_xid <= min_xid || min_xid == 0 || max_xid == FC_XID_UNKNOWN) {
+		FC_DBG("Invalid min_xid 0x:%x and max_xid 0x:%x\n",
+		       min_xid, max_xid);
+		return NULL;
+	}
+
+	/*
+	 * Memory need for EM
+	 */
+#define xid_ok(i, m1, m2) (((i) >= (m1)) && ((i) <= (m2)))
+	len = (max_xid - min_xid + 1) * (sizeof(struct fc_exch *));
+	len += sizeof(struct fc_exch_mgr);
+
+	mp = kzalloc(len, GFP_ATOMIC);
+	if (!mp)
+		return NULL;
+
+	mp->class = class;
+	mp->total_exches = 0;
+	mp->exches = (struct fc_exch **)(mp + 1);
+	mp->lp = lp;
+	/* adjust em exch xid range for offload */
+	mp->min_xid = min_xid;
+	mp->max_xid = max_xid;
+	mp->last_xid = min_xid - 1;
+	mp->max_read = 0;
+	mp->last_read = 0;
+	if (lp->lro_enabled && xid_ok(lp->lro_xid, min_xid, max_xid)) {
+		mp->max_read = lp->lro_xid;
+		mp->last_read = min_xid - 1;
+		mp->last_xid = mp->max_read;
+	} else {
+		/* disable lro if no xid control over read */
+		lp->lro_enabled = 0;
+	}
+
+	INIT_LIST_HEAD(&mp->ex_list);
+	spin_lock_init(&mp->em_lock);
+
+	mp->ep_pool = mempool_create_slab_pool(2, fc_em_cachep);
+	if (!mp->ep_pool)
+		goto free_mp;
+
+	return mp;
+
+free_mp:
+	kfree(mp);
+	return NULL;
+}
+EXPORT_SYMBOL(fc_exch_mgr_alloc);
+
+void fc_exch_mgr_free(struct fc_exch_mgr *mp)
+{
+	WARN_ON(!mp);
+	/*
+	 * The total exch count must be zero
+	 * before freeing exchange manager.
+	 */
+	WARN_ON(mp->total_exches != 0);
+	mempool_destroy(mp->ep_pool);
+	kfree(mp);
+}
+EXPORT_SYMBOL(fc_exch_mgr_free);
+
+struct fc_exch *fc_exch_get(struct fc_lport *lp, struct fc_frame *fp)
+{
+	if (!lp || !lp->emp)
+		return NULL;
+
+	return fc_exch_alloc(lp->emp, fp, 0);
+}
+EXPORT_SYMBOL(fc_exch_get);
+
+struct fc_seq *fc_exch_seq_send(struct fc_lport *lp,
+				struct fc_frame *fp,
+				void (*resp)(struct fc_seq *,
+					     struct fc_frame *fp,
+					     void *arg),
+				void (*destructor)(struct fc_seq *, void *),
+				void *arg, u32 timer_msec)
+{
+	struct fc_exch *ep;
+	struct fc_seq *sp = NULL;
+	struct fc_frame_header *fh;
+	int rc = 1;
+
+	ep = lp->tt.exch_get(lp, fp);
+	if (!ep) {
+		fc_frame_free(fp);
+		return NULL;
+	}
+	ep->esb_stat |= ESB_ST_SEQ_INIT;
+	fh = fc_frame_header_get(fp);
+	fc_exch_set_addr(ep, ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id));
+	ep->resp = resp;
+	ep->destructor = destructor;
+	ep->arg = arg;
+	ep->r_a_tov = FC_DEF_R_A_TOV;
+	ep->lp = lp;
+	sp = &ep->seq;
+
+	ep->fh_type = fh->fh_type; /* save for possbile timeout handling */
+	ep->f_ctl = ntoh24(fh->fh_f_ctl);
+	fc_exch_setup_hdr(ep, fp, ep->f_ctl);
+	sp->cnt++;
+
+	if (unlikely(lp->tt.frame_send(lp, fp)))
+		goto err;
+
+	if (timer_msec)
+		fc_exch_timer_set_locked(ep, timer_msec);
+	ep->f_ctl &= ~FC_FC_FIRST_SEQ;	/* not first seq */
+
+	if (ep->f_ctl & FC_FC_SEQ_INIT)
+		ep->esb_stat &= ~ESB_ST_SEQ_INIT;
+	spin_unlock_bh(&ep->ex_lock);
+	return sp;
+err:
+	rc = fc_exch_done_locked(ep);
+	spin_unlock_bh(&ep->ex_lock);
+	if (!rc)
+		fc_exch_mgr_delete_ep(ep);
+	return NULL;
+}
+EXPORT_SYMBOL(fc_exch_seq_send);
+
+/*
+ * Receive a frame
+ */
+void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp,
+		  struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	u32 f_ctl;
+
+	/* lport lock ? */
+	if (!lp || !mp || (lp->state == LPORT_ST_NONE)) {
+		FC_DBG("fc_lport or EM is not allocated and configured");
+		fc_frame_free(fp);
+		return;
+	}
+
+	/*
+	 * If frame is marked invalid, just drop it.
+	 */
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	switch (fr_eof(fp)) {
+	case FC_EOF_T:
+		if (f_ctl & FC_FC_END_SEQ)
+			skb_trim(fp_skb(fp), fr_len(fp) - FC_FC_FILL(f_ctl));
+		/* fall through */
+	case FC_EOF_N:
+		if (fh->fh_type == FC_TYPE_BLS)
+			fc_exch_recv_bls(mp, fp);
+		else if ((f_ctl & (FC_FC_EX_CTX | FC_FC_SEQ_CTX)) ==
+			 FC_FC_EX_CTX)
+			fc_exch_recv_seq_resp(mp, fp);
+		else if (f_ctl & FC_FC_SEQ_CTX)
+			fc_exch_recv_resp(mp, fp);
+		else
+			fc_exch_recv_req(lp, mp, fp);
+		break;
+	default:
+		FC_DBG("dropping invalid frame (eof %x)", fr_eof(fp));
+		fc_frame_free(fp);
+		break;
+	}
+}
+EXPORT_SYMBOL(fc_exch_recv);
+
+int fc_exch_init(struct fc_lport *lp)
+{
+	if (!lp->tt.exch_get) {
+		/*
+		 *  exch_put() should be NULL if
+		 *  exch_get() is NULL
+		 */
+		WARN_ON(lp->tt.exch_put);
+		lp->tt.exch_get = fc_exch_get;
+	}
+
+	if (!lp->tt.seq_start_next)
+		lp->tt.seq_start_next = fc_seq_start_next;
+
+	if (!lp->tt.exch_seq_send)
+		lp->tt.exch_seq_send = fc_exch_seq_send;
+
+	if (!lp->tt.seq_send)
+		lp->tt.seq_send = fc_seq_send;
+
+	if (!lp->tt.seq_els_rsp_send)
+		lp->tt.seq_els_rsp_send = fc_seq_els_rsp_send;
+
+	if (!lp->tt.exch_done)
+		lp->tt.exch_done = fc_exch_done;
+
+	if (!lp->tt.exch_mgr_reset)
+		lp->tt.exch_mgr_reset = fc_exch_mgr_reset;
+
+	if (!lp->tt.seq_exch_abort)
+		lp->tt.seq_exch_abort = fc_seq_exch_abort;
+
+	return 0;
+}
+EXPORT_SYMBOL(fc_exch_init);
+
+int fc_setup_exch_mgr(void)
+{
+	fc_em_cachep = kmem_cache_create("libfc_em", sizeof(struct fc_exch),
+					 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!fc_em_cachep)
+		return -ENOMEM;
+	return 0;
+}
+
+void fc_destroy_exch_mgr(void)
+{
+	kmem_cache_destroy(fc_em_cachep);
+}
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
new file mode 100644
index 000000000000..404e63ff46b8
--- /dev/null
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -0,0 +1,2131 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ * Copyright(c) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright(c) 2008 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/scatterlist.h>
+#include <linux/err.h>
+#include <linux/crc32.h>
+
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+
+#include <scsi/fc/fc_fc2.h>
+
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+MODULE_AUTHOR("Open-FCoE.org");
+MODULE_DESCRIPTION("libfc");
+MODULE_LICENSE("GPL");
+
+static int fc_fcp_debug;
+
+#define FC_DEBUG_FCP(fmt...)			\
+	do {					\
+		if (fc_fcp_debug)		\
+			FC_DBG(fmt);		\
+	} while (0)
+
+static struct kmem_cache *scsi_pkt_cachep;
+
+/* SRB state definitions */
+#define FC_SRB_FREE		0		/* cmd is free */
+#define FC_SRB_CMD_SENT		(1 << 0)	/* cmd has been sent */
+#define FC_SRB_RCV_STATUS	(1 << 1)	/* response has arrived */
+#define FC_SRB_ABORT_PENDING	(1 << 2)	/* cmd abort sent to device */
+#define FC_SRB_ABORTED		(1 << 3)	/* abort acknowleged */
+#define FC_SRB_DISCONTIG	(1 << 4)	/* non-sequential data recvd */
+#define FC_SRB_COMPL		(1 << 5)	/* fc_io_compl has been run */
+#define FC_SRB_FCP_PROCESSING_TMO (1 << 6)	/* timer function processing */
+#define FC_SRB_NOMEM		(1 << 7)	/* dropped to out of mem */
+
+#define FC_SRB_READ		(1 << 1)
+#define FC_SRB_WRITE		(1 << 0)
+
+/*
+ * The SCp.ptr should be tested and set under the host lock. NULL indicates
+ * that the command has been retruned to the scsi layer.
+ */
+#define CMD_SP(Cmnd)		    ((struct fc_fcp_pkt *)(Cmnd)->SCp.ptr)
+#define CMD_ENTRY_STATUS(Cmnd)	    ((Cmnd)->SCp.have_data_in)
+#define CMD_COMPL_STATUS(Cmnd)	    ((Cmnd)->SCp.this_residual)
+#define CMD_SCSI_STATUS(Cmnd)	    ((Cmnd)->SCp.Status)
+#define CMD_RESID_LEN(Cmnd)	    ((Cmnd)->SCp.buffers_residual)
+
+struct fc_fcp_internal {
+	mempool_t	*scsi_pkt_pool;
+	struct list_head scsi_pkt_queue;
+	u8		throttled;
+};
+
+#define fc_get_scsi_internal(x)	((struct fc_fcp_internal *)(x)->scsi_priv)
+
+/*
+ * function prototypes
+ * FC scsi I/O related functions
+ */
+static void fc_fcp_recv_data(struct fc_fcp_pkt *, struct fc_frame *);
+static void fc_fcp_recv(struct fc_seq *, struct fc_frame *, void *);
+static void fc_fcp_resp(struct fc_fcp_pkt *, struct fc_frame *);
+static void fc_fcp_complete_locked(struct fc_fcp_pkt *);
+static void fc_tm_done(struct fc_seq *, struct fc_frame *, void *);
+static void fc_fcp_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp);
+static void fc_timeout_error(struct fc_fcp_pkt *);
+static void fc_fcp_timeout(unsigned long data);
+static void fc_fcp_rec(struct fc_fcp_pkt *);
+static void fc_fcp_rec_error(struct fc_fcp_pkt *, struct fc_frame *);
+static void fc_fcp_rec_resp(struct fc_seq *, struct fc_frame *, void *);
+static void fc_io_compl(struct fc_fcp_pkt *);
+
+static void fc_fcp_srr(struct fc_fcp_pkt *, enum fc_rctl, u32);
+static void fc_fcp_srr_resp(struct fc_seq *, struct fc_frame *, void *);
+static void fc_fcp_srr_error(struct fc_fcp_pkt *, struct fc_frame *);
+
+/*
+ * command status codes
+ */
+#define FC_COMPLETE		0
+#define FC_CMD_ABORTED		1
+#define FC_CMD_RESET		2
+#define FC_CMD_PLOGO		3
+#define FC_SNS_RCV		4
+#define FC_TRANS_ERR		5
+#define FC_DATA_OVRRUN		6
+#define FC_DATA_UNDRUN		7
+#define FC_ERROR		8
+#define FC_HRD_ERROR		9
+#define FC_CMD_TIME_OUT		10
+
+/*
+ * Error recovery timeout values.
+ */
+#define FC_SCSI_ER_TIMEOUT	(10 * HZ)
+#define FC_SCSI_TM_TOV		(10 * HZ)
+#define FC_SCSI_REC_TOV		(2 * HZ)
+#define FC_HOST_RESET_TIMEOUT	(30 * HZ)
+
+#define FC_MAX_ERROR_CNT	5
+#define FC_MAX_RECOV_RETRY	3
+
+#define FC_FCP_DFLT_QUEUE_DEPTH 32
+
+/**
+ * fc_fcp_pkt_alloc - allocation routine for scsi_pkt packet
+ * @lp:		fc lport struct
+ * @gfp:	gfp flags for allocation
+ *
+ * This is used by upper layer scsi driver.
+ * Return Value : scsi_pkt structure or null on allocation failure.
+ * Context	: call from process context. no locking required.
+ */
+static struct fc_fcp_pkt *fc_fcp_pkt_alloc(struct fc_lport *lp, gfp_t gfp)
+{
+	struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
+	struct fc_fcp_pkt *fsp;
+
+	fsp = mempool_alloc(si->scsi_pkt_pool, gfp);
+	if (fsp) {
+		memset(fsp, 0, sizeof(*fsp));
+		fsp->lp = lp;
+		atomic_set(&fsp->ref_cnt, 1);
+		init_timer(&fsp->timer);
+		INIT_LIST_HEAD(&fsp->list);
+		spin_lock_init(&fsp->scsi_pkt_lock);
+	}
+	return fsp;
+}
+
+/**
+ * fc_fcp_pkt_release - release hold on scsi_pkt packet
+ * @fsp:	fcp packet struct
+ *
+ * This is used by upper layer scsi driver.
+ * Context	: call from process  and interrupt context.
+ *		  no locking required
+ */
+static void fc_fcp_pkt_release(struct fc_fcp_pkt *fsp)
+{
+	if (atomic_dec_and_test(&fsp->ref_cnt)) {
+		struct fc_fcp_internal *si = fc_get_scsi_internal(fsp->lp);
+
+		mempool_free(fsp, si->scsi_pkt_pool);
+	}
+}
+
+static void fc_fcp_pkt_hold(struct fc_fcp_pkt *fsp)
+{
+	atomic_inc(&fsp->ref_cnt);
+}
+
+/**
+ * fc_fcp_pkt_destory - release hold on scsi_pkt packet
+ *
+ * @seq:		exchange sequence
+ * @fsp:	fcp packet struct
+ *
+ * Release hold on scsi_pkt packet set to keep scsi_pkt
+ * till EM layer exch resource is not freed.
+ * Context	: called from from EM layer.
+ *		  no locking required
+ */
+static void fc_fcp_pkt_destroy(struct fc_seq *seq, void *fsp)
+{
+	fc_fcp_pkt_release(fsp);
+}
+
+/**
+ * fc_fcp_lock_pkt - lock a packet and get a ref to it.
+ * @fsp:	fcp packet
+ *
+ * We should only return error if we return a command to scsi-ml before
+ * getting a response. This can happen in cases where we send a abort, but
+ * do not wait for the response and the abort and command can be passing
+ * each other on the wire/network-layer.
+ *
+ * Note: this function locks the packet and gets a reference to allow
+ * callers to call the completion function while the lock is held and
+ * not have to worry about the packets refcount.
+ *
+ * TODO: Maybe we should just have callers grab/release the lock and
+ * have a function that they call to verify the fsp and grab a ref if
+ * needed.
+ */
+static inline int fc_fcp_lock_pkt(struct fc_fcp_pkt *fsp)
+{
+	spin_lock_bh(&fsp->scsi_pkt_lock);
+	if (fsp->state & FC_SRB_COMPL) {
+		spin_unlock_bh(&fsp->scsi_pkt_lock);
+		return -EPERM;
+	}
+
+	fc_fcp_pkt_hold(fsp);
+	return 0;
+}
+
+static inline void fc_fcp_unlock_pkt(struct fc_fcp_pkt *fsp)
+{
+	spin_unlock_bh(&fsp->scsi_pkt_lock);
+	fc_fcp_pkt_release(fsp);
+}
+
+static void fc_fcp_timer_set(struct fc_fcp_pkt *fsp, unsigned long delay)
+{
+	if (!(fsp->state & FC_SRB_COMPL))
+		mod_timer(&fsp->timer, jiffies + delay);
+}
+
+static int fc_fcp_send_abort(struct fc_fcp_pkt *fsp)
+{
+	if (!fsp->seq_ptr)
+		return -EINVAL;
+
+	fsp->state |= FC_SRB_ABORT_PENDING;
+	return fsp->lp->tt.seq_exch_abort(fsp->seq_ptr, 0);
+}
+
+/*
+ * Retry command.
+ * An abort isn't needed.
+ */
+static void fc_fcp_retry_cmd(struct fc_fcp_pkt *fsp)
+{
+	if (fsp->seq_ptr) {
+		fsp->lp->tt.exch_done(fsp->seq_ptr);
+		fsp->seq_ptr = NULL;
+	}
+
+	fsp->state &= ~FC_SRB_ABORT_PENDING;
+	fsp->io_status = SUGGEST_RETRY << 24;
+	fsp->status_code = FC_ERROR;
+	fc_fcp_complete_locked(fsp);
+}
+
+/*
+ * Receive SCSI data from target.
+ * Called after receiving solicited data.
+ */
+static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
+{
+	struct scsi_cmnd *sc = fsp->cmd;
+	struct fc_lport *lp = fsp->lp;
+	struct fcoe_dev_stats *stats;
+	struct fc_frame_header *fh;
+	size_t start_offset;
+	size_t offset;
+	u32 crc;
+	u32 copy_len = 0;
+	size_t len;
+	void *buf;
+	struct scatterlist *sg;
+	size_t remaining;
+
+	fh = fc_frame_header_get(fp);
+	offset = ntohl(fh->fh_parm_offset);
+	start_offset = offset;
+	len = fr_len(fp) - sizeof(*fh);
+	buf = fc_frame_payload_get(fp, 0);
+
+	if (offset + len > fsp->data_len) {
+		/*
+		 * this should never happen
+		 */
+		if ((fr_flags(fp) & FCPHF_CRC_UNCHECKED) &&
+		    fc_frame_crc_check(fp))
+			goto crc_err;
+		FC_DEBUG_FCP("data received past end. len %zx offset %zx "
+			     "data_len %x\n", len, offset, fsp->data_len);
+		fc_fcp_retry_cmd(fsp);
+		return;
+	}
+	if (offset != fsp->xfer_len)
+		fsp->state |= FC_SRB_DISCONTIG;
+
+	crc = 0;
+	if (fr_flags(fp) & FCPHF_CRC_UNCHECKED)
+		crc = crc32(~0, (u8 *) fh, sizeof(*fh));
+
+	sg = scsi_sglist(sc);
+	remaining = len;
+
+	while (remaining > 0 && sg) {
+		size_t off;
+		void *page_addr;
+		size_t sg_bytes;
+
+		if (offset >= sg->length) {
+			offset -= sg->length;
+			sg = sg_next(sg);
+			continue;
+		}
+		sg_bytes = min(remaining, sg->length - offset);
+
+		/*
+		 * The scatterlist item may be bigger than PAGE_SIZE,
+		 * but we are limited to mapping PAGE_SIZE at a time.
+		 */
+		off = offset + sg->offset;
+		sg_bytes = min(sg_bytes, (size_t)
+			       (PAGE_SIZE - (off & ~PAGE_MASK)));
+		page_addr = kmap_atomic(sg_page(sg) + (off >> PAGE_SHIFT),
+					KM_SOFTIRQ0);
+		if (!page_addr)
+			break;		/* XXX panic? */
+
+		if (fr_flags(fp) & FCPHF_CRC_UNCHECKED)
+			crc = crc32(crc, buf, sg_bytes);
+		memcpy((char *)page_addr + (off & ~PAGE_MASK), buf,
+		       sg_bytes);
+
+		kunmap_atomic(page_addr, KM_SOFTIRQ0);
+		buf += sg_bytes;
+		offset += sg_bytes;
+		remaining -= sg_bytes;
+		copy_len += sg_bytes;
+	}
+
+	if (fr_flags(fp) & FCPHF_CRC_UNCHECKED) {
+		buf = fc_frame_payload_get(fp, 0);
+		if (len % 4) {
+			crc = crc32(crc, buf + len, 4 - (len % 4));
+			len += 4 - (len % 4);
+		}
+
+		if (~crc != le32_to_cpu(fr_crc(fp))) {
+crc_err:
+			stats = lp->dev_stats[smp_processor_id()];
+			stats->ErrorFrames++;
+			if (stats->InvalidCRCCount++ < 5)
+				FC_DBG("CRC error on data frame\n");
+			/*
+			 * Assume the frame is total garbage.
+			 * We may have copied it over the good part
+			 * of the buffer.
+			 * If so, we need to retry the entire operation.
+			 * Otherwise, ignore it.
+			 */
+			if (fsp->state & FC_SRB_DISCONTIG)
+				fc_fcp_retry_cmd(fsp);
+			return;
+		}
+	}
+
+	if (fsp->xfer_contig_end == start_offset)
+		fsp->xfer_contig_end += copy_len;
+	fsp->xfer_len += copy_len;
+
+	/*
+	 * In the very rare event that this data arrived after the response
+	 * and completes the transfer, call the completion handler.
+	 */
+	if (unlikely(fsp->state & FC_SRB_RCV_STATUS) &&
+	    fsp->xfer_len == fsp->data_len - fsp->scsi_resid)
+		fc_fcp_complete_locked(fsp);
+}
+
+/*
+ * fc_fcp_send_data -  Send SCSI data to target.
+ * @fsp: ptr to fc_fcp_pkt
+ * @sp: ptr to this sequence
+ * @offset: starting offset for this data request
+ * @seq_blen: the burst length for this data request
+ *
+ * Called after receiving a Transfer Ready data descriptor.
+ * if LLD is capable of seq offload then send down seq_blen
+ * size of data in single frame, otherwise send multiple FC
+ * frames of max FC frame payload supported by target port.
+ *
+ * Returns : 0 for success.
+ */
+static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq,
+			    size_t offset, size_t seq_blen)
+{
+	struct fc_exch *ep;
+	struct scsi_cmnd *sc;
+	struct scatterlist *sg;
+	struct fc_frame *fp = NULL;
+	struct fc_lport *lp = fsp->lp;
+	size_t remaining;
+	size_t t_blen;
+	size_t tlen;
+	size_t sg_bytes;
+	size_t frame_offset, fh_parm_offset;
+	int error;
+	void *data = NULL;
+	void *page_addr;
+	int using_sg = lp->sg_supp;
+	u32 f_ctl;
+
+	WARN_ON(seq_blen <= 0);
+	if (unlikely(offset + seq_blen > fsp->data_len)) {
+		/* this should never happen */
+		FC_DEBUG_FCP("xfer-ready past end. seq_blen %zx offset %zx\n",
+			     seq_blen, offset);
+		fc_fcp_send_abort(fsp);
+		return 0;
+	} else if (offset != fsp->xfer_len) {
+		/* Out of Order Data Request - no problem, but unexpected. */
+		FC_DEBUG_FCP("xfer-ready non-contiguous. "
+			     "seq_blen %zx offset %zx\n", seq_blen, offset);
+	}
+
+	/*
+	 * if LLD is capable of seq_offload then set transport
+	 * burst length (t_blen) to seq_blen, otherwise set t_blen
+	 * to max FC frame payload previously set in fsp->max_payload.
+	 */
+	t_blen = lp->seq_offload ? seq_blen : fsp->max_payload;
+	WARN_ON(t_blen < FC_MIN_MAX_PAYLOAD);
+	if (t_blen > 512)
+		t_blen &= ~(512 - 1);	/* round down to block size */
+	WARN_ON(t_blen < FC_MIN_MAX_PAYLOAD);	/* won't go below 256 */
+	sc = fsp->cmd;
+
+	remaining = seq_blen;
+	fh_parm_offset = frame_offset = offset;
+	tlen = 0;
+	seq = lp->tt.seq_start_next(seq);
+	f_ctl = FC_FC_REL_OFF;
+	WARN_ON(!seq);
+
+	/*
+	 * If a get_page()/put_page() will fail, don't use sg lists
+	 * in the fc_frame structure.
+	 *
+	 * The put_page() may be long after the I/O has completed
+	 * in the case of FCoE, since the network driver does it
+	 * via free_skb().  See the test in free_pages_check().
+	 *
+	 * Test this case with 'dd </dev/zero >/dev/st0 bs=64k'.
+	 */
+	if (using_sg) {
+		for (sg = scsi_sglist(sc); sg; sg = sg_next(sg)) {
+			if (page_count(sg_page(sg)) == 0 ||
+			    (sg_page(sg)->flags & (1 << PG_lru |
+						   1 << PG_private |
+						   1 << PG_locked |
+						   1 << PG_active |
+						   1 << PG_slab |
+						   1 << PG_swapcache |
+						   1 << PG_writeback |
+						   1 << PG_reserved |
+						   1 << PG_buddy))) {
+				using_sg = 0;
+				break;
+			}
+		}
+	}
+	sg = scsi_sglist(sc);
+
+	while (remaining > 0 && sg) {
+		if (offset >= sg->length) {
+			offset -= sg->length;
+			sg = sg_next(sg);
+			continue;
+		}
+		if (!fp) {
+			tlen = min(t_blen, remaining);
+
+			/*
+			 * TODO.  Temporary workaround.	 fc_seq_send() can't
+			 * handle odd lengths in non-linear skbs.
+			 * This will be the final fragment only.
+			 */
+			if (tlen % 4)
+				using_sg = 0;
+			if (using_sg) {
+				fp = _fc_frame_alloc(lp, 0);
+				if (!fp)
+					return -ENOMEM;
+			} else {
+				fp = fc_frame_alloc(lp, tlen);
+				if (!fp)
+					return -ENOMEM;
+
+				data = (void *)(fr_hdr(fp)) +
+					sizeof(struct fc_frame_header);
+			}
+			fh_parm_offset = frame_offset;
+			fr_max_payload(fp) = fsp->max_payload;
+		}
+		sg_bytes = min(tlen, sg->length - offset);
+		if (using_sg) {
+			WARN_ON(skb_shinfo(fp_skb(fp))->nr_frags >
+				FC_FRAME_SG_LEN);
+			get_page(sg_page(sg));
+			skb_fill_page_desc(fp_skb(fp),
+					   skb_shinfo(fp_skb(fp))->nr_frags,
+					   sg_page(sg), sg->offset + offset,
+					   sg_bytes);
+			fp_skb(fp)->data_len += sg_bytes;
+			fr_len(fp) += sg_bytes;
+			fp_skb(fp)->truesize += PAGE_SIZE;
+		} else {
+			size_t off = offset + sg->offset;
+
+			/*
+			 * The scatterlist item may be bigger than PAGE_SIZE,
+			 * but we must not cross pages inside the kmap.
+			 */
+			sg_bytes = min(sg_bytes, (size_t) (PAGE_SIZE -
+							   (off & ~PAGE_MASK)));
+			page_addr = kmap_atomic(sg_page(sg) +
+						(off >> PAGE_SHIFT),
+						KM_SOFTIRQ0);
+			memcpy(data, (char *)page_addr + (off & ~PAGE_MASK),
+			       sg_bytes);
+			kunmap_atomic(page_addr, KM_SOFTIRQ0);
+			data += sg_bytes;
+		}
+		offset += sg_bytes;
+		frame_offset += sg_bytes;
+		tlen -= sg_bytes;
+		remaining -= sg_bytes;
+
+		if (tlen)
+			continue;
+
+		/*
+		 * Send sequence with transfer sequence initiative in case
+		 * this is last FCP frame of the sequence.
+		 */
+		if (remaining == 0)
+			f_ctl |= FC_FC_SEQ_INIT | FC_FC_END_SEQ;
+
+		ep = fc_seq_exch(seq);
+		fc_fill_fc_hdr(fp, FC_RCTL_DD_SOL_DATA, ep->did, ep->sid,
+			       FC_TYPE_FCP, f_ctl, fh_parm_offset);
+
+		/*
+		 * send fragment using for a sequence.
+		 */
+		error = lp->tt.seq_send(lp, seq, fp);
+		if (error) {
+			WARN_ON(1);		/* send error should be rare */
+			fc_fcp_retry_cmd(fsp);
+			return 0;
+		}
+		fp = NULL;
+	}
+	fsp->xfer_len += seq_blen;	/* premature count? */
+	return 0;
+}
+
+static void fc_fcp_abts_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
+{
+	int ba_done = 1;
+	struct fc_ba_rjt *brp;
+	struct fc_frame_header *fh;
+
+	fh = fc_frame_header_get(fp);
+	switch (fh->fh_r_ctl) {
+	case FC_RCTL_BA_ACC:
+		break;
+	case FC_RCTL_BA_RJT:
+		brp = fc_frame_payload_get(fp, sizeof(*brp));
+		if (brp && brp->br_reason == FC_BA_RJT_LOG_ERR)
+			break;
+		/* fall thru */
+	default:
+		/*
+		 * we will let the command timeout
+		 * and scsi-ml recover in this case,
+		 * therefore cleared the ba_done flag.
+		 */
+		ba_done = 0;
+	}
+
+	if (ba_done) {
+		fsp->state |= FC_SRB_ABORTED;
+		fsp->state &= ~FC_SRB_ABORT_PENDING;
+
+		if (fsp->wait_for_comp)
+			complete(&fsp->tm_done);
+		else
+			fc_fcp_complete_locked(fsp);
+	}
+}
+
+/*
+ * fc_fcp_reduce_can_queue - drop can_queue
+ * @lp: lport to drop queueing for
+ *
+ * If we are getting memory allocation failures, then we may
+ * be trying to execute too many commands. We let the running
+ * commands complete or timeout, then try again with a reduced
+ * can_queue. Eventually we will hit the point where we run
+ * on all reserved structs.
+ */
+static void fc_fcp_reduce_can_queue(struct fc_lport *lp)
+{
+	struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
+	unsigned long flags;
+	int can_queue;
+
+	spin_lock_irqsave(lp->host->host_lock, flags);
+	if (si->throttled)
+		goto done;
+	si->throttled = 1;
+
+	can_queue = lp->host->can_queue;
+	can_queue >>= 1;
+	if (!can_queue)
+		can_queue = 1;
+	lp->host->can_queue = can_queue;
+	shost_printk(KERN_ERR, lp->host, "Could not allocate frame.\n"
+		     "Reducing can_queue to %d.\n", can_queue);
+done:
+	spin_unlock_irqrestore(lp->host->host_lock, flags);
+}
+
+/*
+ * exch mgr calls this routine to process scsi
+ * exchanges.
+ *
+ * Return   : None
+ * Context  : called from Soft IRQ context
+ *	      can not called holding list lock
+ */
+static void fc_fcp_recv(struct fc_seq *seq, struct fc_frame *fp, void *arg)
+{
+	struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)arg;
+	struct fc_lport *lp;
+	struct fc_frame_header *fh;
+	struct fcp_txrdy *dd;
+	u8 r_ctl;
+	int rc = 0;
+
+	if (IS_ERR(fp))
+		goto errout;
+
+	fh = fc_frame_header_get(fp);
+	r_ctl = fh->fh_r_ctl;
+	lp = fsp->lp;
+
+	if (!(lp->state & LPORT_ST_READY))
+		goto out;
+	if (fc_fcp_lock_pkt(fsp))
+		goto out;
+	fsp->last_pkt_time = jiffies;
+
+	if (fh->fh_type == FC_TYPE_BLS) {
+		fc_fcp_abts_resp(fsp, fp);
+		goto unlock;
+	}
+
+	if (fsp->state & (FC_SRB_ABORTED | FC_SRB_ABORT_PENDING))
+		goto unlock;
+
+	if (r_ctl == FC_RCTL_DD_DATA_DESC) {
+		/*
+		 * received XFER RDY from the target
+		 * need to send data to the target
+		 */
+		WARN_ON(fr_flags(fp) & FCPHF_CRC_UNCHECKED);
+		dd = fc_frame_payload_get(fp, sizeof(*dd));
+		WARN_ON(!dd);
+
+		rc = fc_fcp_send_data(fsp, seq,
+				      (size_t) ntohl(dd->ft_data_ro),
+				      (size_t) ntohl(dd->ft_burst_len));
+		if (!rc)
+			seq->rec_data = fsp->xfer_len;
+		else if (rc == -ENOMEM)
+			fsp->state |= FC_SRB_NOMEM;
+	} else if (r_ctl == FC_RCTL_DD_SOL_DATA) {
+		/*
+		 * received a DATA frame
+		 * next we will copy the data to the system buffer
+		 */
+		WARN_ON(fr_len(fp) < sizeof(*fh));	/* len may be 0 */
+		fc_fcp_recv_data(fsp, fp);
+		seq->rec_data = fsp->xfer_contig_end;
+	} else if (r_ctl == FC_RCTL_DD_CMD_STATUS) {
+		WARN_ON(fr_flags(fp) & FCPHF_CRC_UNCHECKED);
+
+		fc_fcp_resp(fsp, fp);
+	} else {
+		FC_DBG("unexpected frame.  r_ctl %x\n", r_ctl);
+	}
+unlock:
+	fc_fcp_unlock_pkt(fsp);
+out:
+	fc_frame_free(fp);
+errout:
+	if (IS_ERR(fp))
+		fc_fcp_error(fsp, fp);
+	else if (rc == -ENOMEM)
+		fc_fcp_reduce_can_queue(lp);
+}
+
+static void fc_fcp_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
+{
+	struct fc_frame_header *fh;
+	struct fcp_resp *fc_rp;
+	struct fcp_resp_ext *rp_ex;
+	struct fcp_resp_rsp_info *fc_rp_info;
+	u32 plen;
+	u32 expected_len;
+	u32 respl = 0;
+	u32 snsl = 0;
+	u8 flags = 0;
+
+	plen = fr_len(fp);
+	fh = (struct fc_frame_header *)fr_hdr(fp);
+	if (unlikely(plen < sizeof(*fh) + sizeof(*fc_rp)))
+		goto len_err;
+	plen -= sizeof(*fh);
+	fc_rp = (struct fcp_resp *)(fh + 1);
+	fsp->cdb_status = fc_rp->fr_status;
+	flags = fc_rp->fr_flags;
+	fsp->scsi_comp_flags = flags;
+	expected_len = fsp->data_len;
+
+	if (unlikely((flags & ~FCP_CONF_REQ) || fc_rp->fr_status)) {
+		rp_ex = (void *)(fc_rp + 1);
+		if (flags & (FCP_RSP_LEN_VAL | FCP_SNS_LEN_VAL)) {
+			if (plen < sizeof(*fc_rp) + sizeof(*rp_ex))
+				goto len_err;
+			fc_rp_info = (struct fcp_resp_rsp_info *)(rp_ex + 1);
+			if (flags & FCP_RSP_LEN_VAL) {
+				respl = ntohl(rp_ex->fr_rsp_len);
+				if (respl != sizeof(*fc_rp_info))
+					goto len_err;
+				if (fsp->wait_for_comp) {
+					/* Abuse cdb_status for rsp code */
+					fsp->cdb_status = fc_rp_info->rsp_code;
+					complete(&fsp->tm_done);
+					/*
+					 * tmfs will not have any scsi cmd so
+					 * exit here
+					 */
+					return;
+				} else
+					goto err;
+			}
+			if (flags & FCP_SNS_LEN_VAL) {
+				snsl = ntohl(rp_ex->fr_sns_len);
+				if (snsl > SCSI_SENSE_BUFFERSIZE)
+					snsl = SCSI_SENSE_BUFFERSIZE;
+				memcpy(fsp->cmd->sense_buffer,
+				       (char *)fc_rp_info + respl, snsl);
+			}
+		}
+		if (flags & (FCP_RESID_UNDER | FCP_RESID_OVER)) {
+			if (plen < sizeof(*fc_rp) + sizeof(rp_ex->fr_resid))
+				goto len_err;
+			if (flags & FCP_RESID_UNDER) {
+				fsp->scsi_resid = ntohl(rp_ex->fr_resid);
+				/*
+				 * The cmnd->underflow is the minimum number of
+				 * bytes that must be transfered for this
+				 * command.  Provided a sense condition is not
+				 * present, make sure the actual amount
+				 * transferred is at least the underflow value
+				 * or fail.
+				 */
+				if (!(flags & FCP_SNS_LEN_VAL) &&
+				    (fc_rp->fr_status == 0) &&
+				    (scsi_bufflen(fsp->cmd) -
+				     fsp->scsi_resid) < fsp->cmd->underflow)
+					goto err;
+				expected_len -= fsp->scsi_resid;
+			} else {
+				fsp->status_code = FC_ERROR;
+			}
+		}
+	}
+	fsp->state |= FC_SRB_RCV_STATUS;
+
+	/*
+	 * Check for missing or extra data frames.
+	 */
+	if (unlikely(fsp->xfer_len != expected_len)) {
+		if (fsp->xfer_len < expected_len) {
+			/*
+			 * Some data may be queued locally,
+			 * Wait a at least one jiffy to see if it is delivered.
+			 * If this expires without data, we may do SRR.
+			 */
+			fc_fcp_timer_set(fsp, 2);
+			return;
+		}
+		fsp->status_code = FC_DATA_OVRRUN;
+		FC_DBG("tgt %6x xfer len %zx greater than expected len %x. "
+		       "data len %x\n",
+		       fsp->rport->port_id,
+		       fsp->xfer_len, expected_len, fsp->data_len);
+	}
+	fc_fcp_complete_locked(fsp);
+	return;
+
+len_err:
+	FC_DBG("short FCP response. flags 0x%x len %u respl %u snsl %u\n",
+	       flags, fr_len(fp), respl, snsl);
+err:
+	fsp->status_code = FC_ERROR;
+	fc_fcp_complete_locked(fsp);
+}
+
+/**
+ * fc_fcp_complete_locked - complete processing of a fcp packet
+ * @fsp:	fcp packet
+ *
+ * This function may sleep if a timer is pending. The packet lock must be
+ * held, and the host lock must not be held.
+ */
+static void fc_fcp_complete_locked(struct fc_fcp_pkt *fsp)
+{
+	struct fc_lport *lp = fsp->lp;
+	struct fc_seq *seq;
+	struct fc_exch *ep;
+	u32 f_ctl;
+
+	if (fsp->state & FC_SRB_ABORT_PENDING)
+		return;
+
+	if (fsp->state & FC_SRB_ABORTED) {
+		if (!fsp->status_code)
+			fsp->status_code = FC_CMD_ABORTED;
+	} else {
+		/*
+		 * Test for transport underrun, independent of response
+		 * underrun status.
+		 */
+		if (fsp->xfer_len < fsp->data_len && !fsp->io_status &&
+		    (!(fsp->scsi_comp_flags & FCP_RESID_UNDER) ||
+		     fsp->xfer_len < fsp->data_len - fsp->scsi_resid)) {
+			fsp->status_code = FC_DATA_UNDRUN;
+			fsp->io_status = SUGGEST_RETRY << 24;
+		}
+	}
+
+	seq = fsp->seq_ptr;
+	if (seq) {
+		fsp->seq_ptr = NULL;
+		if (unlikely(fsp->scsi_comp_flags & FCP_CONF_REQ)) {
+			struct fc_frame *conf_frame;
+			struct fc_seq *csp;
+
+			csp = lp->tt.seq_start_next(seq);
+			conf_frame = fc_frame_alloc(fsp->lp, 0);
+			if (conf_frame) {
+				f_ctl = FC_FC_SEQ_INIT;
+				f_ctl |= FC_FC_LAST_SEQ | FC_FC_END_SEQ;
+				ep = fc_seq_exch(seq);
+				fc_fill_fc_hdr(conf_frame, FC_RCTL_DD_SOL_CTL,
+					       ep->did, ep->sid,
+					       FC_TYPE_FCP, f_ctl, 0);
+				lp->tt.seq_send(lp, csp, conf_frame);
+			}
+		}
+		lp->tt.exch_done(seq);
+	}
+	fc_io_compl(fsp);
+}
+
+static void fc_fcp_cleanup_cmd(struct fc_fcp_pkt *fsp, int error)
+{
+	struct fc_lport *lp = fsp->lp;
+
+	if (fsp->seq_ptr) {
+		lp->tt.exch_done(fsp->seq_ptr);
+		fsp->seq_ptr = NULL;
+	}
+	fsp->status_code = error;
+}
+
+/**
+ * fc_fcp_cleanup_each_cmd - run fn on each active command
+ * @lp:		logical port
+ * @id:		target id
+ * @lun:	lun
+ * @error:	fsp status code
+ *
+ * If lun or id is -1, they are ignored.
+ */
+static void fc_fcp_cleanup_each_cmd(struct fc_lport *lp, unsigned int id,
+				    unsigned int lun, int error)
+{
+	struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
+	struct fc_fcp_pkt *fsp;
+	struct scsi_cmnd *sc_cmd;
+	unsigned long flags;
+
+	spin_lock_irqsave(lp->host->host_lock, flags);
+restart:
+	list_for_each_entry(fsp, &si->scsi_pkt_queue, list) {
+		sc_cmd = fsp->cmd;
+		if (id != -1 && scmd_id(sc_cmd) != id)
+			continue;
+
+		if (lun != -1 && sc_cmd->device->lun != lun)
+			continue;
+
+		fc_fcp_pkt_hold(fsp);
+		spin_unlock_irqrestore(lp->host->host_lock, flags);
+
+		if (!fc_fcp_lock_pkt(fsp)) {
+			fc_fcp_cleanup_cmd(fsp, error);
+			fc_io_compl(fsp);
+			fc_fcp_unlock_pkt(fsp);
+		}
+
+		fc_fcp_pkt_release(fsp);
+		spin_lock_irqsave(lp->host->host_lock, flags);
+		/*
+		 * while we dropped the lock multiple pkts could
+		 * have been released, so we have to start over.
+		 */
+		goto restart;
+	}
+	spin_unlock_irqrestore(lp->host->host_lock, flags);
+}
+
+static void fc_fcp_abort_io(struct fc_lport *lp)
+{
+	fc_fcp_cleanup_each_cmd(lp, -1, -1, FC_HRD_ERROR);
+}
+
+/**
+ * fc_fcp_pkt_send - send a fcp packet to the lower level.
+ * @lp:		fc lport
+ * @fsp:	fc packet.
+ *
+ * This is called by upper layer protocol.
+ * Return   : zero for success and -1 for failure
+ * Context  : called from queuecommand which can be called from process
+ *	      or scsi soft irq.
+ * Locks    : called with the host lock and irqs disabled.
+ */
+static int fc_fcp_pkt_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp)
+{
+	struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
+	int rc;
+
+	fsp->cmd->SCp.ptr = (char *)fsp;
+	fsp->cdb_cmd.fc_dl = htonl(fsp->data_len);
+	fsp->cdb_cmd.fc_flags = fsp->req_flags & ~FCP_CFL_LEN_MASK;
+
+	int_to_scsilun(fsp->cmd->device->lun,
+		       (struct scsi_lun *)fsp->cdb_cmd.fc_lun);
+	memcpy(fsp->cdb_cmd.fc_cdb, fsp->cmd->cmnd, fsp->cmd->cmd_len);
+	list_add_tail(&fsp->list, &si->scsi_pkt_queue);
+
+	spin_unlock_irq(lp->host->host_lock);
+	rc = lp->tt.fcp_cmd_send(lp, fsp, fc_fcp_recv);
+	spin_lock_irq(lp->host->host_lock);
+	if (rc)
+		list_del(&fsp->list);
+
+	return rc;
+}
+
+static int fc_fcp_cmd_send(struct fc_lport *lp, struct fc_fcp_pkt *fsp,
+			   void (*resp)(struct fc_seq *,
+					struct fc_frame *fp,
+					void *arg))
+{
+	struct fc_frame *fp;
+	struct fc_seq *seq;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rp;
+	const size_t len = sizeof(fsp->cdb_cmd);
+	int rc = 0;
+
+	if (fc_fcp_lock_pkt(fsp))
+		return 0;
+
+	fp = fc_frame_alloc(lp, sizeof(fsp->cdb_cmd));
+	if (!fp) {
+		rc = -1;
+		goto unlock;
+	}
+
+	memcpy(fc_frame_payload_get(fp, len), &fsp->cdb_cmd, len);
+	fr_cmd(fp) = fsp->cmd;
+	rport = fsp->rport;
+	fsp->max_payload = rport->maxframe_size;
+	rp = rport->dd_data;
+
+	fc_fill_fc_hdr(fp, FC_RCTL_DD_UNSOL_CMD, rport->port_id,
+		       fc_host_port_id(rp->local_port->host), FC_TYPE_FCP,
+		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+
+	seq = lp->tt.exch_seq_send(lp, fp, resp, fc_fcp_pkt_destroy, fsp, 0);
+	if (!seq) {
+		fc_frame_free(fp);
+		rc = -1;
+		goto unlock;
+	}
+	fsp->last_pkt_time = jiffies;
+	fsp->seq_ptr = seq;
+	fc_fcp_pkt_hold(fsp);	/* hold for fc_fcp_pkt_destroy */
+
+	setup_timer(&fsp->timer, fc_fcp_timeout, (unsigned long)fsp);
+	fc_fcp_timer_set(fsp,
+			 (fsp->tgt_flags & FC_RP_FLAGS_REC_SUPPORTED) ?
+			 FC_SCSI_REC_TOV : FC_SCSI_ER_TIMEOUT);
+unlock:
+	fc_fcp_unlock_pkt(fsp);
+	return rc;
+}
+
+/*
+ * transport error handler
+ */
+static void fc_fcp_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
+{
+	int error = PTR_ERR(fp);
+
+	if (fc_fcp_lock_pkt(fsp))
+		return;
+
+	switch (error) {
+	case -FC_EX_CLOSED:
+		fc_fcp_retry_cmd(fsp);
+		goto unlock;
+	default:
+		FC_DBG("unknown error %ld\n", PTR_ERR(fp));
+	}
+	/*
+	 * clear abort pending, because the lower layer
+	 * decided to force completion.
+	 */
+	fsp->state &= ~FC_SRB_ABORT_PENDING;
+	fsp->status_code = FC_CMD_PLOGO;
+	fc_fcp_complete_locked(fsp);
+unlock:
+	fc_fcp_unlock_pkt(fsp);
+}
+
+/*
+ * Scsi abort handler- calls to send an abort
+ * and then wait for abort completion
+ */
+static int fc_fcp_pkt_abort(struct fc_lport *lp, struct fc_fcp_pkt *fsp)
+{
+	int rc = FAILED;
+
+	if (fc_fcp_send_abort(fsp))
+		return FAILED;
+
+	init_completion(&fsp->tm_done);
+	fsp->wait_for_comp = 1;
+
+	spin_unlock_bh(&fsp->scsi_pkt_lock);
+	rc = wait_for_completion_timeout(&fsp->tm_done, FC_SCSI_TM_TOV);
+	spin_lock_bh(&fsp->scsi_pkt_lock);
+	fsp->wait_for_comp = 0;
+
+	if (!rc) {
+		FC_DBG("target abort cmd  failed\n");
+		rc = FAILED;
+	} else if (fsp->state & FC_SRB_ABORTED) {
+		FC_DBG("target abort cmd  passed\n");
+		rc = SUCCESS;
+		fc_fcp_complete_locked(fsp);
+	}
+
+	return rc;
+}
+
+/*
+ * Retry LUN reset after resource allocation failed.
+ */
+static void fc_lun_reset_send(unsigned long data)
+{
+	struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)data;
+	struct fc_lport *lp = fsp->lp;
+	if (lp->tt.fcp_cmd_send(lp, fsp, fc_tm_done)) {
+		if (fsp->recov_retry++ >= FC_MAX_RECOV_RETRY)
+			return;
+		if (fc_fcp_lock_pkt(fsp))
+			return;
+		setup_timer(&fsp->timer, fc_lun_reset_send, (unsigned long)fsp);
+		fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV);
+		fc_fcp_unlock_pkt(fsp);
+	}
+}
+
+/*
+ * Scsi device reset handler- send a LUN RESET to the device
+ * and wait for reset reply
+ */
+static int fc_lun_reset(struct fc_lport *lp, struct fc_fcp_pkt *fsp,
+			unsigned int id, unsigned int lun)
+{
+	int rc;
+
+	fsp->cdb_cmd.fc_dl = htonl(fsp->data_len);
+	fsp->cdb_cmd.fc_tm_flags = FCP_TMF_LUN_RESET;
+	int_to_scsilun(lun, (struct scsi_lun *)fsp->cdb_cmd.fc_lun);
+
+	fsp->wait_for_comp = 1;
+	init_completion(&fsp->tm_done);
+
+	fc_lun_reset_send((unsigned long)fsp);
+
+	/*
+	 * wait for completion of reset
+	 * after that make sure all commands are terminated
+	 */
+	rc = wait_for_completion_timeout(&fsp->tm_done, FC_SCSI_TM_TOV);
+
+	spin_lock_bh(&fsp->scsi_pkt_lock);
+	fsp->state |= FC_SRB_COMPL;
+	spin_unlock_bh(&fsp->scsi_pkt_lock);
+
+	del_timer_sync(&fsp->timer);
+
+	spin_lock_bh(&fsp->scsi_pkt_lock);
+	if (fsp->seq_ptr) {
+		lp->tt.exch_done(fsp->seq_ptr);
+		fsp->seq_ptr = NULL;
+	}
+	fsp->wait_for_comp = 0;
+	spin_unlock_bh(&fsp->scsi_pkt_lock);
+
+	if (!rc) {
+		FC_DBG("lun reset failed\n");
+		return FAILED;
+	}
+
+	/* cdb_status holds the tmf's rsp code */
+	if (fsp->cdb_status != FCP_TMF_CMPL)
+		return FAILED;
+
+	FC_DBG("lun reset to lun %u completed\n", lun);
+	fc_fcp_cleanup_each_cmd(lp, id, lun, FC_CMD_ABORTED);
+	return SUCCESS;
+}
+
+/*
+ * Task Managment response handler
+ */
+static void fc_tm_done(struct fc_seq *seq, struct fc_frame *fp, void *arg)
+{
+	struct fc_fcp_pkt *fsp = arg;
+	struct fc_frame_header *fh;
+
+	if (IS_ERR(fp)) {
+		/*
+		 * If there is an error just let it timeout or wait
+		 * for TMF to be aborted if it timedout.
+		 *
+		 * scsi-eh will escalate for when either happens.
+		 */
+		return;
+	}
+
+	if (fc_fcp_lock_pkt(fsp))
+		return;
+
+	/*
+	 * raced with eh timeout handler.
+	 */
+	if (!fsp->seq_ptr || !fsp->wait_for_comp) {
+		spin_unlock_bh(&fsp->scsi_pkt_lock);
+		return;
+	}
+
+	fh = fc_frame_header_get(fp);
+	if (fh->fh_type != FC_TYPE_BLS)
+		fc_fcp_resp(fsp, fp);
+	fsp->seq_ptr = NULL;
+	fsp->lp->tt.exch_done(seq);
+	fc_frame_free(fp);
+	fc_fcp_unlock_pkt(fsp);
+}
+
+static void fc_fcp_cleanup(struct fc_lport *lp)
+{
+	fc_fcp_cleanup_each_cmd(lp, -1, -1, FC_ERROR);
+}
+
+/*
+ * fc_fcp_timeout: called by OS timer function.
+ *
+ * The timer has been inactivated and must be reactivated if desired
+ * using fc_fcp_timer_set().
+ *
+ * Algorithm:
+ *
+ * If REC is supported, just issue it, and return.  The REC exchange will
+ * complete or time out, and recovery can continue at that point.
+ *
+ * Otherwise, if the response has been received without all the data,
+ * it has been ER_TIMEOUT since the response was received.
+ *
+ * If the response has not been received,
+ * we see if data was received recently.  If it has been, we continue waiting,
+ * otherwise, we abort the command.
+ */
+static void fc_fcp_timeout(unsigned long data)
+{
+	struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)data;
+	struct fc_rport *rport = fsp->rport;
+	struct fc_rport_libfc_priv *rp = rport->dd_data;
+
+	if (fc_fcp_lock_pkt(fsp))
+		return;
+
+	if (fsp->cdb_cmd.fc_tm_flags)
+		goto unlock;
+
+	fsp->state |= FC_SRB_FCP_PROCESSING_TMO;
+
+	if (rp->flags & FC_RP_FLAGS_REC_SUPPORTED)
+		fc_fcp_rec(fsp);
+	else if (time_after_eq(fsp->last_pkt_time + (FC_SCSI_ER_TIMEOUT / 2),
+			       jiffies))
+		fc_fcp_timer_set(fsp, FC_SCSI_ER_TIMEOUT);
+	else if (fsp->state & FC_SRB_RCV_STATUS)
+		fc_fcp_complete_locked(fsp);
+	else
+		fc_timeout_error(fsp);
+	fsp->state &= ~FC_SRB_FCP_PROCESSING_TMO;
+unlock:
+	fc_fcp_unlock_pkt(fsp);
+}
+
+/*
+ * Send a REC ELS request
+ */
+static void fc_fcp_rec(struct fc_fcp_pkt *fsp)
+{
+	struct fc_lport *lp;
+	struct fc_frame *fp;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rp;
+
+	lp = fsp->lp;
+	rport = fsp->rport;
+	rp = rport->dd_data;
+	if (!fsp->seq_ptr || rp->rp_state != RPORT_ST_READY) {
+		fsp->status_code = FC_HRD_ERROR;
+		fsp->io_status = SUGGEST_RETRY << 24;
+		fc_fcp_complete_locked(fsp);
+		return;
+	}
+	fp = fc_frame_alloc(lp, sizeof(struct fc_els_rec));
+	if (!fp)
+		goto retry;
+
+	fr_seq(fp) = fsp->seq_ptr;
+	fc_fill_fc_hdr(fp, FC_RCTL_ELS_REQ, rport->port_id,
+		       fc_host_port_id(rp->local_port->host), FC_TYPE_ELS,
+		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+	if (lp->tt.elsct_send(lp, rport, fp, ELS_REC, fc_fcp_rec_resp,
+			      fsp, jiffies_to_msecs(FC_SCSI_REC_TOV))) {
+		fc_fcp_pkt_hold(fsp);		/* hold while REC outstanding */
+		return;
+	}
+	fc_frame_free(fp);
+retry:
+	if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY)
+		fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV);
+	else
+		fc_timeout_error(fsp);
+}
+
+/*
+ * Receive handler for REC ELS frame
+ * if it is a reject then let the scsi layer to handle
+ * the timeout. if it is a LS_ACC then if the io was not completed
+ * then set the timeout and return otherwise complete the exchange
+ * and tell the scsi layer to restart the I/O.
+ */
+static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
+{
+	struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)arg;
+	struct fc_els_rec_acc *recp;
+	struct fc_els_ls_rjt *rjt;
+	u32 e_stat;
+	u8 opcode;
+	u32 offset;
+	enum dma_data_direction data_dir;
+	enum fc_rctl r_ctl;
+	struct fc_rport_libfc_priv *rp;
+
+	if (IS_ERR(fp)) {
+		fc_fcp_rec_error(fsp, fp);
+		return;
+	}
+
+	if (fc_fcp_lock_pkt(fsp))
+		goto out;
+
+	fsp->recov_retry = 0;
+	opcode = fc_frame_payload_op(fp);
+	if (opcode == ELS_LS_RJT) {
+		rjt = fc_frame_payload_get(fp, sizeof(*rjt));
+		switch (rjt->er_reason) {
+		default:
+			FC_DEBUG_FCP("device %x unexpected REC reject "
+				     "reason %d expl %d\n",
+				     fsp->rport->port_id, rjt->er_reason,
+				     rjt->er_explan);
+			/* fall through */
+		case ELS_RJT_UNSUP:
+			FC_DEBUG_FCP("device does not support REC\n");
+			rp = fsp->rport->dd_data;
+			/*
+			 * if we do not spport RECs or got some bogus
+			 * reason then resetup timer so we check for
+			 * making progress.
+			 */
+			rp->flags &= ~FC_RP_FLAGS_REC_SUPPORTED;
+			fc_fcp_timer_set(fsp, FC_SCSI_ER_TIMEOUT);
+			break;
+		case ELS_RJT_LOGIC:
+		case ELS_RJT_UNAB:
+			/*
+			 * If no data transfer, the command frame got dropped
+			 * so we just retry.  If data was transferred, we
+			 * lost the response but the target has no record,
+			 * so we abort and retry.
+			 */
+			if (rjt->er_explan == ELS_EXPL_OXID_RXID &&
+			    fsp->xfer_len == 0) {
+				fc_fcp_retry_cmd(fsp);
+				break;
+			}
+			fc_timeout_error(fsp);
+			break;
+		}
+	} else if (opcode == ELS_LS_ACC) {
+		if (fsp->state & FC_SRB_ABORTED)
+			goto unlock_out;
+
+		data_dir = fsp->cmd->sc_data_direction;
+		recp = fc_frame_payload_get(fp, sizeof(*recp));
+		offset = ntohl(recp->reca_fc4value);
+		e_stat = ntohl(recp->reca_e_stat);
+
+		if (e_stat & ESB_ST_COMPLETE) {
+
+			/*
+			 * The exchange is complete.
+			 *
+			 * For output, we must've lost the response.
+			 * For input, all data must've been sent.
+			 * We lost may have lost the response
+			 * (and a confirmation was requested) and maybe
+			 * some data.
+			 *
+			 * If all data received, send SRR
+			 * asking for response.	 If partial data received,
+			 * or gaps, SRR requests data at start of gap.
+			 * Recovery via SRR relies on in-order-delivery.
+			 */
+			if (data_dir == DMA_TO_DEVICE) {
+				r_ctl = FC_RCTL_DD_CMD_STATUS;
+			} else if (fsp->xfer_contig_end == offset) {
+				r_ctl = FC_RCTL_DD_CMD_STATUS;
+			} else {
+				offset = fsp->xfer_contig_end;
+				r_ctl = FC_RCTL_DD_SOL_DATA;
+			}
+			fc_fcp_srr(fsp, r_ctl, offset);
+		} else if (e_stat & ESB_ST_SEQ_INIT) {
+
+			/*
+			 * The remote port has the initiative, so just
+			 * keep waiting for it to complete.
+			 */
+			fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV);
+		} else {
+
+			/*
+			 * The exchange is incomplete, we have seq. initiative.
+			 * Lost response with requested confirmation,
+			 * lost confirmation, lost transfer ready or
+			 * lost write data.
+			 *
+			 * For output, if not all data was received, ask
+			 * for transfer ready to be repeated.
+			 *
+			 * If we received or sent all the data, send SRR to
+			 * request response.
+			 *
+			 * If we lost a response, we may have lost some read
+			 * data as well.
+			 */
+			r_ctl = FC_RCTL_DD_SOL_DATA;
+			if (data_dir == DMA_TO_DEVICE) {
+				r_ctl = FC_RCTL_DD_CMD_STATUS;
+				if (offset < fsp->data_len)
+					r_ctl = FC_RCTL_DD_DATA_DESC;
+			} else if (offset == fsp->xfer_contig_end) {
+				r_ctl = FC_RCTL_DD_CMD_STATUS;
+			} else if (fsp->xfer_contig_end < offset) {
+				offset = fsp->xfer_contig_end;
+			}
+			fc_fcp_srr(fsp, r_ctl, offset);
+		}
+	}
+unlock_out:
+	fc_fcp_unlock_pkt(fsp);
+out:
+	fc_fcp_pkt_release(fsp);	/* drop hold for outstanding REC */
+	fc_frame_free(fp);
+}
+
+/*
+ * Handle error response or timeout for REC exchange.
+ */
+static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
+{
+	int error = PTR_ERR(fp);
+
+	if (fc_fcp_lock_pkt(fsp))
+		goto out;
+
+	switch (error) {
+	case -FC_EX_CLOSED:
+		fc_fcp_retry_cmd(fsp);
+		break;
+
+	default:
+		FC_DBG("REC %p fid %x error unexpected error %d\n",
+		       fsp, fsp->rport->port_id, error);
+		fsp->status_code = FC_CMD_PLOGO;
+		/* fall through */
+
+	case -FC_EX_TIMEOUT:
+		/*
+		 * Assume REC or LS_ACC was lost.
+		 * The exchange manager will have aborted REC, so retry.
+		 */
+		FC_DBG("REC fid %x error error %d retry %d/%d\n",
+		       fsp->rport->port_id, error, fsp->recov_retry,
+		       FC_MAX_RECOV_RETRY);
+		if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY)
+			fc_fcp_rec(fsp);
+		else
+			fc_timeout_error(fsp);
+		break;
+	}
+	fc_fcp_unlock_pkt(fsp);
+out:
+	fc_fcp_pkt_release(fsp);	/* drop hold for outstanding REC */
+}
+
+/*
+ * Time out error routine:
+ * abort's the I/O close the exchange and
+ * send completion notification to scsi layer
+ */
+static void fc_timeout_error(struct fc_fcp_pkt *fsp)
+{
+	fsp->status_code = FC_CMD_TIME_OUT;
+	fsp->cdb_status = 0;
+	fsp->io_status = 0;
+	/*
+	 * if this fails then we let the scsi command timer fire and
+	 * scsi-ml escalate.
+	 */
+	fc_fcp_send_abort(fsp);
+}
+
+/*
+ * Sequence retransmission request.
+ * This is called after receiving status but insufficient data, or
+ * when expecting status but the request has timed out.
+ */
+static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum fc_rctl r_ctl, u32 offset)
+{
+	struct fc_lport *lp = fsp->lp;
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rp;
+	struct fc_exch *ep = fc_seq_exch(fsp->seq_ptr);
+	struct fc_seq *seq;
+	struct fcp_srr *srr;
+	struct fc_frame *fp;
+	u8 cdb_op;
+
+	rport = fsp->rport;
+	rp = rport->dd_data;
+	cdb_op = fsp->cdb_cmd.fc_cdb[0];
+
+	if (!(rp->flags & FC_RP_FLAGS_RETRY) || rp->rp_state != RPORT_ST_READY)
+		goto retry;			/* shouldn't happen */
+	fp = fc_frame_alloc(lp, sizeof(*srr));
+	if (!fp)
+		goto retry;
+
+	srr = fc_frame_payload_get(fp, sizeof(*srr));
+	memset(srr, 0, sizeof(*srr));
+	srr->srr_op = ELS_SRR;
+	srr->srr_ox_id = htons(ep->oxid);
+	srr->srr_rx_id = htons(ep->rxid);
+	srr->srr_r_ctl = r_ctl;
+	srr->srr_rel_off = htonl(offset);
+
+	fc_fill_fc_hdr(fp, FC_RCTL_ELS4_REQ, rport->port_id,
+		       fc_host_port_id(rp->local_port->host), FC_TYPE_FCP,
+		       FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT, 0);
+
+	seq = lp->tt.exch_seq_send(lp, fp, fc_fcp_srr_resp, NULL,
+				   fsp, jiffies_to_msecs(FC_SCSI_REC_TOV));
+	if (!seq) {
+		fc_frame_free(fp);
+		goto retry;
+	}
+	fsp->recov_seq = seq;
+	fsp->xfer_len = offset;
+	fsp->xfer_contig_end = offset;
+	fsp->state &= ~FC_SRB_RCV_STATUS;
+	fc_fcp_pkt_hold(fsp);		/* hold for outstanding SRR */
+	return;
+retry:
+	fc_fcp_retry_cmd(fsp);
+}
+
+/*
+ * Handle response from SRR.
+ */
+static void fc_fcp_srr_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
+{
+	struct fc_fcp_pkt *fsp = arg;
+	struct fc_frame_header *fh;
+
+	if (IS_ERR(fp)) {
+		fc_fcp_srr_error(fsp, fp);
+		return;
+	}
+
+	if (fc_fcp_lock_pkt(fsp))
+		goto out;
+
+	fh = fc_frame_header_get(fp);
+	/*
+	 * BUG? fc_fcp_srr_error calls exch_done which would release
+	 * the ep. But if fc_fcp_srr_error had got -FC_EX_TIMEOUT,
+	 * then fc_exch_timeout would be sending an abort. The exch_done
+	 * call by fc_fcp_srr_error would prevent fc_exch.c from seeing
+	 * an abort response though.
+	 */
+	if (fh->fh_type == FC_TYPE_BLS) {
+		fc_fcp_unlock_pkt(fsp);
+		return;
+	}
+
+	fsp->recov_seq = NULL;
+	switch (fc_frame_payload_op(fp)) {
+	case ELS_LS_ACC:
+		fsp->recov_retry = 0;
+		fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV);
+		break;
+	case ELS_LS_RJT:
+	default:
+		fc_timeout_error(fsp);
+		break;
+	}
+	fc_fcp_unlock_pkt(fsp);
+	fsp->lp->tt.exch_done(seq);
+out:
+	fc_frame_free(fp);
+	fc_fcp_pkt_release(fsp);	/* drop hold for outstanding SRR */
+}
+
+static void fc_fcp_srr_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
+{
+	if (fc_fcp_lock_pkt(fsp))
+		goto out;
+	fsp->lp->tt.exch_done(fsp->recov_seq);
+	fsp->recov_seq = NULL;
+	switch (PTR_ERR(fp)) {
+	case -FC_EX_TIMEOUT:
+		if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY)
+			fc_fcp_rec(fsp);
+		else
+			fc_timeout_error(fsp);
+		break;
+	case -FC_EX_CLOSED:			/* e.g., link failure */
+		/* fall through */
+	default:
+		fc_fcp_retry_cmd(fsp);
+		break;
+	}
+	fc_fcp_unlock_pkt(fsp);
+out:
+	fc_fcp_pkt_release(fsp);	/* drop hold for outstanding SRR */
+}
+
+static inline int fc_fcp_lport_queue_ready(struct fc_lport *lp)
+{
+	/* lock ? */
+	return (lp->state == LPORT_ST_READY) && (lp->link_status & FC_LINK_UP);
+}
+
+/**
+ * fc_queuecommand - The queuecommand function of the scsi template
+ * @cmd:	struct scsi_cmnd to be executed
+ * @done:	Callback function to be called when cmd is completed
+ *
+ * this is the i/o strategy routine, called by the scsi layer
+ * this routine is called with holding the host_lock.
+ */
+int fc_queuecommand(struct scsi_cmnd *sc_cmd, void (*done)(struct scsi_cmnd *))
+{
+	struct fc_lport *lp;
+	struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device));
+	struct fc_fcp_pkt *fsp;
+	struct fc_rport_libfc_priv *rp;
+	int rval;
+	int rc = 0;
+	struct fcoe_dev_stats *stats;
+
+	lp = shost_priv(sc_cmd->device->host);
+
+	rval = fc_remote_port_chkready(rport);
+	if (rval) {
+		sc_cmd->result = rval;
+		done(sc_cmd);
+		goto out;
+	}
+
+	if (!*(struct fc_remote_port **)rport->dd_data) {
+		/*
+		 * rport is transitioning from blocked/deleted to
+		 * online
+		 */
+		sc_cmd->result = DID_IMM_RETRY << 16;
+		done(sc_cmd);
+		goto out;
+	}
+
+	rp = rport->dd_data;
+
+	if (!fc_fcp_lport_queue_ready(lp)) {
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
+
+	fsp = fc_fcp_pkt_alloc(lp, GFP_ATOMIC);
+	if (fsp == NULL) {
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	}
+
+	/*
+	 * build the libfc request pkt
+	 */
+	fsp->cmd = sc_cmd;	/* save the cmd */
+	fsp->lp = lp;		/* save the softc ptr */
+	fsp->rport = rport;	/* set the remote port ptr */
+	sc_cmd->scsi_done = done;
+
+	/*
+	 * set up the transfer length
+	 */
+	fsp->data_len = scsi_bufflen(sc_cmd);
+	fsp->xfer_len = 0;
+
+	/*
+	 * setup the data direction
+	 */
+	stats = lp->dev_stats[smp_processor_id()];
+	if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) {
+		fsp->req_flags = FC_SRB_READ;
+		stats->InputRequests++;
+		stats->InputMegabytes = fsp->data_len;
+	} else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) {
+		fsp->req_flags = FC_SRB_WRITE;
+		stats->OutputRequests++;
+		stats->OutputMegabytes = fsp->data_len;
+	} else {
+		fsp->req_flags = 0;
+		stats->ControlRequests++;
+	}
+
+	fsp->tgt_flags = rp->flags;
+
+	init_timer(&fsp->timer);
+	fsp->timer.data = (unsigned long)fsp;
+
+	/*
+	 * send it to the lower layer
+	 * if we get -1 return then put the request in the pending
+	 * queue.
+	 */
+	rval = fc_fcp_pkt_send(lp, fsp);
+	if (rval != 0) {
+		fsp->state = FC_SRB_FREE;
+		fc_fcp_pkt_release(fsp);
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+	}
+out:
+	return rc;
+}
+EXPORT_SYMBOL(fc_queuecommand);
+
+/**
+ * fc_io_compl -  Handle responses for completed commands
+ * @fsp:	scsi packet
+ *
+ * Translates a error to a Linux SCSI error.
+ *
+ * The fcp packet lock must be held when calling.
+ */
+static void fc_io_compl(struct fc_fcp_pkt *fsp)
+{
+	struct fc_fcp_internal *si;
+	struct scsi_cmnd *sc_cmd;
+	struct fc_lport *lp;
+	unsigned long flags;
+
+	fsp->state |= FC_SRB_COMPL;
+	if (!(fsp->state & FC_SRB_FCP_PROCESSING_TMO)) {
+		spin_unlock_bh(&fsp->scsi_pkt_lock);
+		del_timer_sync(&fsp->timer);
+		spin_lock_bh(&fsp->scsi_pkt_lock);
+	}
+
+	lp = fsp->lp;
+	si = fc_get_scsi_internal(lp);
+	spin_lock_irqsave(lp->host->host_lock, flags);
+	if (!fsp->cmd) {
+		spin_unlock_irqrestore(lp->host->host_lock, flags);
+		return;
+	}
+
+	/*
+	 * if a command timed out while we had to try and throttle IO
+	 * and it is now getting cleaned up, then we are about to
+	 * try again so clear the throttled flag incase we get more
+	 * time outs.
+	 */
+	if (si->throttled && fsp->state & FC_SRB_NOMEM)
+		si->throttled = 0;
+
+	sc_cmd = fsp->cmd;
+	fsp->cmd = NULL;
+
+	if (!sc_cmd->SCp.ptr) {
+		spin_unlock_irqrestore(lp->host->host_lock, flags);
+		return;
+	}
+
+	CMD_SCSI_STATUS(sc_cmd) = fsp->cdb_status;
+	switch (fsp->status_code) {
+	case FC_COMPLETE:
+		if (fsp->cdb_status == 0) {
+			/*
+			 * good I/O status
+			 */
+			sc_cmd->result = DID_OK << 16;
+			if (fsp->scsi_resid)
+				CMD_RESID_LEN(sc_cmd) = fsp->scsi_resid;
+		} else if (fsp->cdb_status == QUEUE_FULL) {
+			struct scsi_device *tmp_sdev;
+			struct scsi_device *sdev = sc_cmd->device;
+
+			shost_for_each_device(tmp_sdev, sdev->host) {
+				if (tmp_sdev->id != sdev->id)
+					continue;
+
+				if (tmp_sdev->queue_depth > 1) {
+					scsi_track_queue_full(tmp_sdev,
+							      tmp_sdev->
+							      queue_depth - 1);
+				}
+			}
+			sc_cmd->result = (DID_OK << 16) | fsp->cdb_status;
+		} else {
+			/*
+			 * transport level I/O was ok but scsi
+			 * has non zero status
+			 */
+			sc_cmd->result = (DID_OK << 16) | fsp->cdb_status;
+		}
+		break;
+	case FC_ERROR:
+		sc_cmd->result = DID_ERROR << 16;
+		break;
+	case FC_DATA_UNDRUN:
+		if (fsp->cdb_status == 0) {
+			/*
+			 * scsi status is good but transport level
+			 * underrun. for read it should be an error??
+			 */
+			sc_cmd->result = (DID_OK << 16) | fsp->cdb_status;
+		} else {
+			/*
+			 * scsi got underrun, this is an error
+			 */
+			CMD_RESID_LEN(sc_cmd) = fsp->scsi_resid;
+			sc_cmd->result = (DID_ERROR << 16) | fsp->cdb_status;
+		}
+		break;
+	case FC_DATA_OVRRUN:
+		/*
+		 * overrun is an error
+		 */
+		sc_cmd->result = (DID_ERROR << 16) | fsp->cdb_status;
+		break;
+	case FC_CMD_ABORTED:
+		sc_cmd->result = (DID_ABORT << 16) | fsp->io_status;
+		break;
+	case FC_CMD_TIME_OUT:
+		sc_cmd->result = (DID_BUS_BUSY << 16) | fsp->io_status;
+		break;
+	case FC_CMD_RESET:
+		sc_cmd->result = (DID_RESET << 16);
+		break;
+	case FC_HRD_ERROR:
+		sc_cmd->result = (DID_NO_CONNECT << 16);
+		break;
+	default:
+		sc_cmd->result = (DID_ERROR << 16);
+		break;
+	}
+
+	list_del(&fsp->list);
+	sc_cmd->SCp.ptr = NULL;
+	sc_cmd->scsi_done(sc_cmd);
+	spin_unlock_irqrestore(lp->host->host_lock, flags);
+
+	/* release ref from initial allocation in queue command */
+	fc_fcp_pkt_release(fsp);
+}
+
+/**
+ * fc_fcp_complete - complete processing of a fcp packet
+ * @fsp:	fcp packet
+ *
+ * This function may sleep if a fsp timer is pending.
+ * The host lock must not be held by caller.
+ */
+void fc_fcp_complete(struct fc_fcp_pkt *fsp)
+{
+	if (fc_fcp_lock_pkt(fsp))
+		return;
+
+	fc_fcp_complete_locked(fsp);
+	fc_fcp_unlock_pkt(fsp);
+}
+EXPORT_SYMBOL(fc_fcp_complete);
+
+/**
+ * fc_eh_abort - Abort a command...from scsi host template
+ * @sc_cmd:	scsi command to abort
+ *
+ * send ABTS to the target device  and wait for the response
+ * sc_cmd is the pointer to the command to be aborted.
+ */
+int fc_eh_abort(struct scsi_cmnd *sc_cmd)
+{
+	struct fc_fcp_pkt *fsp;
+	struct fc_lport *lp;
+	int rc = FAILED;
+	unsigned long flags;
+
+	lp = shost_priv(sc_cmd->device->host);
+	if (lp->state != LPORT_ST_READY)
+		return rc;
+	else if (!(lp->link_status & FC_LINK_UP))
+		return rc;
+
+	spin_lock_irqsave(lp->host->host_lock, flags);
+	fsp = CMD_SP(sc_cmd);
+	if (!fsp) {
+		/* command completed while scsi eh was setting up */
+		spin_unlock_irqrestore(lp->host->host_lock, flags);
+		return SUCCESS;
+	}
+	/* grab a ref so the fsp and sc_cmd cannot be relased from under us */
+	fc_fcp_pkt_hold(fsp);
+	spin_unlock_irqrestore(lp->host->host_lock, flags);
+
+	if (fc_fcp_lock_pkt(fsp)) {
+		/* completed while we were waiting for timer to be deleted */
+		rc = SUCCESS;
+		goto release_pkt;
+	}
+
+	rc = fc_fcp_pkt_abort(lp, fsp);
+	fc_fcp_unlock_pkt(fsp);
+
+release_pkt:
+	fc_fcp_pkt_release(fsp);
+	return rc;
+}
+EXPORT_SYMBOL(fc_eh_abort);
+
+/**
+ * fc_eh_device_reset: Reset a single LUN
+ * @sc_cmd:	scsi command
+ *
+ * Set from scsi host template to send tm cmd to the target and wait for the
+ * response.
+ */
+int fc_eh_device_reset(struct scsi_cmnd *sc_cmd)
+{
+	struct fc_lport *lp;
+	struct fc_fcp_pkt *fsp;
+	struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device));
+	int rc = FAILED;
+	struct fc_rport_libfc_priv *rp;
+	int rval;
+
+	rval = fc_remote_port_chkready(rport);
+	if (rval)
+		goto out;
+
+	rp = rport->dd_data;
+	lp = shost_priv(sc_cmd->device->host);
+
+	if (lp->state != LPORT_ST_READY)
+		return rc;
+
+	fsp = fc_fcp_pkt_alloc(lp, GFP_NOIO);
+	if (fsp == NULL) {
+		FC_DBG("could not allocate scsi_pkt\n");
+		sc_cmd->result = DID_NO_CONNECT << 16;
+		goto out;
+	}
+
+	/*
+	 * Build the libfc request pkt. Do not set the scsi cmnd, because
+	 * the sc passed in is not setup for execution like when sent
+	 * through the queuecommand callout.
+	 */
+	fsp->lp = lp;		/* save the softc ptr */
+	fsp->rport = rport;	/* set the remote port ptr */
+
+	/*
+	 * flush outstanding commands
+	 */
+	rc = fc_lun_reset(lp, fsp, scmd_id(sc_cmd), sc_cmd->device->lun);
+	fsp->state = FC_SRB_FREE;
+	fc_fcp_pkt_release(fsp);
+
+out:
+	return rc;
+}
+EXPORT_SYMBOL(fc_eh_device_reset);
+
+/**
+ * fc_eh_host_reset - The reset function will reset the ports on the host.
+ * @sc_cmd:	scsi command
+ */
+int fc_eh_host_reset(struct scsi_cmnd *sc_cmd)
+{
+	struct Scsi_Host *shost = sc_cmd->device->host;
+	struct fc_lport *lp = shost_priv(shost);
+	unsigned long wait_tmo;
+
+	lp->tt.lport_reset(lp);
+	wait_tmo = jiffies + FC_HOST_RESET_TIMEOUT;
+	while (!fc_fcp_lport_queue_ready(lp) && time_before(jiffies, wait_tmo))
+		msleep(1000);
+
+	if (fc_fcp_lport_queue_ready(lp)) {
+		shost_printk(KERN_INFO, shost, "Host reset succeeded.\n");
+		return SUCCESS;
+	} else {
+		shost_printk(KERN_INFO, shost, "Host reset failed. "
+			     "lport not ready.\n");
+		return FAILED;
+	}
+}
+EXPORT_SYMBOL(fc_eh_host_reset);
+
+/**
+ * fc_slave_alloc - configure queue depth
+ * @sdev:	scsi device
+ *
+ * Configures queue depth based on host's cmd_per_len. If not set
+ * then we use the libfc default.
+ */
+int fc_slave_alloc(struct scsi_device *sdev)
+{
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+	int queue_depth;
+
+	if (!rport || fc_remote_port_chkready(rport))
+		return -ENXIO;
+
+	if (sdev->tagged_supported) {
+		if (sdev->host->hostt->cmd_per_lun)
+			queue_depth = sdev->host->hostt->cmd_per_lun;
+		else
+			queue_depth = FC_FCP_DFLT_QUEUE_DEPTH;
+		scsi_activate_tcq(sdev, queue_depth);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(fc_slave_alloc);
+
+int fc_change_queue_depth(struct scsi_device *sdev, int qdepth)
+{
+	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
+	return sdev->queue_depth;
+}
+EXPORT_SYMBOL(fc_change_queue_depth);
+
+int fc_change_queue_type(struct scsi_device *sdev, int tag_type)
+{
+	if (sdev->tagged_supported) {
+		scsi_set_tag_type(sdev, tag_type);
+		if (tag_type)
+			scsi_activate_tcq(sdev, sdev->queue_depth);
+		else
+			scsi_deactivate_tcq(sdev, sdev->queue_depth);
+	} else
+		tag_type = 0;
+
+	return tag_type;
+}
+EXPORT_SYMBOL(fc_change_queue_type);
+
+void fc_fcp_destroy(struct fc_lport *lp)
+{
+	struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
+
+	if (!list_empty(&si->scsi_pkt_queue))
+		printk(KERN_ERR "Leaked scsi packets.\n");
+
+	mempool_destroy(si->scsi_pkt_pool);
+	kfree(si);
+	lp->scsi_priv = NULL;
+}
+EXPORT_SYMBOL(fc_fcp_destroy);
+
+int fc_fcp_init(struct fc_lport *lp)
+{
+	int rc;
+	struct fc_fcp_internal *si;
+
+	if (!lp->tt.fcp_cmd_send)
+		lp->tt.fcp_cmd_send = fc_fcp_cmd_send;
+
+	if (!lp->tt.fcp_cleanup)
+		lp->tt.fcp_cleanup = fc_fcp_cleanup;
+
+	if (!lp->tt.fcp_abort_io)
+		lp->tt.fcp_abort_io = fc_fcp_abort_io;
+
+	si = kzalloc(sizeof(struct fc_fcp_internal), GFP_KERNEL);
+	if (!si)
+		return -ENOMEM;
+	lp->scsi_priv = si;
+	INIT_LIST_HEAD(&si->scsi_pkt_queue);
+
+	si->scsi_pkt_pool = mempool_create_slab_pool(2, scsi_pkt_cachep);
+	if (!si->scsi_pkt_pool) {
+		rc = -ENOMEM;
+		goto free_internal;
+	}
+	return 0;
+
+free_internal:
+	kfree(si);
+	return rc;
+}
+EXPORT_SYMBOL(fc_fcp_init);
+
+static int __init libfc_init(void)
+{
+	int rc;
+
+	scsi_pkt_cachep = kmem_cache_create("libfc_fcp_pkt",
+					    sizeof(struct fc_fcp_pkt),
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (scsi_pkt_cachep == NULL) {
+		FC_DBG("Unable to allocate SRB cache...module load failed!");
+		return -ENOMEM;
+	}
+
+	rc = fc_setup_exch_mgr();
+	if (rc)
+		goto destroy_pkt_cache;
+
+	rc = fc_setup_rport();
+	if (rc)
+		goto destroy_em;
+
+	return rc;
+destroy_em:
+	fc_destroy_exch_mgr();
+destroy_pkt_cache:
+	kmem_cache_destroy(scsi_pkt_cachep);
+	return rc;
+}
+
+static void __exit libfc_exit(void)
+{
+	kmem_cache_destroy(scsi_pkt_cachep);
+	fc_destroy_exch_mgr();
+	fc_destroy_rport();
+}
+
+module_init(libfc_init);
+module_exit(libfc_exit);
diff --git a/drivers/scsi/libfc/fc_frame.c b/drivers/scsi/libfc/fc_frame.c
new file mode 100644
index 000000000000..63fe00cfe667
--- /dev/null
+++ b/drivers/scsi/libfc/fc_frame.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+/*
+ * Frame allocation.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/crc32.h>
+
+#include <scsi/fc_frame.h>
+
+/*
+ * Check the CRC in a frame.
+ */
+u32 fc_frame_crc_check(struct fc_frame *fp)
+{
+	u32 crc;
+	u32 error;
+	const u8 *bp;
+	unsigned int len;
+
+	WARN_ON(!fc_frame_is_linear(fp));
+	fr_flags(fp) &= ~FCPHF_CRC_UNCHECKED;
+	len = (fr_len(fp) + 3) & ~3;	/* round up length to include fill */
+	bp = (const u8 *) fr_hdr(fp);
+	crc = ~crc32(~0, bp, len);
+	error = crc ^ fr_crc(fp);
+	return error;
+}
+EXPORT_SYMBOL(fc_frame_crc_check);
+
+/*
+ * Allocate a frame intended to be sent via fcoe_xmit.
+ * Get an sk_buff for the frame and set the length.
+ */
+struct fc_frame *__fc_frame_alloc(size_t len)
+{
+	struct fc_frame *fp;
+	struct sk_buff *skb;
+
+	WARN_ON((len % sizeof(u32)) != 0);
+	len += sizeof(struct fc_frame_header);
+	skb = dev_alloc_skb(len + FC_FRAME_HEADROOM + FC_FRAME_TAILROOM);
+	if (!skb)
+		return NULL;
+	fp = (struct fc_frame *) skb;
+	fc_frame_init(fp);
+	skb_reserve(skb, FC_FRAME_HEADROOM);
+	skb_put(skb, len);
+	return fp;
+}
+EXPORT_SYMBOL(__fc_frame_alloc);
+
+
+struct fc_frame *fc_frame_alloc_fill(struct fc_lport *lp, size_t payload_len)
+{
+	struct fc_frame *fp;
+	size_t fill;
+
+	fill = payload_len % 4;
+	if (fill != 0)
+		fill = 4 - fill;
+	fp = __fc_frame_alloc(payload_len + fill);
+	if (fp) {
+		memset((char *) fr_hdr(fp) + payload_len, 0, fill);
+		/* trim is OK, we just allocated it so there are no fragments */
+		skb_trim(fp_skb(fp),
+			 payload_len + sizeof(struct fc_frame_header));
+	}
+	return fp;
+}
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
new file mode 100644
index 000000000000..0b9bdb1fb807
--- /dev/null
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -0,0 +1,1604 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+/*
+ * PORT LOCKING NOTES
+ *
+ * These comments only apply to the 'port code' which consists of the lport,
+ * disc and rport blocks.
+ *
+ * MOTIVATION
+ *
+ * The lport, disc and rport blocks all have mutexes that are used to protect
+ * those objects. The main motivation for these locks is to prevent from
+ * having an lport reset just before we send a frame. In that scenario the
+ * lport's FID would get set to zero and then we'd send a frame with an
+ * invalid SID. We also need to ensure that states don't change unexpectedly
+ * while processing another state.
+ *
+ * HEIRARCHY
+ *
+ * The following heirarchy defines the locking rules. A greater lock
+ * may be held before acquiring a lesser lock, but a lesser lock should never
+ * be held while attempting to acquire a greater lock. Here is the heirarchy-
+ *
+ * lport > disc, lport > rport, disc > rport
+ *
+ * CALLBACKS
+ *
+ * The callbacks cause complications with this scheme. There is a callback
+ * from the rport (to either lport or disc) and a callback from disc
+ * (to the lport).
+ *
+ * As rports exit the rport state machine a callback is made to the owner of
+ * the rport to notify success or failure. Since the callback is likely to
+ * cause the lport or disc to grab its lock we cannot hold the rport lock
+ * while making the callback. To ensure that the rport is not free'd while
+ * processing the callback the rport callbacks are serialized through a
+ * single-threaded workqueue. An rport would never be free'd while in a
+ * callback handler becuase no other rport work in this queue can be executed
+ * at the same time.
+ *
+ * When discovery succeeds or fails a callback is made to the lport as
+ * notification. Currently, succesful discovery causes the lport to take no
+ * action. A failure will cause the lport to reset. There is likely a circular
+ * locking problem with this implementation.
+ */
+
+/*
+ * LPORT LOCKING
+ *
+ * The critical sections protected by the lport's mutex are quite broad and
+ * may be improved upon in the future. The lport code and its locking doesn't
+ * influence the I/O path, so excessive locking doesn't penalize I/O
+ * performance.
+ *
+ * The strategy is to lock whenever processing a request or response. Note
+ * that every _enter_* function corresponds to a state change. They generally
+ * change the lports state and then send a request out on the wire. We lock
+ * before calling any of these functions to protect that state change. This
+ * means that the entry points into the lport block manage the locks while
+ * the state machine can transition between states (i.e. _enter_* functions)
+ * while always staying protected.
+ *
+ * When handling responses we also hold the lport mutex broadly. When the
+ * lport receives the response frame it locks the mutex and then calls the
+ * appropriate handler for the particuar response. Generally a response will
+ * trigger a state change and so the lock must already be held.
+ *
+ * Retries also have to consider the locking. The retries occur from a work
+ * context and the work function will lock the lport and then retry the state
+ * (i.e. _enter_* function).
+ */
+
+#include <linux/timer.h>
+#include <asm/unaligned.h>
+
+#include <scsi/fc/fc_gs.h>
+
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+/* Fabric IDs to use for point-to-point mode, chosen on whims. */
+#define FC_LOCAL_PTP_FID_LO   0x010101
+#define FC_LOCAL_PTP_FID_HI   0x010102
+
+#define	DNS_DELAY	      3 /* Discovery delay after RSCN (in seconds)*/
+
+static int fc_lport_debug;
+
+#define FC_DEBUG_LPORT(fmt...)			\
+	do {					\
+		if (fc_lport_debug)		\
+			FC_DBG(fmt);		\
+	} while (0)
+
+static void fc_lport_error(struct fc_lport *, struct fc_frame *);
+
+static void fc_lport_enter_reset(struct fc_lport *);
+static void fc_lport_enter_flogi(struct fc_lport *);
+static void fc_lport_enter_dns(struct fc_lport *);
+static void fc_lport_enter_rpn_id(struct fc_lport *);
+static void fc_lport_enter_rft_id(struct fc_lport *);
+static void fc_lport_enter_scr(struct fc_lport *);
+static void fc_lport_enter_ready(struct fc_lport *);
+static void fc_lport_enter_logo(struct fc_lport *);
+
+static const char *fc_lport_state_names[] = {
+	[LPORT_ST_NONE] =     "none",
+	[LPORT_ST_FLOGI] =    "FLOGI",
+	[LPORT_ST_DNS] =      "dNS",
+	[LPORT_ST_RPN_ID] =   "RPN_ID",
+	[LPORT_ST_RFT_ID] =   "RFT_ID",
+	[LPORT_ST_SCR] =      "SCR",
+	[LPORT_ST_READY] =    "Ready",
+	[LPORT_ST_LOGO] =     "LOGO",
+	[LPORT_ST_RESET] =    "reset",
+};
+
+static int fc_frame_drop(struct fc_lport *lport, struct fc_frame *fp)
+{
+	fc_frame_free(fp);
+	return 0;
+}
+
+/**
+ * fc_lport_rport_callback - Event handler for rport events
+ * @lport: The lport which is receiving the event
+ * @rport: The rport which the event has occured on
+ * @event: The event that occured
+ *
+ * Locking Note: The rport lock should not be held when calling
+ *		 this function.
+ */
+static void fc_lport_rport_callback(struct fc_lport *lport,
+				    struct fc_rport *rport,
+				    enum fc_rport_event event)
+{
+	FC_DEBUG_LPORT("Received a %d event for port (%6x)\n", event,
+		       rport->port_id);
+
+	switch (event) {
+	case RPORT_EV_CREATED:
+		if (rport->port_id == FC_FID_DIR_SERV) {
+			mutex_lock(&lport->lp_mutex);
+			if (lport->state == LPORT_ST_DNS) {
+				lport->dns_rp = rport;
+				fc_lport_enter_rpn_id(lport);
+			} else {
+				FC_DEBUG_LPORT("Received an CREATED event on "
+					       "port (%6x) for the directory "
+					       "server, but the lport is not "
+					       "in the DNS state, it's in the "
+					       "%d state", rport->port_id,
+					       lport->state);
+				lport->tt.rport_logoff(rport);
+			}
+			mutex_unlock(&lport->lp_mutex);
+		} else
+			FC_DEBUG_LPORT("Received an event for port (%6x) "
+				       "which is not the directory server\n",
+				       rport->port_id);
+		break;
+	case RPORT_EV_LOGO:
+	case RPORT_EV_FAILED:
+	case RPORT_EV_STOP:
+		if (rport->port_id == FC_FID_DIR_SERV) {
+			mutex_lock(&lport->lp_mutex);
+			lport->dns_rp = NULL;
+			mutex_unlock(&lport->lp_mutex);
+
+		} else
+			FC_DEBUG_LPORT("Received an event for port (%6x) "
+				       "which is not the directory server\n",
+				       rport->port_id);
+		break;
+	case RPORT_EV_NONE:
+		break;
+	}
+}
+
+/**
+ * fc_lport_state - Return a string which represents the lport's state
+ * @lport: The lport whose state is to converted to a string
+ */
+static const char *fc_lport_state(struct fc_lport *lport)
+{
+	const char *cp;
+
+	cp = fc_lport_state_names[lport->state];
+	if (!cp)
+		cp = "unknown";
+	return cp;
+}
+
+/**
+ * fc_lport_ptp_setup - Create an rport for point-to-point mode
+ * @lport: The lport to attach the ptp rport to
+ * @fid: The FID of the ptp rport
+ * @remote_wwpn: The WWPN of the ptp rport
+ * @remote_wwnn: The WWNN of the ptp rport
+ */
+static void fc_lport_ptp_setup(struct fc_lport *lport,
+			       u32 remote_fid, u64 remote_wwpn,
+			       u64 remote_wwnn)
+{
+	struct fc_disc_port dp;
+
+	dp.lp = lport;
+	dp.ids.port_id = remote_fid;
+	dp.ids.port_name = remote_wwpn;
+	dp.ids.node_name = remote_wwnn;
+	dp.ids.roles = FC_RPORT_ROLE_UNKNOWN;
+
+	if (lport->ptp_rp) {
+		lport->tt.rport_logoff(lport->ptp_rp);
+		lport->ptp_rp = NULL;
+	}
+
+	lport->ptp_rp = fc_rport_rogue_create(&dp);
+
+	lport->tt.rport_login(lport->ptp_rp);
+
+	fc_lport_enter_ready(lport);
+}
+
+void fc_get_host_port_type(struct Scsi_Host *shost)
+{
+	/* TODO - currently just NPORT */
+	fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+}
+EXPORT_SYMBOL(fc_get_host_port_type);
+
+void fc_get_host_port_state(struct Scsi_Host *shost)
+{
+	struct fc_lport *lp = shost_priv(shost);
+
+	if ((lp->link_status & FC_LINK_UP) == FC_LINK_UP)
+		fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+	else
+		fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
+}
+EXPORT_SYMBOL(fc_get_host_port_state);
+
+void fc_get_host_speed(struct Scsi_Host *shost)
+{
+	struct fc_lport *lport = shost_priv(shost);
+
+	fc_host_speed(shost) = lport->link_speed;
+}
+EXPORT_SYMBOL(fc_get_host_speed);
+
+struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *shost)
+{
+	int i;
+	struct fc_host_statistics *fcoe_stats;
+	struct fc_lport *lp = shost_priv(shost);
+	struct timespec v0, v1;
+
+	fcoe_stats = &lp->host_stats;
+	memset(fcoe_stats, 0, sizeof(struct fc_host_statistics));
+
+	jiffies_to_timespec(jiffies, &v0);
+	jiffies_to_timespec(lp->boot_time, &v1);
+	fcoe_stats->seconds_since_last_reset = (v0.tv_sec - v1.tv_sec);
+
+	for_each_online_cpu(i) {
+		struct fcoe_dev_stats *stats = lp->dev_stats[i];
+		if (stats == NULL)
+			continue;
+		fcoe_stats->tx_frames += stats->TxFrames;
+		fcoe_stats->tx_words += stats->TxWords;
+		fcoe_stats->rx_frames += stats->RxFrames;
+		fcoe_stats->rx_words += stats->RxWords;
+		fcoe_stats->error_frames += stats->ErrorFrames;
+		fcoe_stats->invalid_crc_count += stats->InvalidCRCCount;
+		fcoe_stats->fcp_input_requests += stats->InputRequests;
+		fcoe_stats->fcp_output_requests += stats->OutputRequests;
+		fcoe_stats->fcp_control_requests += stats->ControlRequests;
+		fcoe_stats->fcp_input_megabytes += stats->InputMegabytes;
+		fcoe_stats->fcp_output_megabytes += stats->OutputMegabytes;
+		fcoe_stats->link_failure_count += stats->LinkFailureCount;
+	}
+	fcoe_stats->lip_count = -1;
+	fcoe_stats->nos_count = -1;
+	fcoe_stats->loss_of_sync_count = -1;
+	fcoe_stats->loss_of_signal_count = -1;
+	fcoe_stats->prim_seq_protocol_err_count = -1;
+	fcoe_stats->dumped_frames = -1;
+	return fcoe_stats;
+}
+EXPORT_SYMBOL(fc_get_host_stats);
+
+/*
+ * Fill in FLOGI command for request.
+ */
+static void
+fc_lport_flogi_fill(struct fc_lport *lport, struct fc_els_flogi *flogi,
+		    unsigned int op)
+{
+	struct fc_els_csp *sp;
+	struct fc_els_cssp *cp;
+
+	memset(flogi, 0, sizeof(*flogi));
+	flogi->fl_cmd = (u8) op;
+	put_unaligned_be64(lport->wwpn, &flogi->fl_wwpn);
+	put_unaligned_be64(lport->wwnn, &flogi->fl_wwnn);
+	sp = &flogi->fl_csp;
+	sp->sp_hi_ver = 0x20;
+	sp->sp_lo_ver = 0x20;
+	sp->sp_bb_cred = htons(10);	/* this gets set by gateway */
+	sp->sp_bb_data = htons((u16) lport->mfs);
+	cp = &flogi->fl_cssp[3 - 1];	/* class 3 parameters */
+	cp->cp_class = htons(FC_CPC_VALID | FC_CPC_SEQ);
+	if (op != ELS_FLOGI) {
+		sp->sp_features = htons(FC_SP_FT_CIRO);
+		sp->sp_tot_seq = htons(255);	/* seq. we accept */
+		sp->sp_rel_off = htons(0x1f);
+		sp->sp_e_d_tov = htonl(lport->e_d_tov);
+
+		cp->cp_rdfs = htons((u16) lport->mfs);
+		cp->cp_con_seq = htons(255);
+		cp->cp_open_seq = 1;
+	}
+}
+
+/*
+ * Add a supported FC-4 type.
+ */
+static void fc_lport_add_fc4_type(struct fc_lport *lport, enum fc_fh_type type)
+{
+	__be32 *mp;
+
+	mp = &lport->fcts.ff_type_map[type / FC_NS_BPW];
+	*mp = htonl(ntohl(*mp) | 1UL << (type % FC_NS_BPW));
+}
+
+/**
+ * fc_lport_recv_rlir_req - Handle received Registered Link Incident Report.
+ * @lport: Fibre Channel local port recieving the RLIR
+ * @sp: current sequence in the RLIR exchange
+ * @fp: RLIR request frame
+ *
+ * Locking Note: The lport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_lport_recv_rlir_req(struct fc_seq *sp, struct fc_frame *fp,
+				   struct fc_lport *lport)
+{
+	FC_DEBUG_LPORT("Received RLIR request while in state %s\n",
+		       fc_lport_state(lport));
+
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	fc_frame_free(fp);
+}
+
+/**
+ * fc_lport_recv_echo_req - Handle received ECHO request
+ * @lport: Fibre Channel local port recieving the ECHO
+ * @sp: current sequence in the ECHO exchange
+ * @fp: ECHO request frame
+ *
+ * Locking Note: The lport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_lport_recv_echo_req(struct fc_seq *sp, struct fc_frame *in_fp,
+				   struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+	struct fc_exch *ep = fc_seq_exch(sp);
+	unsigned int len;
+	void *pp;
+	void *dp;
+	u32 f_ctl;
+
+	FC_DEBUG_LPORT("Received RLIR request while in state %s\n",
+		       fc_lport_state(lport));
+
+	len = fr_len(in_fp) - sizeof(struct fc_frame_header);
+	pp = fc_frame_payload_get(in_fp, len);
+
+	if (len < sizeof(__be32))
+		len = sizeof(__be32);
+
+	fp = fc_frame_alloc(lport, len);
+	if (fp) {
+		dp = fc_frame_payload_get(fp, len);
+		memcpy(dp, pp, len);
+		*((u32 *)dp) = htonl(ELS_LS_ACC << 24);
+		sp = lport->tt.seq_start_next(sp);
+		f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ;
+		fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+			       FC_TYPE_ELS, f_ctl, 0);
+		lport->tt.seq_send(lport, sp, fp);
+	}
+	fc_frame_free(in_fp);
+}
+
+/**
+ * fc_lport_recv_echo_req - Handle received Request Node ID data request
+ * @lport: Fibre Channel local port recieving the RNID
+ * @sp: current sequence in the RNID exchange
+ * @fp: RNID request frame
+ *
+ * Locking Note: The lport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
+				   struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+	struct fc_exch *ep = fc_seq_exch(sp);
+	struct fc_els_rnid *req;
+	struct {
+		struct fc_els_rnid_resp rnid;
+		struct fc_els_rnid_cid cid;
+		struct fc_els_rnid_gen gen;
+	} *rp;
+	struct fc_seq_els_data rjt_data;
+	u8 fmt;
+	size_t len;
+	u32 f_ctl;
+
+	FC_DEBUG_LPORT("Received RNID request while in state %s\n",
+		       fc_lport_state(lport));
+
+	req = fc_frame_payload_get(in_fp, sizeof(*req));
+	if (!req) {
+		rjt_data.fp = NULL;
+		rjt_data.reason = ELS_RJT_LOGIC;
+		rjt_data.explan = ELS_EXPL_NONE;
+		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	} else {
+		fmt = req->rnid_fmt;
+		len = sizeof(*rp);
+		if (fmt != ELS_RNIDF_GEN ||
+		    ntohl(lport->rnid_gen.rnid_atype) == 0) {
+			fmt = ELS_RNIDF_NONE;	/* nothing to provide */
+			len -= sizeof(rp->gen);
+		}
+		fp = fc_frame_alloc(lport, len);
+		if (fp) {
+			rp = fc_frame_payload_get(fp, len);
+			memset(rp, 0, len);
+			rp->rnid.rnid_cmd = ELS_LS_ACC;
+			rp->rnid.rnid_fmt = fmt;
+			rp->rnid.rnid_cid_len = sizeof(rp->cid);
+			rp->cid.rnid_wwpn = htonll(lport->wwpn);
+			rp->cid.rnid_wwnn = htonll(lport->wwnn);
+			if (fmt == ELS_RNIDF_GEN) {
+				rp->rnid.rnid_sid_len = sizeof(rp->gen);
+				memcpy(&rp->gen, &lport->rnid_gen,
+				       sizeof(rp->gen));
+			}
+			sp = lport->tt.seq_start_next(sp);
+			f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
+			f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+			fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+				       FC_TYPE_ELS, f_ctl, 0);
+			lport->tt.seq_send(lport, sp, fp);
+		}
+	}
+	fc_frame_free(in_fp);
+}
+
+/**
+ * fc_lport_recv_adisc_req - Handle received Address Discovery Request
+ * @lport: Fibre Channel local port recieving the ADISC
+ * @sp: current sequence in the ADISC exchange
+ * @fp: ADISC request frame
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this function.
+ */
+static void fc_lport_recv_adisc_req(struct fc_seq *sp, struct fc_frame *in_fp,
+				    struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+	struct fc_exch *ep = fc_seq_exch(sp);
+	struct fc_els_adisc *req, *rp;
+	struct fc_seq_els_data rjt_data;
+	size_t len;
+	u32 f_ctl;
+
+	FC_DEBUG_LPORT("Received ADISC request while in state %s\n",
+		       fc_lport_state(lport));
+
+	req = fc_frame_payload_get(in_fp, sizeof(*req));
+	if (!req) {
+		rjt_data.fp = NULL;
+		rjt_data.reason = ELS_RJT_LOGIC;
+		rjt_data.explan = ELS_EXPL_NONE;
+		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	} else {
+		len = sizeof(*rp);
+		fp = fc_frame_alloc(lport, len);
+		if (fp) {
+			rp = fc_frame_payload_get(fp, len);
+			memset(rp, 0, len);
+			rp->adisc_cmd = ELS_LS_ACC;
+			rp->adisc_wwpn = htonll(lport->wwpn);
+			rp->adisc_wwnn = htonll(lport->wwnn);
+			hton24(rp->adisc_port_id,
+			       fc_host_port_id(lport->host));
+			sp = lport->tt.seq_start_next(sp);
+			f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
+			f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+			fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+				       FC_TYPE_ELS, f_ctl, 0);
+			lport->tt.seq_send(lport, sp, fp);
+		}
+	}
+	fc_frame_free(in_fp);
+}
+
+/**
+ * fc_lport_recv_logo_req - Handle received fabric LOGO request
+ * @lport: Fibre Channel local port recieving the LOGO
+ * @sp: current sequence in the LOGO exchange
+ * @fp: LOGO request frame
+ *
+ * Locking Note: The lport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_lport_recv_logo_req(struct fc_seq *sp, struct fc_frame *fp,
+				   struct fc_lport *lport)
+{
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	fc_lport_enter_reset(lport);
+	fc_frame_free(fp);
+}
+
+/**
+ * fc_fabric_login - Start the lport state machine
+ * @lport: The lport that should log into the fabric
+ *
+ * Locking Note: This function should not be called
+ *		 with the lport lock held.
+ */
+int fc_fabric_login(struct fc_lport *lport)
+{
+	int rc = -1;
+
+	mutex_lock(&lport->lp_mutex);
+	if (lport->state == LPORT_ST_NONE) {
+		fc_lport_enter_reset(lport);
+		rc = 0;
+	}
+	mutex_unlock(&lport->lp_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL(fc_fabric_login);
+
+/**
+ * fc_linkup - Handler for transport linkup events
+ * @lport: The lport whose link is up
+ */
+void fc_linkup(struct fc_lport *lport)
+{
+	FC_DEBUG_LPORT("Link is up for port (%6x)\n",
+		       fc_host_port_id(lport->host));
+
+	mutex_lock(&lport->lp_mutex);
+	if ((lport->link_status & FC_LINK_UP) != FC_LINK_UP) {
+		lport->link_status |= FC_LINK_UP;
+
+		if (lport->state == LPORT_ST_RESET)
+			fc_lport_enter_flogi(lport);
+	}
+	mutex_unlock(&lport->lp_mutex);
+}
+EXPORT_SYMBOL(fc_linkup);
+
+/**
+ * fc_linkdown - Handler for transport linkdown events
+ * @lport: The lport whose link is down
+ */
+void fc_linkdown(struct fc_lport *lport)
+{
+	mutex_lock(&lport->lp_mutex);
+	FC_DEBUG_LPORT("Link is down for port (%6x)\n",
+		       fc_host_port_id(lport->host));
+
+	if ((lport->link_status & FC_LINK_UP) == FC_LINK_UP) {
+		lport->link_status &= ~(FC_LINK_UP);
+		fc_lport_enter_reset(lport);
+		lport->tt.fcp_cleanup(lport);
+	}
+	mutex_unlock(&lport->lp_mutex);
+}
+EXPORT_SYMBOL(fc_linkdown);
+
+/**
+ * fc_pause - Pause the flow of frames
+ * @lport: The lport to be paused
+ */
+void fc_pause(struct fc_lport *lport)
+{
+	mutex_lock(&lport->lp_mutex);
+	lport->link_status |= FC_PAUSE;
+	mutex_unlock(&lport->lp_mutex);
+}
+EXPORT_SYMBOL(fc_pause);
+
+/**
+ * fc_unpause - Unpause the flow of frames
+ * @lport: The lport to be unpaused
+ */
+void fc_unpause(struct fc_lport *lport)
+{
+	mutex_lock(&lport->lp_mutex);
+	lport->link_status &= ~(FC_PAUSE);
+	mutex_unlock(&lport->lp_mutex);
+}
+EXPORT_SYMBOL(fc_unpause);
+
+/**
+ * fc_fabric_logoff - Logout of the fabric
+ * @lport:	      fc_lport pointer to logoff the fabric
+ *
+ * Return value:
+ *	0 for success, -1 for failure
+ **/
+int fc_fabric_logoff(struct fc_lport *lport)
+{
+	lport->tt.disc_stop_final(lport);
+	mutex_lock(&lport->lp_mutex);
+	fc_lport_enter_logo(lport);
+	mutex_unlock(&lport->lp_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(fc_fabric_logoff);
+
+/**
+ * fc_lport_destroy - unregister a fc_lport
+ * @lport:	      fc_lport pointer to unregister
+ *
+ * Return value:
+ *	None
+ * Note:
+ * exit routine for fc_lport instance
+ * clean-up all the allocated memory
+ * and free up other system resources.
+ *
+ **/
+int fc_lport_destroy(struct fc_lport *lport)
+{
+	lport->tt.frame_send = fc_frame_drop;
+	lport->tt.fcp_abort_io(lport);
+	lport->tt.exch_mgr_reset(lport->emp, 0, 0);
+	return 0;
+}
+EXPORT_SYMBOL(fc_lport_destroy);
+
+/**
+ * fc_set_mfs - sets up the mfs for the corresponding fc_lport
+ * @lport: fc_lport pointer to unregister
+ * @mfs: the new mfs for fc_lport
+ *
+ * Set mfs for the given fc_lport to the new mfs.
+ *
+ * Return: 0 for success
+ *
+ **/
+int fc_set_mfs(struct fc_lport *lport, u32 mfs)
+{
+	unsigned int old_mfs;
+	int rc = -EINVAL;
+
+	mutex_lock(&lport->lp_mutex);
+
+	old_mfs = lport->mfs;
+
+	if (mfs >= FC_MIN_MAX_FRAME) {
+		mfs &= ~3;
+		if (mfs > FC_MAX_FRAME)
+			mfs = FC_MAX_FRAME;
+		mfs -= sizeof(struct fc_frame_header);
+		lport->mfs = mfs;
+		rc = 0;
+	}
+
+	if (!rc && mfs < old_mfs)
+		fc_lport_enter_reset(lport);
+
+	mutex_unlock(&lport->lp_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL(fc_set_mfs);
+
+/**
+ * fc_lport_disc_callback - Callback for discovery events
+ * @lport: FC local port
+ * @event: The discovery event
+ */
+void fc_lport_disc_callback(struct fc_lport *lport, enum fc_disc_event event)
+{
+	switch (event) {
+	case DISC_EV_SUCCESS:
+		FC_DEBUG_LPORT("Got a SUCCESS event for port (%6x)\n",
+			       fc_host_port_id(lport->host));
+		break;
+	case DISC_EV_FAILED:
+		FC_DEBUG_LPORT("Got a FAILED event for port (%6x)\n",
+			       fc_host_port_id(lport->host));
+		mutex_lock(&lport->lp_mutex);
+		fc_lport_enter_reset(lport);
+		mutex_unlock(&lport->lp_mutex);
+		break;
+	case DISC_EV_NONE:
+		WARN_ON(1);
+		break;
+	}
+}
+
+/**
+ * fc_rport_enter_ready - Enter the ready state and start discovery
+ * @lport: Fibre Channel local port that is ready
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_ready(struct fc_lport *lport)
+{
+	FC_DEBUG_LPORT("Port (%6x) entered Ready from state %s\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_READY);
+
+	lport->tt.disc_start(fc_lport_disc_callback, lport);
+}
+
+/**
+ * fc_lport_recv_flogi_req - Receive a FLOGI request
+ * @sp_in: The sequence the FLOGI is on
+ * @rx_fp: The frame the FLOGI is in
+ * @lport: The lport that recieved the request
+ *
+ * A received FLOGI request indicates a point-to-point connection.
+ * Accept it with the common service parameters indicating our N port.
+ * Set up to do a PLOGI if we have the higher-number WWPN.
+ *
+ * Locking Note: The lport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
+				    struct fc_frame *rx_fp,
+				    struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+	struct fc_frame_header *fh;
+	struct fc_seq *sp;
+	struct fc_exch *ep;
+	struct fc_els_flogi *flp;
+	struct fc_els_flogi *new_flp;
+	u64 remote_wwpn;
+	u32 remote_fid;
+	u32 local_fid;
+	u32 f_ctl;
+
+	FC_DEBUG_LPORT("Received FLOGI request while in state %s\n",
+		       fc_lport_state(lport));
+
+	fh = fc_frame_header_get(rx_fp);
+	remote_fid = ntoh24(fh->fh_s_id);
+	flp = fc_frame_payload_get(rx_fp, sizeof(*flp));
+	if (!flp)
+		goto out;
+	remote_wwpn = get_unaligned_be64(&flp->fl_wwpn);
+	if (remote_wwpn == lport->wwpn) {
+		FC_DBG("FLOGI from port with same WWPN %llx "
+		       "possible configuration error\n", remote_wwpn);
+		goto out;
+	}
+	FC_DBG("FLOGI from port WWPN %llx\n", remote_wwpn);
+
+	/*
+	 * XXX what is the right thing to do for FIDs?
+	 * The originator might expect our S_ID to be 0xfffffe.
+	 * But if so, both of us could end up with the same FID.
+	 */
+	local_fid = FC_LOCAL_PTP_FID_LO;
+	if (remote_wwpn < lport->wwpn) {
+		local_fid = FC_LOCAL_PTP_FID_HI;
+		if (!remote_fid || remote_fid == local_fid)
+			remote_fid = FC_LOCAL_PTP_FID_LO;
+	} else if (!remote_fid) {
+		remote_fid = FC_LOCAL_PTP_FID_HI;
+	}
+
+	fc_host_port_id(lport->host) = local_fid;
+
+	fp = fc_frame_alloc(lport, sizeof(*flp));
+	if (fp) {
+		sp = lport->tt.seq_start_next(fr_seq(rx_fp));
+		new_flp = fc_frame_payload_get(fp, sizeof(*flp));
+		fc_lport_flogi_fill(lport, new_flp, ELS_FLOGI);
+		new_flp->fl_cmd = (u8) ELS_LS_ACC;
+
+		/*
+		 * Send the response.  If this fails, the originator should
+		 * repeat the sequence.
+		 */
+		f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ;
+		ep = fc_seq_exch(sp);
+		fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+			       FC_TYPE_ELS, f_ctl, 0);
+		lport->tt.seq_send(lport, sp, fp);
+
+	} else {
+		fc_lport_error(lport, fp);
+	}
+	fc_lport_ptp_setup(lport, remote_fid, remote_wwpn,
+			   get_unaligned_be64(&flp->fl_wwnn));
+
+	lport->tt.disc_start(fc_lport_disc_callback, lport);
+
+out:
+	sp = fr_seq(rx_fp);
+	fc_frame_free(rx_fp);
+}
+
+/**
+ * fc_lport_recv_req - The generic lport request handler
+ * @lport: The lport that received the request
+ * @sp: The sequence the request is on
+ * @fp: The frame the request is in
+ *
+ * This function will see if the lport handles the request or
+ * if an rport should handle the request.
+ *
+ * Locking Note: This function should not be called with the lport
+ *		 lock held becuase it will grab the lock.
+ */
+static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp,
+			      struct fc_frame *fp)
+{
+	struct fc_frame_header *fh = fc_frame_header_get(fp);
+	void (*recv) (struct fc_seq *, struct fc_frame *, struct fc_lport *);
+	struct fc_rport *rport;
+	u32 s_id;
+	u32 d_id;
+	struct fc_seq_els_data rjt_data;
+
+	mutex_lock(&lport->lp_mutex);
+
+	/*
+	 * Handle special ELS cases like FLOGI, LOGO, and
+	 * RSCN here.  These don't require a session.
+	 * Even if we had a session, it might not be ready.
+	 */
+	if (fh->fh_type == FC_TYPE_ELS && fh->fh_r_ctl == FC_RCTL_ELS_REQ) {
+		/*
+		 * Check opcode.
+		 */
+		recv = NULL;
+		switch (fc_frame_payload_op(fp)) {
+		case ELS_FLOGI:
+			recv = fc_lport_recv_flogi_req;
+			break;
+		case ELS_LOGO:
+			fh = fc_frame_header_get(fp);
+			if (ntoh24(fh->fh_s_id) == FC_FID_FLOGI)
+				recv = fc_lport_recv_logo_req;
+			break;
+		case ELS_RSCN:
+			recv = lport->tt.disc_recv_req;
+			break;
+		case ELS_ECHO:
+			recv = fc_lport_recv_echo_req;
+			break;
+		case ELS_RLIR:
+			recv = fc_lport_recv_rlir_req;
+			break;
+		case ELS_RNID:
+			recv = fc_lport_recv_rnid_req;
+			break;
+		case ELS_ADISC:
+			recv = fc_lport_recv_adisc_req;
+			break;
+		}
+
+		if (recv)
+			recv(sp, fp, lport);
+		else {
+			/*
+			 * Find session.
+			 * If this is a new incoming PLOGI, we won't find it.
+			 */
+			s_id = ntoh24(fh->fh_s_id);
+			d_id = ntoh24(fh->fh_d_id);
+
+			rport = lport->tt.rport_lookup(lport, s_id);
+			if (rport)
+				lport->tt.rport_recv_req(sp, fp, rport);
+			else {
+				rjt_data.fp = NULL;
+				rjt_data.reason = ELS_RJT_UNAB;
+				rjt_data.explan = ELS_EXPL_NONE;
+				lport->tt.seq_els_rsp_send(sp,
+							   ELS_LS_RJT,
+							   &rjt_data);
+				fc_frame_free(fp);
+			}
+		}
+	} else {
+		FC_DBG("dropping invalid frame (eof %x)\n", fr_eof(fp));
+		fc_frame_free(fp);
+	}
+	mutex_unlock(&lport->lp_mutex);
+
+	/*
+	 *  The common exch_done for all request may not be good
+	 *  if any request requires longer hold on exhange. XXX
+	 */
+	lport->tt.exch_done(sp);
+}
+
+/**
+ * fc_lport_reset - Reset an lport
+ * @lport: The lport which should be reset
+ *
+ * Locking Note: This functions should not be called with the
+ *		 lport lock held.
+ */
+int fc_lport_reset(struct fc_lport *lport)
+{
+	mutex_lock(&lport->lp_mutex);
+	fc_lport_enter_reset(lport);
+	mutex_unlock(&lport->lp_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(fc_lport_reset);
+
+/**
+ * fc_rport_enter_reset - Reset the local port
+ * @lport: Fibre Channel local port to be reset
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_reset(struct fc_lport *lport)
+{
+	FC_DEBUG_LPORT("Port (%6x) entered RESET state from %s state\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_RESET);
+
+	if (lport->dns_rp)
+		lport->tt.rport_logoff(lport->dns_rp);
+
+	if (lport->ptp_rp) {
+		lport->tt.rport_logoff(lport->ptp_rp);
+		lport->ptp_rp = NULL;
+	}
+
+	lport->tt.disc_stop(lport);
+
+	lport->tt.exch_mgr_reset(lport->emp, 0, 0);
+	fc_host_fabric_name(lport->host) = 0;
+	fc_host_port_id(lport->host) = 0;
+
+	if ((lport->link_status & FC_LINK_UP) == FC_LINK_UP)
+		fc_lport_enter_flogi(lport);
+}
+
+/**
+ * fc_lport_error - Handler for any errors
+ * @lport: The fc_lport object
+ * @fp: The frame pointer
+ *
+ * If the error was caused by a resource allocation failure
+ * then wait for half a second and retry, otherwise retry
+ * after the e_d_tov time.
+ */
+static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp)
+{
+	unsigned long delay = 0;
+	FC_DEBUG_LPORT("Error %ld in state %s, retries %d\n",
+		       PTR_ERR(fp), fc_lport_state(lport),
+		       lport->retry_count);
+
+	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
+		/*
+		 * Memory allocation failure, or the exchange timed out.
+		 *  Retry after delay
+		 */
+		if (lport->retry_count < lport->max_retry_count) {
+			lport->retry_count++;
+			if (!fp)
+				delay = msecs_to_jiffies(500);
+			else
+				delay =	msecs_to_jiffies(lport->e_d_tov);
+
+			schedule_delayed_work(&lport->retry_work, delay);
+		} else {
+			switch (lport->state) {
+			case LPORT_ST_NONE:
+			case LPORT_ST_READY:
+			case LPORT_ST_RESET:
+			case LPORT_ST_RPN_ID:
+			case LPORT_ST_RFT_ID:
+			case LPORT_ST_SCR:
+			case LPORT_ST_DNS:
+			case LPORT_ST_FLOGI:
+			case LPORT_ST_LOGO:
+				fc_lport_enter_reset(lport);
+				break;
+			}
+		}
+	}
+}
+
+/**
+ * fc_lport_rft_id_resp - Handle response to Register Fibre
+ *			  Channel Types by ID (RPN_ID) request
+ * @sp: current sequence in RPN_ID exchange
+ * @fp: response frame
+ * @lp_arg: Fibre Channel host port instance
+ *
+ * Locking Note: This function will be called without the lport lock
+ * held, but it will lock, call an _enter_* function or fc_lport_error
+ * and then unlock the lport.
+ */
+static void fc_lport_rft_id_resp(struct fc_seq *sp, struct fc_frame *fp,
+				 void *lp_arg)
+{
+	struct fc_lport *lport = lp_arg;
+	struct fc_frame_header *fh;
+	struct fc_ct_hdr *ct;
+
+	if (fp == ERR_PTR(-FC_EX_CLOSED))
+		return;
+
+	mutex_lock(&lport->lp_mutex);
+
+	FC_DEBUG_LPORT("Received a RFT_ID response\n");
+
+	if (lport->state != LPORT_ST_RFT_ID) {
+		FC_DBG("Received a RFT_ID response, but in state %s\n",
+		       fc_lport_state(lport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_lport_error(lport, fp);
+		goto err;
+	}
+
+	fh = fc_frame_header_get(fp);
+	ct = fc_frame_payload_get(fp, sizeof(*ct));
+
+	if (fh && ct && fh->fh_type == FC_TYPE_CT &&
+	    ct->ct_fs_type == FC_FST_DIR &&
+	    ct->ct_fs_subtype == FC_NS_SUBTYPE &&
+	    ntohs(ct->ct_cmd) == FC_FS_ACC)
+		fc_lport_enter_scr(lport);
+	else
+		fc_lport_error(lport, fp);
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&lport->lp_mutex);
+}
+
+/**
+ * fc_lport_rpn_id_resp - Handle response to Register Port
+ *			  Name by ID (RPN_ID) request
+ * @sp: current sequence in RPN_ID exchange
+ * @fp: response frame
+ * @lp_arg: Fibre Channel host port instance
+ *
+ * Locking Note: This function will be called without the lport lock
+ * held, but it will lock, call an _enter_* function or fc_lport_error
+ * and then unlock the lport.
+ */
+static void fc_lport_rpn_id_resp(struct fc_seq *sp, struct fc_frame *fp,
+				 void *lp_arg)
+{
+	struct fc_lport *lport = lp_arg;
+	struct fc_frame_header *fh;
+	struct fc_ct_hdr *ct;
+
+	if (fp == ERR_PTR(-FC_EX_CLOSED))
+		return;
+
+	mutex_lock(&lport->lp_mutex);
+
+	FC_DEBUG_LPORT("Received a RPN_ID response\n");
+
+	if (lport->state != LPORT_ST_RPN_ID) {
+		FC_DBG("Received a RPN_ID response, but in state %s\n",
+		       fc_lport_state(lport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_lport_error(lport, fp);
+		goto err;
+	}
+
+	fh = fc_frame_header_get(fp);
+	ct = fc_frame_payload_get(fp, sizeof(*ct));
+	if (fh && ct && fh->fh_type == FC_TYPE_CT &&
+	    ct->ct_fs_type == FC_FST_DIR &&
+	    ct->ct_fs_subtype == FC_NS_SUBTYPE &&
+	    ntohs(ct->ct_cmd) == FC_FS_ACC)
+		fc_lport_enter_rft_id(lport);
+	else
+		fc_lport_error(lport, fp);
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&lport->lp_mutex);
+}
+
+/**
+ * fc_lport_scr_resp - Handle response to State Change Register (SCR) request
+ * @sp: current sequence in SCR exchange
+ * @fp: response frame
+ * @lp_arg: Fibre Channel lport port instance that sent the registration request
+ *
+ * Locking Note: This function will be called without the lport lock
+ * held, but it will lock, call an _enter_* function or fc_lport_error
+ * and then unlock the lport.
+ */
+static void fc_lport_scr_resp(struct fc_seq *sp, struct fc_frame *fp,
+			      void *lp_arg)
+{
+	struct fc_lport *lport = lp_arg;
+	u8 op;
+
+	if (fp == ERR_PTR(-FC_EX_CLOSED))
+		return;
+
+	mutex_lock(&lport->lp_mutex);
+
+	FC_DEBUG_LPORT("Received a SCR response\n");
+
+	if (lport->state != LPORT_ST_SCR) {
+		FC_DBG("Received a SCR response, but in state %s\n",
+		       fc_lport_state(lport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_lport_error(lport, fp);
+		goto err;
+	}
+
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC)
+		fc_lport_enter_ready(lport);
+	else
+		fc_lport_error(lport, fp);
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&lport->lp_mutex);
+}
+
+/**
+ * fc_lport_enter_scr - Send a State Change Register (SCR) request
+ * @lport: Fibre Channel local port to register for state changes
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_scr(struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+
+	FC_DEBUG_LPORT("Port (%6x) entered SCR state from %s state\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_SCR);
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_els_scr));
+	if (!fp) {
+		fc_lport_error(lport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, NULL, fp, ELS_SCR,
+				  fc_lport_scr_resp, lport, lport->e_d_tov))
+		fc_lport_error(lport, fp);
+}
+
+/**
+ * fc_lport_enter_rft_id - Register FC4-types with the name server
+ * @lport: Fibre Channel local port to register
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_rft_id(struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+	struct fc_ns_fts *lps;
+	int i;
+
+	FC_DEBUG_LPORT("Port (%6x) entered RFT_ID state from %s state\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_RFT_ID);
+
+	lps = &lport->fcts;
+	i = sizeof(lps->ff_type_map) / sizeof(lps->ff_type_map[0]);
+	while (--i >= 0)
+		if (ntohl(lps->ff_type_map[i]) != 0)
+			break;
+	if (i < 0) {
+		/* nothing to register, move on to SCR */
+		fc_lport_enter_scr(lport);
+		return;
+	}
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) +
+			    sizeof(struct fc_ns_rft));
+	if (!fp) {
+		fc_lport_error(lport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, NULL, fp, FC_NS_RFT_ID,
+				  fc_lport_rft_id_resp,
+				  lport, lport->e_d_tov))
+		fc_lport_error(lport, fp);
+}
+
+/**
+ * fc_rport_enter_rft_id - Register port name with the name server
+ * @lport: Fibre Channel local port to register
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_rpn_id(struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+
+	FC_DEBUG_LPORT("Port (%6x) entered RPN_ID state from %s state\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_RPN_ID);
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_ct_hdr) +
+			    sizeof(struct fc_ns_rn_id));
+	if (!fp) {
+		fc_lport_error(lport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, NULL, fp, FC_NS_RPN_ID,
+				  fc_lport_rpn_id_resp,
+				  lport, lport->e_d_tov))
+		fc_lport_error(lport, fp);
+}
+
+static struct fc_rport_operations fc_lport_rport_ops = {
+	.event_callback = fc_lport_rport_callback,
+};
+
+/**
+ * fc_rport_enter_dns - Create a rport to the name server
+ * @lport: Fibre Channel local port requesting a rport for the name server
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_dns(struct fc_lport *lport)
+{
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rdata;
+	struct fc_disc_port dp;
+
+	dp.ids.port_id = FC_FID_DIR_SERV;
+	dp.ids.port_name = -1;
+	dp.ids.node_name = -1;
+	dp.ids.roles = FC_RPORT_ROLE_UNKNOWN;
+	dp.lp = lport;
+
+	FC_DEBUG_LPORT("Port (%6x) entered DNS state from %s state\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_DNS);
+
+	rport = fc_rport_rogue_create(&dp);
+	if (!rport)
+		goto err;
+
+	rdata = rport->dd_data;
+	rdata->ops = &fc_lport_rport_ops;
+	lport->tt.rport_login(rport);
+	return;
+
+err:
+	fc_lport_error(lport, NULL);
+}
+
+/**
+ * fc_lport_timeout - Handler for the retry_work timer.
+ * @work: The work struct of the fc_lport
+ */
+static void fc_lport_timeout(struct work_struct *work)
+{
+	struct fc_lport *lport =
+		container_of(work, struct fc_lport,
+			     retry_work.work);
+
+	mutex_lock(&lport->lp_mutex);
+
+	switch (lport->state) {
+	case LPORT_ST_NONE:
+	case LPORT_ST_READY:
+	case LPORT_ST_RESET:
+		WARN_ON(1);
+		break;
+	case LPORT_ST_FLOGI:
+		fc_lport_enter_flogi(lport);
+		break;
+	case LPORT_ST_DNS:
+		fc_lport_enter_dns(lport);
+		break;
+	case LPORT_ST_RPN_ID:
+		fc_lport_enter_rpn_id(lport);
+		break;
+	case LPORT_ST_RFT_ID:
+		fc_lport_enter_rft_id(lport);
+		break;
+	case LPORT_ST_SCR:
+		fc_lport_enter_scr(lport);
+		break;
+	case LPORT_ST_LOGO:
+		fc_lport_enter_logo(lport);
+		break;
+	}
+
+	mutex_unlock(&lport->lp_mutex);
+}
+
+/**
+ * fc_lport_logo_resp - Handle response to LOGO request
+ * @sp: current sequence in LOGO exchange
+ * @fp: response frame
+ * @lp_arg: Fibre Channel lport port instance that sent the LOGO request
+ *
+ * Locking Note: This function will be called without the lport lock
+ * held, but it will lock, call an _enter_* function or fc_lport_error
+ * and then unlock the lport.
+ */
+static void fc_lport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
+			       void *lp_arg)
+{
+	struct fc_lport *lport = lp_arg;
+	u8 op;
+
+	if (fp == ERR_PTR(-FC_EX_CLOSED))
+		return;
+
+	mutex_lock(&lport->lp_mutex);
+
+	FC_DEBUG_LPORT("Received a LOGO response\n");
+
+	if (lport->state != LPORT_ST_LOGO) {
+		FC_DBG("Received a LOGO response, but in state %s\n",
+		       fc_lport_state(lport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_lport_error(lport, fp);
+		goto err;
+	}
+
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC)
+		fc_lport_enter_reset(lport);
+	else
+		fc_lport_error(lport, fp);
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&lport->lp_mutex);
+}
+
+/**
+ * fc_rport_enter_logo - Logout of the fabric
+ * @lport: Fibre Channel local port to be logged out
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_lport_enter_logo(struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+	struct fc_els_logo *logo;
+
+	FC_DEBUG_LPORT("Port (%6x) entered LOGO state from %s state\n",
+		       fc_host_port_id(lport->host), fc_lport_state(lport));
+
+	fc_lport_state_enter(lport, LPORT_ST_LOGO);
+
+	/* DNS session should be closed so we can release it here */
+	if (lport->dns_rp)
+		lport->tt.rport_logoff(lport->dns_rp);
+
+	fp = fc_frame_alloc(lport, sizeof(*logo));
+	if (!fp) {
+		fc_lport_error(lport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, NULL, fp, ELS_LOGO, fc_lport_logo_resp,
+				  lport, lport->e_d_tov))
+		fc_lport_error(lport, fp);
+}
+
+/**
+ * fc_lport_flogi_resp - Handle response to FLOGI request
+ * @sp: current sequence in FLOGI exchange
+ * @fp: response frame
+ * @lp_arg: Fibre Channel lport port instance that sent the FLOGI request
+ *
+ * Locking Note: This function will be called without the lport lock
+ * held, but it will lock, call an _enter_* function or fc_lport_error
+ * and then unlock the lport.
+ */
+static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
+				void *lp_arg)
+{
+	struct fc_lport *lport = lp_arg;
+	struct fc_frame_header *fh;
+	struct fc_els_flogi *flp;
+	u32 did;
+	u16 csp_flags;
+	unsigned int r_a_tov;
+	unsigned int e_d_tov;
+	u16 mfs;
+
+	if (fp == ERR_PTR(-FC_EX_CLOSED))
+		return;
+
+	mutex_lock(&lport->lp_mutex);
+
+	FC_DEBUG_LPORT("Received a FLOGI response\n");
+
+	if (lport->state != LPORT_ST_FLOGI) {
+		FC_DBG("Received a FLOGI response, but in state %s\n",
+		       fc_lport_state(lport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_lport_error(lport, fp);
+		goto err;
+	}
+
+	fh = fc_frame_header_get(fp);
+	did = ntoh24(fh->fh_d_id);
+	if (fc_frame_payload_op(fp) == ELS_LS_ACC && did != 0) {
+
+		FC_DEBUG_LPORT("Assigned fid %x\n", did);
+		fc_host_port_id(lport->host) = did;
+
+		flp = fc_frame_payload_get(fp, sizeof(*flp));
+		if (flp) {
+			mfs = ntohs(flp->fl_csp.sp_bb_data) &
+				FC_SP_BB_DATA_MASK;
+			if (mfs >= FC_SP_MIN_MAX_PAYLOAD &&
+			    mfs < lport->mfs)
+				lport->mfs = mfs;
+			csp_flags = ntohs(flp->fl_csp.sp_features);
+			r_a_tov = ntohl(flp->fl_csp.sp_r_a_tov);
+			e_d_tov = ntohl(flp->fl_csp.sp_e_d_tov);
+			if (csp_flags & FC_SP_FT_EDTR)
+				e_d_tov /= 1000000;
+			if ((csp_flags & FC_SP_FT_FPORT) == 0) {
+				if (e_d_tov > lport->e_d_tov)
+					lport->e_d_tov = e_d_tov;
+				lport->r_a_tov = 2 * e_d_tov;
+				FC_DBG("Point-to-Point mode\n");
+				fc_lport_ptp_setup(lport, ntoh24(fh->fh_s_id),
+						   get_unaligned_be64(
+							   &flp->fl_wwpn),
+						   get_unaligned_be64(
+							   &flp->fl_wwnn));
+			} else {
+				lport->e_d_tov = e_d_tov;
+				lport->r_a_tov = r_a_tov;
+				fc_host_fabric_name(lport->host) =
+					get_unaligned_be64(&flp->fl_wwnn);
+				fc_lport_enter_dns(lport);
+			}
+		}
+
+		if (flp) {
+			csp_flags = ntohs(flp->fl_csp.sp_features);
+			if ((csp_flags & FC_SP_FT_FPORT) == 0) {
+				lport->tt.disc_start(fc_lport_disc_callback,
+						     lport);
+			}
+		}
+	} else {
+		FC_DBG("bad FLOGI response\n");
+	}
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&lport->lp_mutex);
+}
+
+/**
+ * fc_rport_enter_flogi - Send a FLOGI request to the fabric manager
+ * @lport: Fibre Channel local port to be logged in to the fabric
+ *
+ * Locking Note: The lport lock is expected to be held before calling
+ * this routine.
+ */
+void fc_lport_enter_flogi(struct fc_lport *lport)
+{
+	struct fc_frame *fp;
+
+	FC_DEBUG_LPORT("Processing FLOGI state\n");
+
+	fc_lport_state_enter(lport, LPORT_ST_FLOGI);
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
+	if (!fp)
+		return fc_lport_error(lport, fp);
+
+	if (!lport->tt.elsct_send(lport, NULL, fp, ELS_FLOGI,
+				  fc_lport_flogi_resp, lport, lport->e_d_tov))
+		fc_lport_error(lport, fp);
+}
+
+/* Configure a fc_lport */
+int fc_lport_config(struct fc_lport *lport)
+{
+	INIT_DELAYED_WORK(&lport->retry_work, fc_lport_timeout);
+	mutex_init(&lport->lp_mutex);
+
+	fc_lport_state_enter(lport, LPORT_ST_NONE);
+
+	fc_lport_add_fc4_type(lport, FC_TYPE_FCP);
+	fc_lport_add_fc4_type(lport, FC_TYPE_CT);
+
+	return 0;
+}
+EXPORT_SYMBOL(fc_lport_config);
+
+int fc_lport_init(struct fc_lport *lport)
+{
+	if (!lport->tt.lport_recv)
+		lport->tt.lport_recv = fc_lport_recv_req;
+
+	if (!lport->tt.lport_reset)
+		lport->tt.lport_reset = fc_lport_reset;
+
+	fc_host_port_type(lport->host) = FC_PORTTYPE_NPORT;
+	fc_host_node_name(lport->host) = lport->wwnn;
+	fc_host_port_name(lport->host) = lport->wwpn;
+	fc_host_supported_classes(lport->host) = FC_COS_CLASS3;
+	memset(fc_host_supported_fc4s(lport->host), 0,
+	       sizeof(fc_host_supported_fc4s(lport->host)));
+	fc_host_supported_fc4s(lport->host)[2] = 1;
+	fc_host_supported_fc4s(lport->host)[7] = 1;
+
+	/* This value is also unchanging */
+	memset(fc_host_active_fc4s(lport->host), 0,
+	       sizeof(fc_host_active_fc4s(lport->host)));
+	fc_host_active_fc4s(lport->host)[2] = 1;
+	fc_host_active_fc4s(lport->host)[7] = 1;
+	fc_host_maxframe_size(lport->host) = lport->mfs;
+	fc_host_supported_speeds(lport->host) = 0;
+	if (lport->link_supported_speeds & FC_PORTSPEED_1GBIT)
+		fc_host_supported_speeds(lport->host) |= FC_PORTSPEED_1GBIT;
+	if (lport->link_supported_speeds & FC_PORTSPEED_10GBIT)
+		fc_host_supported_speeds(lport->host) |= FC_PORTSPEED_10GBIT;
+
+	return 0;
+}
+EXPORT_SYMBOL(fc_lport_init);
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
new file mode 100644
index 000000000000..e780d8caf70e
--- /dev/null
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -0,0 +1,1291 @@
+/*
+ * Copyright(c) 2007 - 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+/*
+ * RPORT GENERAL INFO
+ *
+ * This file contains all processing regarding fc_rports. It contains the
+ * rport state machine and does all rport interaction with the transport class.
+ * There should be no other places in libfc that interact directly with the
+ * transport class in regards to adding and deleting rports.
+ *
+ * fc_rport's represent N_Port's within the fabric.
+ */
+
+/*
+ * RPORT LOCKING
+ *
+ * The rport should never hold the rport mutex and then attempt to acquire
+ * either the lport or disc mutexes. The rport's mutex is considered lesser
+ * than both the lport's mutex and the disc mutex. Refer to fc_lport.c for
+ * more comments on the heirarchy.
+ *
+ * The locking strategy is similar to the lport's strategy. The lock protects
+ * the rport's states and is held and released by the entry points to the rport
+ * block. All _enter_* functions correspond to rport states and expect the rport
+ * mutex to be locked before calling them. This means that rports only handle
+ * one request or response at a time, since they're not critical for the I/O
+ * path this potential over-use of the mutex is acceptable.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/rcupdate.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <asm/unaligned.h>
+
+#include <scsi/libfc.h>
+#include <scsi/fc_encode.h>
+
+static int fc_rport_debug;
+
+#define FC_DEBUG_RPORT(fmt...)			\
+	do {					\
+		if (fc_rport_debug)		\
+			FC_DBG(fmt);		\
+	} while (0)
+
+struct workqueue_struct *rport_event_queue;
+
+static void fc_rport_enter_plogi(struct fc_rport *);
+static void fc_rport_enter_prli(struct fc_rport *);
+static void fc_rport_enter_rtv(struct fc_rport *);
+static void fc_rport_enter_ready(struct fc_rport *);
+static void fc_rport_enter_logo(struct fc_rport *);
+
+static void fc_rport_recv_plogi_req(struct fc_rport *,
+				    struct fc_seq *, struct fc_frame *);
+static void fc_rport_recv_prli_req(struct fc_rport *,
+				   struct fc_seq *, struct fc_frame *);
+static void fc_rport_recv_prlo_req(struct fc_rport *,
+				   struct fc_seq *, struct fc_frame *);
+static void fc_rport_recv_logo_req(struct fc_rport *,
+				   struct fc_seq *, struct fc_frame *);
+static void fc_rport_timeout(struct work_struct *);
+static void fc_rport_error(struct fc_rport *, struct fc_frame *);
+static void fc_rport_work(struct work_struct *);
+
+static const char *fc_rport_state_names[] = {
+	[RPORT_ST_NONE] = "None",
+	[RPORT_ST_INIT] = "Init",
+	[RPORT_ST_PLOGI] = "PLOGI",
+	[RPORT_ST_PRLI] = "PRLI",
+	[RPORT_ST_RTV] = "RTV",
+	[RPORT_ST_READY] = "Ready",
+	[RPORT_ST_LOGO] = "LOGO",
+};
+
+static void fc_rport_rogue_destroy(struct device *dev)
+{
+	struct fc_rport *rport = dev_to_rport(dev);
+	FC_DEBUG_RPORT("Destroying rogue rport (%6x)\n", rport->port_id);
+	kfree(rport);
+}
+
+struct fc_rport *fc_rport_rogue_create(struct fc_disc_port *dp)
+{
+	struct fc_rport *rport;
+	struct fc_rport_libfc_priv *rdata;
+	rport = kzalloc(sizeof(*rport) + sizeof(*rdata), GFP_KERNEL);
+
+	if (!rport)
+		return NULL;
+
+	rdata = RPORT_TO_PRIV(rport);
+
+	rport->dd_data = rdata;
+	rport->port_id = dp->ids.port_id;
+	rport->port_name = dp->ids.port_name;
+	rport->node_name = dp->ids.node_name;
+	rport->roles = dp->ids.roles;
+	rport->maxframe_size = FC_MIN_MAX_PAYLOAD;
+	/*
+	 * Note: all this libfc rogue rport code will be removed for
+	 * upstream so it fine that this is really ugly and hacky right now.
+	 */
+	device_initialize(&rport->dev);
+	rport->dev.release = fc_rport_rogue_destroy;
+
+	mutex_init(&rdata->rp_mutex);
+	rdata->local_port = dp->lp;
+	rdata->trans_state = FC_PORTSTATE_ROGUE;
+	rdata->rp_state = RPORT_ST_INIT;
+	rdata->event = RPORT_EV_NONE;
+	rdata->flags = FC_RP_FLAGS_REC_SUPPORTED;
+	rdata->ops = NULL;
+	rdata->e_d_tov = dp->lp->e_d_tov;
+	rdata->r_a_tov = dp->lp->r_a_tov;
+	INIT_DELAYED_WORK(&rdata->retry_work, fc_rport_timeout);
+	INIT_WORK(&rdata->event_work, fc_rport_work);
+	/*
+	 * For good measure, but not necessary as we should only
+	 * add REAL rport to the lport list.
+	 */
+	INIT_LIST_HEAD(&rdata->peers);
+
+	return rport;
+}
+
+/**
+ * fc_rport_state - return a string for the state the rport is in
+ * @rport: The rport whose state we want to get a string for
+ */
+static const char *fc_rport_state(struct fc_rport *rport)
+{
+	const char *cp;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+
+	cp = fc_rport_state_names[rdata->rp_state];
+	if (!cp)
+		cp = "Unknown";
+	return cp;
+}
+
+/**
+ * fc_set_rport_loss_tmo - Set the remote port loss timeout in seconds.
+ * @rport: Pointer to Fibre Channel remote port structure
+ * @timeout: timeout in seconds
+ */
+void fc_set_rport_loss_tmo(struct fc_rport *rport, u32 timeout)
+{
+	if (timeout)
+		rport->dev_loss_tmo = timeout + 5;
+	else
+		rport->dev_loss_tmo = 30;
+}
+EXPORT_SYMBOL(fc_set_rport_loss_tmo);
+
+/**
+ * fc_plogi_get_maxframe - Get max payload from the common service parameters
+ * @flp: FLOGI payload structure
+ * @maxval: upper limit, may be less than what is in the service parameters
+ */
+static unsigned int
+fc_plogi_get_maxframe(struct fc_els_flogi *flp, unsigned int maxval)
+{
+	unsigned int mfs;
+
+	/*
+	 * Get max payload from the common service parameters and the
+	 * class 3 receive data field size.
+	 */
+	mfs = ntohs(flp->fl_csp.sp_bb_data) & FC_SP_BB_DATA_MASK;
+	if (mfs >= FC_SP_MIN_MAX_PAYLOAD && mfs < maxval)
+		maxval = mfs;
+	mfs = ntohs(flp->fl_cssp[3 - 1].cp_rdfs);
+	if (mfs >= FC_SP_MIN_MAX_PAYLOAD && mfs < maxval)
+		maxval = mfs;
+	return maxval;
+}
+
+/**
+ * fc_rport_state_enter - Change the rport's state
+ * @rport: The rport whose state should change
+ * @new: The new state of the rport
+ *
+ * Locking Note: Called with the rport lock held
+ */
+static void fc_rport_state_enter(struct fc_rport *rport,
+				 enum fc_rport_state new)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	if (rdata->rp_state != new)
+		rdata->retries = 0;
+	rdata->rp_state = new;
+}
+
+static void fc_rport_work(struct work_struct *work)
+{
+	struct fc_rport_libfc_priv *rdata =
+		container_of(work, struct fc_rport_libfc_priv, event_work);
+	enum fc_rport_event event;
+	enum fc_rport_trans_state trans_state;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_rport_operations *rport_ops;
+	struct fc_rport *rport = PRIV_TO_RPORT(rdata);
+
+	mutex_lock(&rdata->rp_mutex);
+	event = rdata->event;
+	rport_ops = rdata->ops;
+
+	if (event == RPORT_EV_CREATED) {
+		struct fc_rport *new_rport;
+		struct fc_rport_libfc_priv *new_rdata;
+		struct fc_rport_identifiers ids;
+
+		ids.port_id = rport->port_id;
+		ids.roles = rport->roles;
+		ids.port_name = rport->port_name;
+		ids.node_name = rport->node_name;
+
+		mutex_unlock(&rdata->rp_mutex);
+
+		new_rport = fc_remote_port_add(lport->host, 0, &ids);
+		if (new_rport) {
+			/*
+			 * Switch from the rogue rport to the rport
+			 * returned by the FC class.
+			 */
+			new_rport->maxframe_size = rport->maxframe_size;
+
+			new_rdata = new_rport->dd_data;
+			new_rdata->e_d_tov = rdata->e_d_tov;
+			new_rdata->r_a_tov = rdata->r_a_tov;
+			new_rdata->ops = rdata->ops;
+			new_rdata->local_port = rdata->local_port;
+			new_rdata->flags = FC_RP_FLAGS_REC_SUPPORTED;
+			new_rdata->trans_state = FC_PORTSTATE_REAL;
+			mutex_init(&new_rdata->rp_mutex);
+			INIT_DELAYED_WORK(&new_rdata->retry_work,
+					  fc_rport_timeout);
+			INIT_LIST_HEAD(&new_rdata->peers);
+			INIT_WORK(&new_rdata->event_work, fc_rport_work);
+
+			fc_rport_state_enter(new_rport, RPORT_ST_READY);
+		} else {
+			FC_DBG("Failed to create the rport for port "
+			       "(%6x).\n", ids.port_id);
+			event = RPORT_EV_FAILED;
+		}
+		put_device(&rport->dev);
+		rport = new_rport;
+		rdata = new_rport->dd_data;
+		if (rport_ops->event_callback)
+			rport_ops->event_callback(lport, rport, event);
+	} else if ((event == RPORT_EV_FAILED) ||
+		   (event == RPORT_EV_LOGO) ||
+		   (event == RPORT_EV_STOP)) {
+		trans_state = rdata->trans_state;
+		mutex_unlock(&rdata->rp_mutex);
+		if (rport_ops->event_callback)
+			rport_ops->event_callback(lport, rport, event);
+		if (trans_state == FC_PORTSTATE_ROGUE)
+			put_device(&rport->dev);
+		else
+			fc_remote_port_delete(rport);
+	} else
+		mutex_unlock(&rdata->rp_mutex);
+}
+
+/**
+ * fc_rport_login - Start the remote port login state machine
+ * @rport: Fibre Channel remote port
+ *
+ * Locking Note: Called without the rport lock held. This
+ * function will hold the rport lock, call an _enter_*
+ * function and then unlock the rport.
+ */
+int fc_rport_login(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_DEBUG_RPORT("Login to port (%6x)\n", rport->port_id);
+
+	fc_rport_enter_plogi(rport);
+
+	mutex_unlock(&rdata->rp_mutex);
+
+	return 0;
+}
+
+/**
+ * fc_rport_logoff - Logoff and remove an rport
+ * @rport: Fibre Channel remote port to be removed
+ *
+ * Locking Note: Called without the rport lock held. This
+ * function will hold the rport lock, call an _enter_*
+ * function and then unlock the rport.
+ */
+int fc_rport_logoff(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_DEBUG_RPORT("Remove port (%6x)\n", rport->port_id);
+
+	fc_rport_enter_logo(rport);
+
+	/*
+	 * Change the state to NONE so that we discard
+	 * the response.
+	 */
+	fc_rport_state_enter(rport, RPORT_ST_NONE);
+
+	mutex_unlock(&rdata->rp_mutex);
+
+	cancel_delayed_work_sync(&rdata->retry_work);
+
+	mutex_lock(&rdata->rp_mutex);
+
+	rdata->event = RPORT_EV_STOP;
+	queue_work(rport_event_queue, &rdata->event_work);
+
+	mutex_unlock(&rdata->rp_mutex);
+
+	return 0;
+}
+
+/**
+ * fc_rport_enter_ready - The rport is ready
+ * @rport: Fibre Channel remote port that is ready
+ *
+ * Locking Note: The rport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_rport_enter_ready(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+
+	fc_rport_state_enter(rport, RPORT_ST_READY);
+
+	FC_DEBUG_RPORT("Port (%6x) is Ready\n", rport->port_id);
+
+	rdata->event = RPORT_EV_CREATED;
+	queue_work(rport_event_queue, &rdata->event_work);
+}
+
+/**
+ * fc_rport_timeout - Handler for the retry_work timer.
+ * @work: The work struct of the fc_rport_libfc_priv
+ *
+ * Locking Note: Called without the rport lock held. This
+ * function will hold the rport lock, call an _enter_*
+ * function and then unlock the rport.
+ */
+static void fc_rport_timeout(struct work_struct *work)
+{
+	struct fc_rport_libfc_priv *rdata =
+		container_of(work, struct fc_rport_libfc_priv, retry_work.work);
+	struct fc_rport *rport = PRIV_TO_RPORT(rdata);
+
+	mutex_lock(&rdata->rp_mutex);
+
+	switch (rdata->rp_state) {
+	case RPORT_ST_PLOGI:
+		fc_rport_enter_plogi(rport);
+		break;
+	case RPORT_ST_PRLI:
+		fc_rport_enter_prli(rport);
+		break;
+	case RPORT_ST_RTV:
+		fc_rport_enter_rtv(rport);
+		break;
+	case RPORT_ST_LOGO:
+		fc_rport_enter_logo(rport);
+		break;
+	case RPORT_ST_READY:
+	case RPORT_ST_INIT:
+	case RPORT_ST_NONE:
+		break;
+	}
+
+	mutex_unlock(&rdata->rp_mutex);
+	put_device(&rport->dev);
+}
+
+/**
+ * fc_rport_error - Handler for any errors
+ * @rport: The fc_rport object
+ * @fp: The frame pointer
+ *
+ * If the error was caused by a resource allocation failure
+ * then wait for half a second and retry, otherwise retry
+ * immediately.
+ *
+ * Locking Note: The rport lock is expected to be held before
+ * calling this routine
+ */
+static void fc_rport_error(struct fc_rport *rport, struct fc_frame *fp)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	unsigned long delay = 0;
+
+	FC_DEBUG_RPORT("Error %ld in state %s, retries %d\n",
+		       PTR_ERR(fp), fc_rport_state(rport), rdata->retries);
+
+	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
+		/*
+		 * Memory allocation failure, or the exchange timed out.
+		 *  Retry after delay
+		 */
+		if (rdata->retries < rdata->local_port->max_retry_count) {
+			rdata->retries++;
+			if (!fp)
+				delay = msecs_to_jiffies(500);
+			get_device(&rport->dev);
+			schedule_delayed_work(&rdata->retry_work, delay);
+		} else {
+			switch (rdata->rp_state) {
+			case RPORT_ST_PLOGI:
+			case RPORT_ST_PRLI:
+			case RPORT_ST_LOGO:
+				rdata->event = RPORT_EV_FAILED;
+				queue_work(rport_event_queue,
+					   &rdata->event_work);
+				break;
+			case RPORT_ST_RTV:
+				fc_rport_enter_ready(rport);
+				break;
+			case RPORT_ST_NONE:
+			case RPORT_ST_READY:
+			case RPORT_ST_INIT:
+				break;
+			}
+		}
+	}
+}
+
+/**
+ * fc_rport_plogi_recv_resp - Handle incoming ELS PLOGI response
+ * @sp: current sequence in the PLOGI exchange
+ * @fp: response frame
+ * @rp_arg: Fibre Channel remote port
+ *
+ * Locking Note: This function will be called without the rport lock
+ * held, but it will lock, call an _enter_* function or fc_rport_error
+ * and then unlock the rport.
+ */
+static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
+				void *rp_arg)
+{
+	struct fc_rport *rport = rp_arg;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_els_flogi *plp;
+	unsigned int tov;
+	u16 csp_seq;
+	u16 cssp_seq;
+	u8 op;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_DEBUG_RPORT("Received a PLOGI response from port (%6x)\n",
+		       rport->port_id);
+
+	if (rdata->rp_state != RPORT_ST_PLOGI) {
+		FC_DBG("Received a PLOGI response, but in state %s\n",
+		       fc_rport_state(rport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_rport_error(rport, fp);
+		goto err;
+	}
+
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC &&
+	    (plp = fc_frame_payload_get(fp, sizeof(*plp))) != NULL) {
+		rport->port_name = get_unaligned_be64(&plp->fl_wwpn);
+		rport->node_name = get_unaligned_be64(&plp->fl_wwnn);
+
+		tov = ntohl(plp->fl_csp.sp_e_d_tov);
+		if (ntohs(plp->fl_csp.sp_features) & FC_SP_FT_EDTR)
+			tov /= 1000;
+		if (tov > rdata->e_d_tov)
+			rdata->e_d_tov = tov;
+		csp_seq = ntohs(plp->fl_csp.sp_tot_seq);
+		cssp_seq = ntohs(plp->fl_cssp[3 - 1].cp_con_seq);
+		if (cssp_seq < csp_seq)
+			csp_seq = cssp_seq;
+		rdata->max_seq = csp_seq;
+		rport->maxframe_size =
+			fc_plogi_get_maxframe(plp, lport->mfs);
+
+		/*
+		 * If the rport is one of the well known addresses
+		 * we skip PRLI and RTV and go straight to READY.
+		 */
+		if (rport->port_id >= FC_FID_DOM_MGR)
+			fc_rport_enter_ready(rport);
+		else
+			fc_rport_enter_prli(rport);
+	} else
+		fc_rport_error(rport, fp);
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&rdata->rp_mutex);
+	put_device(&rport->dev);
+}
+
+/**
+ * fc_rport_enter_plogi - Send Port Login (PLOGI) request to peer
+ * @rport: Fibre Channel remote port to send PLOGI to
+ *
+ * Locking Note: The rport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_rport_enter_plogi(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_frame *fp;
+
+	FC_DEBUG_RPORT("Port (%6x) entered PLOGI state from %s state\n",
+		       rport->port_id, fc_rport_state(rport));
+
+	fc_rport_state_enter(rport, RPORT_ST_PLOGI);
+
+	rport->maxframe_size = FC_MIN_MAX_PAYLOAD;
+	fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi));
+	if (!fp) {
+		fc_rport_error(rport, fp);
+		return;
+	}
+	rdata->e_d_tov = lport->e_d_tov;
+
+	if (!lport->tt.elsct_send(lport, rport, fp, ELS_PLOGI,
+				  fc_rport_plogi_resp, rport, lport->e_d_tov))
+		fc_rport_error(rport, fp);
+	else
+		get_device(&rport->dev);
+}
+
+/**
+ * fc_rport_prli_resp - Process Login (PRLI) response handler
+ * @sp: current sequence in the PRLI exchange
+ * @fp: response frame
+ * @rp_arg: Fibre Channel remote port
+ *
+ * Locking Note: This function will be called without the rport lock
+ * held, but it will lock, call an _enter_* function or fc_rport_error
+ * and then unlock the rport.
+ */
+static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
+			       void *rp_arg)
+{
+	struct fc_rport *rport = rp_arg;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct {
+		struct fc_els_prli prli;
+		struct fc_els_spp spp;
+	} *pp;
+	u32 roles = FC_RPORT_ROLE_UNKNOWN;
+	u32 fcp_parm = 0;
+	u8 op;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_DEBUG_RPORT("Received a PRLI response from port (%6x)\n",
+		       rport->port_id);
+
+	if (rdata->rp_state != RPORT_ST_PRLI) {
+		FC_DBG("Received a PRLI response, but in state %s\n",
+		       fc_rport_state(rport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_rport_error(rport, fp);
+		goto err;
+	}
+
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC) {
+		pp = fc_frame_payload_get(fp, sizeof(*pp));
+		if (pp && pp->prli.prli_spp_len >= sizeof(pp->spp)) {
+			fcp_parm = ntohl(pp->spp.spp_params);
+			if (fcp_parm & FCP_SPPF_RETRY)
+				rdata->flags |= FC_RP_FLAGS_RETRY;
+		}
+
+		rport->supported_classes = FC_COS_CLASS3;
+		if (fcp_parm & FCP_SPPF_INIT_FCN)
+			roles |= FC_RPORT_ROLE_FCP_INITIATOR;
+		if (fcp_parm & FCP_SPPF_TARG_FCN)
+			roles |= FC_RPORT_ROLE_FCP_TARGET;
+
+		rport->roles = roles;
+		fc_rport_enter_rtv(rport);
+
+	} else {
+		FC_DBG("Bad ELS response\n");
+		rdata->event = RPORT_EV_FAILED;
+		queue_work(rport_event_queue, &rdata->event_work);
+	}
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&rdata->rp_mutex);
+	put_device(&rport->dev);
+}
+
+/**
+ * fc_rport_logo_resp - Logout (LOGO) response handler
+ * @sp: current sequence in the LOGO exchange
+ * @fp: response frame
+ * @rp_arg: Fibre Channel remote port
+ *
+ * Locking Note: This function will be called without the rport lock
+ * held, but it will lock, call an _enter_* function or fc_rport_error
+ * and then unlock the rport.
+ */
+static void fc_rport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
+			       void *rp_arg)
+{
+	struct fc_rport *rport = rp_arg;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	u8 op;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_DEBUG_RPORT("Received a LOGO response from port (%6x)\n",
+		       rport->port_id);
+
+	if (IS_ERR(fp)) {
+		fc_rport_error(rport, fp);
+		goto err;
+	}
+
+	if (rdata->rp_state != RPORT_ST_LOGO) {
+		FC_DEBUG_RPORT("Received a LOGO response, but in state %s\n",
+			       fc_rport_state(rport));
+		goto out;
+	}
+
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC) {
+		fc_rport_enter_rtv(rport);
+	} else {
+		FC_DBG("Bad ELS response\n");
+		rdata->event = RPORT_EV_LOGO;
+		queue_work(rport_event_queue, &rdata->event_work);
+	}
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&rdata->rp_mutex);
+	put_device(&rport->dev);
+}
+
+/**
+ * fc_rport_enter_prli - Send Process Login (PRLI) request to peer
+ * @rport: Fibre Channel remote port to send PRLI to
+ *
+ * Locking Note: The rport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_rport_enter_prli(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct {
+		struct fc_els_prli prli;
+		struct fc_els_spp spp;
+	} *pp;
+	struct fc_frame *fp;
+
+	FC_DEBUG_RPORT("Port (%6x) entered PRLI state from %s state\n",
+		       rport->port_id, fc_rport_state(rport));
+
+	fc_rport_state_enter(rport, RPORT_ST_PRLI);
+
+	fp = fc_frame_alloc(lport, sizeof(*pp));
+	if (!fp) {
+		fc_rport_error(rport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, rport, fp, ELS_PRLI,
+				  fc_rport_prli_resp, rport, lport->e_d_tov))
+		fc_rport_error(rport, fp);
+	else
+		get_device(&rport->dev);
+}
+
+/**
+ * fc_rport_els_rtv_resp - Request Timeout Value response handler
+ * @sp: current sequence in the RTV exchange
+ * @fp: response frame
+ * @rp_arg: Fibre Channel remote port
+ *
+ * Many targets don't seem to support this.
+ *
+ * Locking Note: This function will be called without the rport lock
+ * held, but it will lock, call an _enter_* function or fc_rport_error
+ * and then unlock the rport.
+ */
+static void fc_rport_rtv_resp(struct fc_seq *sp, struct fc_frame *fp,
+			      void *rp_arg)
+{
+	struct fc_rport *rport = rp_arg;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	u8 op;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	FC_DEBUG_RPORT("Received a RTV response from port (%6x)\n",
+		       rport->port_id);
+
+	if (rdata->rp_state != RPORT_ST_RTV) {
+		FC_DBG("Received a RTV response, but in state %s\n",
+		       fc_rport_state(rport));
+		goto out;
+	}
+
+	if (IS_ERR(fp)) {
+		fc_rport_error(rport, fp);
+		goto err;
+	}
+
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC) {
+		struct fc_els_rtv_acc *rtv;
+		u32 toq;
+		u32 tov;
+
+		rtv = fc_frame_payload_get(fp, sizeof(*rtv));
+		if (rtv) {
+			toq = ntohl(rtv->rtv_toq);
+			tov = ntohl(rtv->rtv_r_a_tov);
+			if (tov == 0)
+				tov = 1;
+			rdata->r_a_tov = tov;
+			tov = ntohl(rtv->rtv_e_d_tov);
+			if (toq & FC_ELS_RTV_EDRES)
+				tov /= 1000000;
+			if (tov == 0)
+				tov = 1;
+			rdata->e_d_tov = tov;
+		}
+	}
+
+	fc_rport_enter_ready(rport);
+
+out:
+	fc_frame_free(fp);
+err:
+	mutex_unlock(&rdata->rp_mutex);
+	put_device(&rport->dev);
+}
+
+/**
+ * fc_rport_enter_rtv - Send Request Timeout Value (RTV) request to peer
+ * @rport: Fibre Channel remote port to send RTV to
+ *
+ * Locking Note: The rport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_rport_enter_rtv(struct fc_rport *rport)
+{
+	struct fc_frame *fp;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+
+	FC_DEBUG_RPORT("Port (%6x) entered RTV state from %s state\n",
+		       rport->port_id, fc_rport_state(rport));
+
+	fc_rport_state_enter(rport, RPORT_ST_RTV);
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_els_rtv));
+	if (!fp) {
+		fc_rport_error(rport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, rport, fp, ELS_RTV,
+				     fc_rport_rtv_resp, rport, lport->e_d_tov))
+		fc_rport_error(rport, fp);
+	else
+		get_device(&rport->dev);
+}
+
+/**
+ * fc_rport_enter_logo - Send Logout (LOGO) request to peer
+ * @rport: Fibre Channel remote port to send LOGO to
+ *
+ * Locking Note: The rport lock is expected to be held before calling
+ * this routine.
+ */
+static void fc_rport_enter_logo(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_frame *fp;
+
+	FC_DEBUG_RPORT("Port (%6x) entered LOGO state from %s state\n",
+		       rport->port_id, fc_rport_state(rport));
+
+	fc_rport_state_enter(rport, RPORT_ST_LOGO);
+
+	fp = fc_frame_alloc(lport, sizeof(struct fc_els_logo));
+	if (!fp) {
+		fc_rport_error(rport, fp);
+		return;
+	}
+
+	if (!lport->tt.elsct_send(lport, rport, fp, ELS_LOGO,
+				  fc_rport_logo_resp, rport, lport->e_d_tov))
+		fc_rport_error(rport, fp);
+	else
+		get_device(&rport->dev);
+}
+
+
+/**
+ * fc_rport_recv_req - Receive a request from a rport
+ * @sp: current sequence in the PLOGI exchange
+ * @fp: response frame
+ * @rp_arg: Fibre Channel remote port
+ *
+ * Locking Note: Called without the rport lock held. This
+ * function will hold the rport lock, call an _enter_*
+ * function and then unlock the rport.
+ */
+void fc_rport_recv_req(struct fc_seq *sp, struct fc_frame *fp,
+		       struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+
+	struct fc_frame_header *fh;
+	struct fc_seq_els_data els_data;
+	u8 op;
+
+	mutex_lock(&rdata->rp_mutex);
+
+	els_data.fp = NULL;
+	els_data.explan = ELS_EXPL_NONE;
+	els_data.reason = ELS_RJT_NONE;
+
+	fh = fc_frame_header_get(fp);
+
+	if (fh->fh_r_ctl == FC_RCTL_ELS_REQ && fh->fh_type == FC_TYPE_ELS) {
+		op = fc_frame_payload_op(fp);
+		switch (op) {
+		case ELS_PLOGI:
+			fc_rport_recv_plogi_req(rport, sp, fp);
+			break;
+		case ELS_PRLI:
+			fc_rport_recv_prli_req(rport, sp, fp);
+			break;
+		case ELS_PRLO:
+			fc_rport_recv_prlo_req(rport, sp, fp);
+			break;
+		case ELS_LOGO:
+			fc_rport_recv_logo_req(rport, sp, fp);
+			break;
+		case ELS_RRQ:
+			els_data.fp = fp;
+			lport->tt.seq_els_rsp_send(sp, ELS_RRQ, &els_data);
+			break;
+		case ELS_REC:
+			els_data.fp = fp;
+			lport->tt.seq_els_rsp_send(sp, ELS_REC, &els_data);
+			break;
+		default:
+			els_data.reason = ELS_RJT_UNSUP;
+			lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &els_data);
+			break;
+		}
+	}
+
+	mutex_unlock(&rdata->rp_mutex);
+}
+
+/**
+ * fc_rport_recv_plogi_req - Handle incoming Port Login (PLOGI) request
+ * @rport: Fibre Channel remote port that initiated PLOGI
+ * @sp: current sequence in the PLOGI exchange
+ * @fp: PLOGI request frame
+ *
+ * Locking Note: The rport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_rport_recv_plogi_req(struct fc_rport *rport,
+				    struct fc_seq *sp, struct fc_frame *rx_fp)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_frame *fp = rx_fp;
+	struct fc_exch *ep;
+	struct fc_frame_header *fh;
+	struct fc_els_flogi *pl;
+	struct fc_seq_els_data rjt_data;
+	u32 sid;
+	u64 wwpn;
+	u64 wwnn;
+	enum fc_els_rjt_reason reject = 0;
+	u32 f_ctl;
+	rjt_data.fp = NULL;
+
+	fh = fc_frame_header_get(fp);
+
+	FC_DEBUG_RPORT("Received PLOGI request from port (%6x) "
+		       "while in state %s\n", ntoh24(fh->fh_s_id),
+		       fc_rport_state(rport));
+
+	sid = ntoh24(fh->fh_s_id);
+	pl = fc_frame_payload_get(fp, sizeof(*pl));
+	if (!pl) {
+		FC_DBG("incoming PLOGI from %x too short\n", sid);
+		WARN_ON(1);
+		/* XXX TBD: send reject? */
+		fc_frame_free(fp);
+		return;
+	}
+	wwpn = get_unaligned_be64(&pl->fl_wwpn);
+	wwnn = get_unaligned_be64(&pl->fl_wwnn);
+
+	/*
+	 * If the session was just created, possibly due to the incoming PLOGI,
+	 * set the state appropriately and accept the PLOGI.
+	 *
+	 * If we had also sent a PLOGI, and if the received PLOGI is from a
+	 * higher WWPN, we accept it, otherwise an LS_RJT is sent with reason
+	 * "command already in progress".
+	 *
+	 * XXX TBD: If the session was ready before, the PLOGI should result in
+	 * all outstanding exchanges being reset.
+	 */
+	switch (rdata->rp_state) {
+	case RPORT_ST_INIT:
+		FC_DEBUG_RPORT("incoming PLOGI from %6x wwpn %llx state INIT "
+			       "- reject\n", sid, wwpn);
+		reject = ELS_RJT_UNSUP;
+		break;
+	case RPORT_ST_PLOGI:
+		FC_DEBUG_RPORT("incoming PLOGI from %x in PLOGI state %d\n",
+			       sid, rdata->rp_state);
+		if (wwpn < lport->wwpn)
+			reject = ELS_RJT_INPROG;
+		break;
+	case RPORT_ST_PRLI:
+	case RPORT_ST_READY:
+		FC_DEBUG_RPORT("incoming PLOGI from %x in logged-in state %d "
+			       "- ignored for now\n", sid, rdata->rp_state);
+		/* XXX TBD - should reset */
+		break;
+	case RPORT_ST_NONE:
+	default:
+		FC_DEBUG_RPORT("incoming PLOGI from %x in unexpected "
+			       "state %d\n", sid, rdata->rp_state);
+		break;
+	}
+
+	if (reject) {
+		rjt_data.reason = reject;
+		rjt_data.explan = ELS_EXPL_NONE;
+		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+		fc_frame_free(fp);
+	} else {
+		fp = fc_frame_alloc(lport, sizeof(*pl));
+		if (fp == NULL) {
+			fp = rx_fp;
+			rjt_data.reason = ELS_RJT_UNAB;
+			rjt_data.explan = ELS_EXPL_NONE;
+			lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+			fc_frame_free(fp);
+		} else {
+			sp = lport->tt.seq_start_next(sp);
+			WARN_ON(!sp);
+			fc_rport_set_name(rport, wwpn, wwnn);
+
+			/*
+			 * Get session payload size from incoming PLOGI.
+			 */
+			rport->maxframe_size =
+				fc_plogi_get_maxframe(pl, lport->mfs);
+			fc_frame_free(rx_fp);
+			fc_plogi_fill(lport, fp, ELS_LS_ACC);
+
+			/*
+			 * Send LS_ACC.	 If this fails,
+			 * the originator should retry.
+			 */
+			f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
+			f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+			ep = fc_seq_exch(sp);
+			fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+				       FC_TYPE_ELS, f_ctl, 0);
+			lport->tt.seq_send(lport, sp, fp);
+			if (rdata->rp_state == RPORT_ST_PLOGI)
+				fc_rport_enter_prli(rport);
+		}
+	}
+}
+
+/**
+ * fc_rport_recv_prli_req - Handle incoming Process Login (PRLI) request
+ * @rport: Fibre Channel remote port that initiated PRLI
+ * @sp: current sequence in the PRLI exchange
+ * @fp: PRLI request frame
+ *
+ * Locking Note: The rport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_rport_recv_prli_req(struct fc_rport *rport,
+				   struct fc_seq *sp, struct fc_frame *rx_fp)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+	struct fc_exch *ep;
+	struct fc_frame *fp;
+	struct fc_frame_header *fh;
+	struct {
+		struct fc_els_prli prli;
+		struct fc_els_spp spp;
+	} *pp;
+	struct fc_els_spp *rspp;	/* request service param page */
+	struct fc_els_spp *spp;	/* response spp */
+	unsigned int len;
+	unsigned int plen;
+	enum fc_els_rjt_reason reason = ELS_RJT_UNAB;
+	enum fc_els_rjt_explan explan = ELS_EXPL_NONE;
+	enum fc_els_spp_resp resp;
+	struct fc_seq_els_data rjt_data;
+	u32 f_ctl;
+	u32 fcp_parm;
+	u32 roles = FC_RPORT_ROLE_UNKNOWN;
+	rjt_data.fp = NULL;
+
+	fh = fc_frame_header_get(rx_fp);
+
+	FC_DEBUG_RPORT("Received PRLI request from port (%6x) "
+		       "while in state %s\n", ntoh24(fh->fh_s_id),
+		       fc_rport_state(rport));
+
+	switch (rdata->rp_state) {
+	case RPORT_ST_PRLI:
+	case RPORT_ST_READY:
+		reason = ELS_RJT_NONE;
+		break;
+	default:
+		break;
+	}
+	len = fr_len(rx_fp) - sizeof(*fh);
+	pp = fc_frame_payload_get(rx_fp, sizeof(*pp));
+	if (pp == NULL) {
+		reason = ELS_RJT_PROT;
+		explan = ELS_EXPL_INV_LEN;
+	} else {
+		plen = ntohs(pp->prli.prli_len);
+		if ((plen % 4) != 0 || plen > len) {
+			reason = ELS_RJT_PROT;
+			explan = ELS_EXPL_INV_LEN;
+		} else if (plen < len) {
+			len = plen;
+		}
+		plen = pp->prli.prli_spp_len;
+		if ((plen % 4) != 0 || plen < sizeof(*spp) ||
+		    plen > len || len < sizeof(*pp)) {
+			reason = ELS_RJT_PROT;
+			explan = ELS_EXPL_INV_LEN;
+		}
+		rspp = &pp->spp;
+	}
+	if (reason != ELS_RJT_NONE ||
+	    (fp = fc_frame_alloc(lport, len)) == NULL) {
+		rjt_data.reason = reason;
+		rjt_data.explan = explan;
+		lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	} else {
+		sp = lport->tt.seq_start_next(sp);
+		WARN_ON(!sp);
+		pp = fc_frame_payload_get(fp, len);
+		WARN_ON(!pp);
+		memset(pp, 0, len);
+		pp->prli.prli_cmd = ELS_LS_ACC;
+		pp->prli.prli_spp_len = plen;
+		pp->prli.prli_len = htons(len);
+		len -= sizeof(struct fc_els_prli);
+
+		/*
+		 * Go through all the service parameter pages and build
+		 * response.  If plen indicates longer SPP than standard,
+		 * use that.  The entire response has been pre-cleared above.
+		 */
+		spp = &pp->spp;
+		while (len >= plen) {
+			spp->spp_type = rspp->spp_type;
+			spp->spp_type_ext = rspp->spp_type_ext;
+			spp->spp_flags = rspp->spp_flags & FC_SPP_EST_IMG_PAIR;
+			resp = FC_SPP_RESP_ACK;
+			if (rspp->spp_flags & FC_SPP_RPA_VAL)
+				resp = FC_SPP_RESP_NO_PA;
+			switch (rspp->spp_type) {
+			case 0:	/* common to all FC-4 types */
+				break;
+			case FC_TYPE_FCP:
+				fcp_parm = ntohl(rspp->spp_params);
+				if (fcp_parm * FCP_SPPF_RETRY)
+					rdata->flags |= FC_RP_FLAGS_RETRY;
+				rport->supported_classes = FC_COS_CLASS3;
+				if (fcp_parm & FCP_SPPF_INIT_FCN)
+					roles |= FC_RPORT_ROLE_FCP_INITIATOR;
+				if (fcp_parm & FCP_SPPF_TARG_FCN)
+					roles |= FC_RPORT_ROLE_FCP_TARGET;
+				rport->roles = roles;
+
+				spp->spp_params =
+					htonl(lport->service_params);
+				break;
+			default:
+				resp = FC_SPP_RESP_INVL;
+				break;
+			}
+			spp->spp_flags |= resp;
+			len -= plen;
+			rspp = (struct fc_els_spp *)((char *)rspp + plen);
+			spp = (struct fc_els_spp *)((char *)spp + plen);
+		}
+
+		/*
+		 * Send LS_ACC.	 If this fails, the originator should retry.
+		 */
+		f_ctl = FC_FC_EX_CTX | FC_FC_LAST_SEQ;
+		f_ctl |= FC_FC_END_SEQ | FC_FC_SEQ_INIT;
+		ep = fc_seq_exch(sp);
+		fc_fill_fc_hdr(fp, FC_RCTL_ELS_REP, ep->did, ep->sid,
+			       FC_TYPE_ELS, f_ctl, 0);
+		lport->tt.seq_send(lport, sp, fp);
+
+		/*
+		 * Get lock and re-check state.
+		 */
+		switch (rdata->rp_state) {
+		case RPORT_ST_PRLI:
+			fc_rport_enter_ready(rport);
+			break;
+		case RPORT_ST_READY:
+			break;
+		default:
+			break;
+		}
+	}
+	fc_frame_free(rx_fp);
+}
+
+/**
+ * fc_rport_recv_prlo_req - Handle incoming Process Logout (PRLO) request
+ * @rport: Fibre Channel remote port that initiated PRLO
+ * @sp: current sequence in the PRLO exchange
+ * @fp: PRLO request frame
+ *
+ * Locking Note: The rport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_rport_recv_prlo_req(struct fc_rport *rport, struct fc_seq *sp,
+				   struct fc_frame *fp)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+
+	struct fc_frame_header *fh;
+	struct fc_seq_els_data rjt_data;
+
+	fh = fc_frame_header_get(fp);
+
+	FC_DEBUG_RPORT("Received PRLO request from port (%6x) "
+		       "while in state %s\n", ntoh24(fh->fh_s_id),
+		       fc_rport_state(rport));
+
+	rjt_data.fp = NULL;
+	rjt_data.reason = ELS_RJT_UNAB;
+	rjt_data.explan = ELS_EXPL_NONE;
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_RJT, &rjt_data);
+	fc_frame_free(fp);
+}
+
+/**
+ * fc_rport_recv_logo_req - Handle incoming Logout (LOGO) request
+ * @rport: Fibre Channel remote port that initiated LOGO
+ * @sp: current sequence in the LOGO exchange
+ * @fp: LOGO request frame
+ *
+ * Locking Note: The rport lock is exected to be held before calling
+ * this function.
+ */
+static void fc_rport_recv_logo_req(struct fc_rport *rport, struct fc_seq *sp,
+				   struct fc_frame *fp)
+{
+	struct fc_frame_header *fh;
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+
+	fh = fc_frame_header_get(fp);
+
+	FC_DEBUG_RPORT("Received LOGO request from port (%6x) "
+		       "while in state %s\n", ntoh24(fh->fh_s_id),
+		       fc_rport_state(rport));
+
+	rdata->event = RPORT_EV_LOGO;
+	queue_work(rport_event_queue, &rdata->event_work);
+
+	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
+	fc_frame_free(fp);
+}
+
+static void fc_rport_flush_queue(void)
+{
+	flush_workqueue(rport_event_queue);
+}
+
+
+int fc_rport_init(struct fc_lport *lport)
+{
+	if (!lport->tt.rport_login)
+		lport->tt.rport_login = fc_rport_login;
+
+	if (!lport->tt.rport_logoff)
+		lport->tt.rport_logoff = fc_rport_logoff;
+
+	if (!lport->tt.rport_recv_req)
+		lport->tt.rport_recv_req = fc_rport_recv_req;
+
+	if (!lport->tt.rport_flush_queue)
+		lport->tt.rport_flush_queue = fc_rport_flush_queue;
+
+	return 0;
+}
+EXPORT_SYMBOL(fc_rport_init);
+
+int fc_setup_rport()
+{
+	rport_event_queue = create_singlethread_workqueue("fc_rport_eq");
+	if (!rport_event_queue)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL(fc_setup_rport);
+
+void fc_destroy_rport()
+{
+	destroy_workqueue(rport_event_queue);
+}
+EXPORT_SYMBOL(fc_destroy_rport);
+
+void fc_rport_terminate_io(struct fc_rport *rport)
+{
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;
+	struct fc_lport *lport = rdata->local_port;
+
+	lport->tt.exch_mgr_reset(lport->emp, 0, rport->port_id);
+	lport->tt.exch_mgr_reset(lport->emp, rport->port_id, 0);
+}
+EXPORT_SYMBOL(fc_rport_terminate_io);
diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h
new file mode 100644
index 000000000000..6300f556bce5
--- /dev/null
+++ b/include/scsi/fc_encode.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright(c) 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_ENCODE_H_
+#define _FC_ENCODE_H_
+#include <asm/unaligned.h>
+
+struct fc_ns_rft {
+	struct fc_ns_fid fid;	/* port ID object */
+	struct fc_ns_fts fts;	/* FC4-types object */
+};
+
+struct fc_ct_req {
+	struct fc_ct_hdr hdr;
+	union {
+		struct fc_ns_gid_ft gid;
+		struct fc_ns_rn_id  rn;
+		struct fc_ns_rft rft;
+	} payload;
+};
+
+/**
+ * fill FC header fields in specified fc_frame
+ */
+static inline void fc_fill_fc_hdr(struct fc_frame *fp, enum fc_rctl r_ctl,
+				  u32 did, u32 sid, enum fc_fh_type type,
+				  u32 f_ctl, u32 parm_offset)
+{
+	struct fc_frame_header *fh;
+
+	fh = fc_frame_header_get(fp);
+	WARN_ON(r_ctl == 0);
+	fh->fh_r_ctl = r_ctl;
+	hton24(fh->fh_d_id, did);
+	hton24(fh->fh_s_id, sid);
+	fh->fh_type = type;
+	hton24(fh->fh_f_ctl, f_ctl);
+	fh->fh_cs_ctl = 0;
+	fh->fh_df_ctl = 0;
+	fh->fh_parm_offset = htonl(parm_offset);
+}
+
+/**
+ * fc_ct_hdr_fill- fills ct header and reset ct payload
+ * returns pointer to ct request.
+ */
+static inline struct fc_ct_req *fc_ct_hdr_fill(const struct fc_frame *fp,
+					       unsigned int op, size_t req_size)
+{
+	struct fc_ct_req *ct;
+	size_t ct_plen;
+
+	ct_plen  = sizeof(struct fc_ct_hdr) + req_size;
+	ct = fc_frame_payload_get(fp, ct_plen);
+	memset(ct, 0, ct_plen);
+	ct->hdr.ct_rev = FC_CT_REV;
+	ct->hdr.ct_fs_type = FC_FST_DIR;
+	ct->hdr.ct_fs_subtype = FC_NS_SUBTYPE;
+	ct->hdr.ct_cmd = htons((u16) op);
+	return ct;
+}
+
+/**
+ * fc_ct_fill - Fill in a name service request frame
+ */
+static inline int fc_ct_fill(struct fc_lport *lport, struct fc_frame *fp,
+		      unsigned int op, enum fc_rctl *r_ctl, u32 *did,
+		      enum fc_fh_type *fh_type)
+{
+	struct fc_ct_req *ct;
+
+	switch (op) {
+	case FC_NS_GPN_FT:
+		ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_gid_ft));
+		ct->payload.gid.fn_fc4_type = FC_TYPE_FCP;
+		break;
+
+	case FC_NS_RFT_ID:
+		ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rft));
+		hton24(ct->payload.rft.fid.fp_fid,
+		       fc_host_port_id(lport->host));
+		ct->payload.rft.fts = lport->fcts;
+		break;
+
+	case FC_NS_RPN_ID:
+		ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rn_id));
+		hton24(ct->payload.rn.fr_fid.fp_fid,
+		       fc_host_port_id(lport->host));
+		ct->payload.rft.fts = lport->fcts;
+		put_unaligned_be64(lport->wwpn, &ct->payload.rn.fr_wwn);
+		break;
+
+	default:
+		FC_DBG("Invalid op code %x \n", op);
+		return -EINVAL;
+	}
+	*r_ctl = FC_RCTL_DD_UNSOL_CTL;
+	*did = FC_FID_DIR_SERV;
+	*fh_type = FC_TYPE_CT;
+	return 0;
+}
+
+/**
+ * fc_plogi_fill - Fill in plogi request frame
+ */
+static inline void fc_plogi_fill(struct fc_lport *lport, struct fc_frame *fp,
+				 unsigned int op)
+{
+	struct fc_els_flogi *plogi;
+	struct fc_els_csp *csp;
+	struct fc_els_cssp *cp;
+
+	plogi = fc_frame_payload_get(fp, sizeof(*plogi));
+	memset(plogi, 0, sizeof(*plogi));
+	plogi->fl_cmd = (u8) op;
+	put_unaligned_be64(lport->wwpn, &plogi->fl_wwpn);
+	put_unaligned_be64(lport->wwnn, &plogi->fl_wwnn);
+
+	csp = &plogi->fl_csp;
+	csp->sp_hi_ver = 0x20;
+	csp->sp_lo_ver = 0x20;
+	csp->sp_bb_cred = htons(10);	/* this gets set by gateway */
+	csp->sp_bb_data = htons((u16) lport->mfs);
+	cp = &plogi->fl_cssp[3 - 1];	/* class 3 parameters */
+	cp->cp_class = htons(FC_CPC_VALID | FC_CPC_SEQ);
+	csp->sp_features = htons(FC_SP_FT_CIRO);
+	csp->sp_tot_seq = htons(255);	/* seq. we accept */
+	csp->sp_rel_off = htons(0x1f);
+	csp->sp_e_d_tov = htonl(lport->e_d_tov);
+
+	cp->cp_rdfs = htons((u16) lport->mfs);
+	cp->cp_con_seq = htons(255);
+	cp->cp_open_seq = 1;
+}
+
+/**
+ * fc_flogi_fill - Fill in a flogi request frame.
+ */
+static inline void fc_flogi_fill(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct fc_els_csp *sp;
+	struct fc_els_cssp *cp;
+	struct fc_els_flogi *flogi;
+
+	flogi = fc_frame_payload_get(fp, sizeof(*flogi));
+	memset(flogi, 0, sizeof(*flogi));
+	flogi->fl_cmd = (u8) ELS_FLOGI;
+	put_unaligned_be64(lport->wwpn, &flogi->fl_wwpn);
+	put_unaligned_be64(lport->wwnn, &flogi->fl_wwnn);
+	sp = &flogi->fl_csp;
+	sp->sp_hi_ver = 0x20;
+	sp->sp_lo_ver = 0x20;
+	sp->sp_bb_cred = htons(10);	/* this gets set by gateway */
+	sp->sp_bb_data = htons((u16) lport->mfs);
+	cp = &flogi->fl_cssp[3 - 1];	/* class 3 parameters */
+	cp->cp_class = htons(FC_CPC_VALID | FC_CPC_SEQ);
+}
+
+/**
+ * fc_logo_fill - Fill in a logo request frame.
+ */
+static inline void fc_logo_fill(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct fc_els_logo *logo;
+
+	logo = fc_frame_payload_get(fp, sizeof(*logo));
+	memset(logo, 0, sizeof(*logo));
+	logo->fl_cmd = ELS_LOGO;
+	hton24(logo->fl_n_port_id, fc_host_port_id(lport->host));
+	logo->fl_n_port_wwn = htonll(lport->wwpn);
+}
+
+/**
+ * fc_rtv_fill - Fill in RTV (read timeout value) request frame.
+ */
+static inline void fc_rtv_fill(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct fc_els_rtv *rtv;
+
+	rtv = fc_frame_payload_get(fp, sizeof(*rtv));
+	memset(rtv, 0, sizeof(*rtv));
+	rtv->rtv_cmd = ELS_RTV;
+}
+
+/**
+ * fc_rec_fill - Fill in rec request frame
+ */
+static inline void fc_rec_fill(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct fc_els_rec *rec;
+	struct fc_exch *ep = fc_seq_exch(fr_seq(fp));
+
+	rec = fc_frame_payload_get(fp, sizeof(*rec));
+	memset(rec, 0, sizeof(*rec));
+	rec->rec_cmd = ELS_REC;
+	hton24(rec->rec_s_id, fc_host_port_id(lport->host));
+	rec->rec_ox_id = htons(ep->oxid);
+	rec->rec_rx_id = htons(ep->rxid);
+}
+
+/**
+ * fc_prli_fill - Fill in prli request frame
+ */
+static inline void fc_prli_fill(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct {
+		struct fc_els_prli prli;
+		struct fc_els_spp spp;
+	} *pp;
+
+	pp = fc_frame_payload_get(fp, sizeof(*pp));
+	memset(pp, 0, sizeof(*pp));
+	pp->prli.prli_cmd = ELS_PRLI;
+	pp->prli.prli_spp_len = sizeof(struct fc_els_spp);
+	pp->prli.prli_len = htons(sizeof(*pp));
+	pp->spp.spp_type = FC_TYPE_FCP;
+	pp->spp.spp_flags = FC_SPP_EST_IMG_PAIR;
+	pp->spp.spp_params = htonl(lport->service_params);
+}
+
+/**
+ * fc_scr_fill - Fill in a scr request frame.
+ */
+static inline void fc_scr_fill(struct fc_lport *lport, struct fc_frame *fp)
+{
+	struct fc_els_scr *scr;
+
+	scr = fc_frame_payload_get(fp, sizeof(*scr));
+	memset(scr, 0, sizeof(*scr));
+	scr->scr_cmd = ELS_SCR;
+	scr->scr_reg_func = ELS_SCRF_FULL;
+}
+
+/**
+ * fc_els_fill - Fill in an ELS  request frame
+ */
+static inline int fc_els_fill(struct fc_lport *lport, struct fc_rport *rport,
+		       struct fc_frame *fp, unsigned int op,
+		       enum fc_rctl *r_ctl, u32 *did, enum fc_fh_type *fh_type)
+{
+	switch (op) {
+	case ELS_PLOGI:
+		fc_plogi_fill(lport, fp, ELS_PLOGI);
+		*did = rport->port_id;
+		break;
+
+	case ELS_FLOGI:
+		fc_flogi_fill(lport, fp);
+		*did = FC_FID_FLOGI;
+		break;
+
+	case ELS_LOGO:
+		fc_logo_fill(lport, fp);
+		*did = FC_FID_FLOGI;
+		/*
+		 * if rport is valid then it
+		 * is port logo, therefore
+		 * set did to rport id.
+		 */
+		if (rport)
+			*did = rport->port_id;
+		break;
+
+	case ELS_RTV:
+		fc_rtv_fill(lport, fp);
+		*did = rport->port_id;
+		break;
+
+	case ELS_REC:
+		fc_rec_fill(lport, fp);
+		*did = rport->port_id;
+		break;
+
+	case ELS_PRLI:
+		fc_prli_fill(lport, fp);
+		*did = rport->port_id;
+		break;
+
+	case ELS_SCR:
+		fc_scr_fill(lport, fp);
+		*did = FC_FID_FCTRL;
+		break;
+
+	default:
+		FC_DBG("Invalid op code %x \n", op);
+		return -EINVAL;
+	}
+
+	*r_ctl = FC_RCTL_ELS_REQ;
+	*fh_type = FC_TYPE_ELS;
+	return 0;
+}
+#endif /* _FC_ENCODE_H_ */
diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h
new file mode 100644
index 000000000000..04d34a71355f
--- /dev/null
+++ b/include/scsi/fc_frame.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _FC_FRAME_H_
+#define _FC_FRAME_H_
+
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <scsi/scsi_cmnd.h>
+
+#include <scsi/fc/fc_fs.h>
+#include <scsi/fc/fc_fcp.h>
+#include <scsi/fc/fc_encaps.h>
+
+/*
+ * The fc_frame interface is used to pass frame data between functions.
+ * The frame includes the data buffer, length, and SOF / EOF delimiter types.
+ * A pointer to the port structure of the receiving port is also includeded.
+ */
+
+#define	FC_FRAME_HEADROOM	32	/* headroom for VLAN + FCoE headers */
+#define	FC_FRAME_TAILROOM	8	/* trailer space for FCoE */
+
+/*
+ * Information about an individual fibre channel frame received or to be sent.
+ * The buffer may be in up to 4 additional non-contiguous sections,
+ * but the linear section must hold the frame header.
+ */
+#define FC_FRAME_SG_LEN		4	/* scatter/gather list maximum length */
+
+#define fp_skb(fp)	(&((fp)->skb))
+#define fr_hdr(fp)	((fp)->skb.data)
+#define fr_len(fp)	((fp)->skb.len)
+#define fr_cb(fp)	((struct fcoe_rcv_info *)&((fp)->skb.cb[0]))
+#define fr_dev(fp)	(fr_cb(fp)->fr_dev)
+#define fr_seq(fp)	(fr_cb(fp)->fr_seq)
+#define fr_sof(fp)	(fr_cb(fp)->fr_sof)
+#define fr_eof(fp)	(fr_cb(fp)->fr_eof)
+#define fr_flags(fp)	(fr_cb(fp)->fr_flags)
+#define fr_max_payload(fp)	(fr_cb(fp)->fr_max_payload)
+#define fr_cmd(fp)	(fr_cb(fp)->fr_cmd)
+#define fr_dir(fp)	(fr_cmd(fp)->sc_data_direction)
+#define fr_crc(fp)	(fr_cb(fp)->fr_crc)
+
+struct fc_frame {
+	struct sk_buff skb;
+};
+
+struct fcoe_rcv_info {
+	struct packet_type  *ptype;
+	struct fc_lport	*fr_dev;	/* transport layer private pointer */
+	struct fc_seq	*fr_seq;	/* for use with exchange manager */
+	struct scsi_cmnd *fr_cmd;	/* for use of scsi command */
+	u32		fr_crc;
+	u16		fr_max_payload;	/* max FC payload */
+	enum fc_sof	fr_sof;		/* start of frame delimiter */
+	enum fc_eof	fr_eof;		/* end of frame delimiter */
+	u8		fr_flags;	/* flags - see below */
+};
+
+
+/*
+ * Get fc_frame pointer for an skb that's already been imported.
+ */
+static inline struct fcoe_rcv_info *fcoe_dev_from_skb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct fcoe_rcv_info) > sizeof(skb->cb));
+	return (struct fcoe_rcv_info *) skb->cb;
+}
+
+/*
+ * fr_flags.
+ */
+#define	FCPHF_CRC_UNCHECKED	0x01	/* CRC not computed, still appended */
+
+/*
+ * Initialize a frame.
+ * We don't do a complete memset here for performance reasons.
+ * The caller must set fr_free, fr_hdr, fr_len, fr_sof, and fr_eof eventually.
+ */
+static inline void fc_frame_init(struct fc_frame *fp)
+{
+	fr_dev(fp) = NULL;
+	fr_seq(fp) = NULL;
+	fr_flags(fp) = 0;
+}
+
+struct fc_frame *fc_frame_alloc_fill(struct fc_lport *, size_t payload_len);
+
+struct fc_frame *__fc_frame_alloc(size_t payload_len);
+
+/*
+ * Get frame for sending via port.
+ */
+static inline struct fc_frame *_fc_frame_alloc(struct fc_lport *dev,
+					       size_t payload_len)
+{
+	return __fc_frame_alloc(payload_len);
+}
+
+/*
+ * Allocate fc_frame structure and buffer.  Set the initial length to
+ * payload_size + sizeof (struct fc_frame_header).
+ */
+static inline struct fc_frame *fc_frame_alloc(struct fc_lport *dev, size_t len)
+{
+	struct fc_frame *fp;
+
+	/*
+	 * Note: Since len will often be a constant multiple of 4,
+	 * this check will usually be evaluated and eliminated at compile time.
+	 */
+	if ((len % 4) != 0)
+		fp = fc_frame_alloc_fill(dev, len);
+	else
+		fp = _fc_frame_alloc(dev, len);
+	return fp;
+}
+
+/*
+ * Free the fc_frame structure and buffer.
+ */
+static inline void fc_frame_free(struct fc_frame *fp)
+{
+	kfree_skb(fp_skb(fp));
+}
+
+static inline int fc_frame_is_linear(struct fc_frame *fp)
+{
+	return !skb_is_nonlinear(fp_skb(fp));
+}
+
+/*
+ * Get frame header from message in fc_frame structure.
+ * This hides a cast and provides a place to add some checking.
+ */
+static inline
+struct fc_frame_header *fc_frame_header_get(const struct fc_frame *fp)
+{
+	WARN_ON(fr_len(fp) < sizeof(struct fc_frame_header));
+	return (struct fc_frame_header *) fr_hdr(fp);
+}
+
+/*
+ * Get frame payload from message in fc_frame structure.
+ * This hides a cast and provides a place to add some checking.
+ * The len parameter is the minimum length for the payload portion.
+ * Returns NULL if the frame is too short.
+ *
+ * This assumes the interesting part of the payload is in the first part
+ * of the buffer for received data.  This may not be appropriate to use for
+ * buffers being transmitted.
+ */
+static inline void *fc_frame_payload_get(const struct fc_frame *fp,
+					 size_t len)
+{
+	void *pp = NULL;
+
+	if (fr_len(fp) >= sizeof(struct fc_frame_header) + len)
+		pp = fc_frame_header_get(fp) + 1;
+	return pp;
+}
+
+/*
+ * Get frame payload opcode (first byte) from message in fc_frame structure.
+ * This hides a cast and provides a place to add some checking. Return 0
+ * if the frame has no payload.
+ */
+static inline u8 fc_frame_payload_op(const struct fc_frame *fp)
+{
+	u8 *cp;
+
+	cp = fc_frame_payload_get(fp, sizeof(u8));
+	if (!cp)
+		return 0;
+	return *cp;
+
+}
+
+/*
+ * Get FC class from frame.
+ */
+static inline enum fc_class fc_frame_class(const struct fc_frame *fp)
+{
+	return fc_sof_class(fr_sof(fp));
+}
+
+/*
+ * Check the CRC in a frame.
+ * The CRC immediately follows the last data item *AFTER* the length.
+ * The return value is zero if the CRC matches.
+ */
+u32 fc_frame_crc_check(struct fc_frame *);
+
+static inline u8 fc_frame_rctl(const struct fc_frame *fp)
+{
+	return fc_frame_header_get(fp)->fh_r_ctl;
+}
+
+static inline bool fc_frame_is_cmd(const struct fc_frame *fp)
+{
+	return fc_frame_rctl(fp) == FC_RCTL_DD_UNSOL_CMD;
+}
+
+static inline bool fc_frame_is_read(const struct fc_frame *fp)
+{
+	if (fc_frame_is_cmd(fp) && fr_cmd(fp))
+		return fr_dir(fp) == DMA_FROM_DEVICE;
+	return false;
+}
+
+static inline bool fc_frame_is_write(const struct fc_frame *fp)
+{
+	if (fc_frame_is_cmd(fp) && fr_cmd(fp))
+		return fr_dir(fp) == DMA_TO_DEVICE;
+	return false;
+}
+
+/*
+ * Check for leaks.
+ * Print the frame header of any currently allocated frame, assuming there
+ * should be none at this point.
+ */
+void fc_frame_leak_check(void);
+
+#endif /* _FC_FRAME_H_ */
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
new file mode 100644
index 000000000000..9f2876397dda
--- /dev/null
+++ b/include/scsi/libfc.h
@@ -0,0 +1,938 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _LIBFC_H_
+#define _LIBFC_H_
+
+#include <linux/timer.h>
+#include <linux/if.h>
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include <scsi/fc/fc_fcp.h>
+#include <scsi/fc/fc_ns.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_gs.h>
+
+#include <scsi/fc_frame.h>
+
+#define LIBFC_DEBUG
+
+#ifdef LIBFC_DEBUG
+/* Log messages */
+#define FC_DBG(fmt, args...)						\
+	do {								\
+		printk(KERN_INFO "%s " fmt, __func__, ##args);		\
+	} while (0)
+#else
+#define FC_DBG(fmt, args...)
+#endif
+
+/*
+ * libfc error codes
+ */
+#define	FC_NO_ERR	0	/* no error */
+#define	FC_EX_TIMEOUT	1	/* Exchange timeout */
+#define	FC_EX_CLOSED	2	/* Exchange closed */
+
+/* some helpful macros */
+
+#define ntohll(x) be64_to_cpu(x)
+#define htonll(x) cpu_to_be64(x)
+
+#define ntoh24(p) (((p)[0] << 16) | ((p)[1] << 8) | ((p)[2]))
+
+#define hton24(p, v)	do {			\
+		p[0] = (((v) >> 16) & 0xFF);	\
+		p[1] = (((v) >> 8) & 0xFF);	\
+		p[2] = ((v) & 0xFF);		\
+	} while (0)
+
+/*
+ * FC HBA status
+ */
+#define FC_PAUSE		    (1 << 1)
+#define FC_LINK_UP		    (1 << 0)
+
+enum fc_lport_state {
+	LPORT_ST_NONE = 0,
+	LPORT_ST_FLOGI,
+	LPORT_ST_DNS,
+	LPORT_ST_RPN_ID,
+	LPORT_ST_RFT_ID,
+	LPORT_ST_SCR,
+	LPORT_ST_READY,
+	LPORT_ST_LOGO,
+	LPORT_ST_RESET
+};
+
+enum fc_disc_event {
+	DISC_EV_NONE = 0,
+	DISC_EV_SUCCESS,
+	DISC_EV_FAILED
+};
+
+enum fc_rport_state {
+	RPORT_ST_NONE = 0,
+	RPORT_ST_INIT,		/* initialized */
+	RPORT_ST_PLOGI,		/* waiting for PLOGI completion */
+	RPORT_ST_PRLI,		/* waiting for PRLI completion */
+	RPORT_ST_RTV,		/* waiting for RTV completion */
+	RPORT_ST_READY,		/* ready for use */
+	RPORT_ST_LOGO,		/* port logout sent */
+};
+
+enum fc_rport_trans_state {
+	FC_PORTSTATE_ROGUE,
+	FC_PORTSTATE_REAL,
+};
+
+/**
+ * struct fc_disc_port - temporary discovery port to hold rport identifiers
+ * @lp: Fibre Channel host port instance
+ * @peers: node for list management during discovery and RSCN processing
+ * @ids: identifiers structure to pass to fc_remote_port_add()
+ * @rport_work: work struct for starting the rport state machine
+ */
+struct fc_disc_port {
+	struct fc_lport             *lp;
+	struct list_head            peers;
+	struct fc_rport_identifiers ids;
+	struct work_struct	    rport_work;
+};
+
+enum fc_rport_event {
+	RPORT_EV_NONE = 0,
+	RPORT_EV_CREATED,
+	RPORT_EV_FAILED,
+	RPORT_EV_STOP,
+	RPORT_EV_LOGO
+};
+
+struct fc_rport_operations {
+	void (*event_callback)(struct fc_lport *, struct fc_rport *,
+			       enum fc_rport_event);
+};
+
+/**
+ * struct fc_rport_libfc_priv - libfc internal information about a remote port
+ * @local_port: Fibre Channel host port instance
+ * @rp_state: state tracks progress of PLOGI, PRLI, and RTV exchanges
+ * @flags: REC and RETRY supported flags
+ * @max_seq: maximum number of concurrent sequences
+ * @retries: retry count in current state
+ * @e_d_tov: error detect timeout value (in msec)
+ * @r_a_tov: resource allocation timeout value (in msec)
+ * @rp_mutex: mutex protects rport
+ * @retry_work:
+ * @event_callback: Callback for rport READY, FAILED or LOGO
+ */
+struct fc_rport_libfc_priv {
+	struct fc_lport		   *local_port;
+	enum fc_rport_state        rp_state;
+	u16			   flags;
+	#define FC_RP_FLAGS_REC_SUPPORTED	(1 << 0)
+	#define FC_RP_FLAGS_RETRY		(1 << 1)
+	u16		           max_seq;
+	unsigned int	           retries;
+	unsigned int	           e_d_tov;
+	unsigned int	           r_a_tov;
+	enum fc_rport_trans_state  trans_state;
+	struct mutex               rp_mutex;
+	struct delayed_work	   retry_work;
+	enum fc_rport_event        event;
+	struct fc_rport_operations *ops;
+	struct list_head           peers;
+	struct work_struct         event_work;
+};
+
+#define PRIV_TO_RPORT(x)						\
+	(struct fc_rport *)((void *)x - sizeof(struct fc_rport));
+#define RPORT_TO_PRIV(x)						\
+	(struct fc_rport_libfc_priv *)((void *)x + sizeof(struct fc_rport));
+
+struct fc_rport *fc_rport_rogue_create(struct fc_disc_port *);
+
+static inline void fc_rport_set_name(struct fc_rport *rport, u64 wwpn, u64 wwnn)
+{
+	rport->node_name = wwnn;
+	rport->port_name = wwpn;
+}
+
+/*
+ * fcoe stats structure
+ */
+struct fcoe_dev_stats {
+	u64		SecondsSinceLastReset;
+	u64		TxFrames;
+	u64		TxWords;
+	u64		RxFrames;
+	u64		RxWords;
+	u64		ErrorFrames;
+	u64		DumpedFrames;
+	u64		LinkFailureCount;
+	u64		LossOfSignalCount;
+	u64		InvalidTxWordCount;
+	u64		InvalidCRCCount;
+	u64		InputRequests;
+	u64		OutputRequests;
+	u64		ControlRequests;
+	u64		InputMegabytes;
+	u64		OutputMegabytes;
+};
+
+/*
+ * els data is used for passing ELS respone specific
+ * data to send ELS response mainly using infomation
+ * in exchange and sequence in EM layer.
+ */
+struct fc_seq_els_data {
+	struct fc_frame *fp;
+	enum fc_els_rjt_reason reason;
+	enum fc_els_rjt_explan explan;
+};
+
+/*
+ * FCP request structure, one for each scsi cmd request
+ */
+struct fc_fcp_pkt {
+	/*
+	 * housekeeping stuff
+	 */
+	struct fc_lport *lp;	/* handle to hba struct */
+	u16		state;		/* scsi_pkt state state */
+	u16		tgt_flags;	/* target flags	 */
+	atomic_t	ref_cnt;	/* fcp pkt ref count */
+	spinlock_t	scsi_pkt_lock;	/* Must be taken before the host lock
+					 * if both are held at the same time */
+	/*
+	 * SCSI I/O related stuff
+	 */
+	struct scsi_cmnd *cmd;		/* scsi command pointer. set/clear
+					 * under host lock */
+	struct list_head list;		/* tracks queued commands. access under
+					 * host lock */
+	/*
+	 * timeout related stuff
+	 */
+	struct timer_list timer;	/* command timer */
+	struct completion tm_done;
+	int	wait_for_comp;
+	unsigned long	start_time;	/* start jiffie */
+	unsigned long	end_time;	/* end jiffie */
+	unsigned long	last_pkt_time;	 /* jiffies of last frame received */
+
+	/*
+	 * scsi cmd and data transfer information
+	 */
+	u32		data_len;
+	/*
+	 * transport related veriables
+	 */
+	struct fcp_cmnd cdb_cmd;
+	size_t		xfer_len;
+	u32		xfer_contig_end; /* offset of end of contiguous xfer */
+	u16		max_payload;	/* max payload size in bytes */
+
+	/*
+	 * scsi/fcp return status
+	 */
+	u32		io_status;	/* SCSI result upper 24 bits */
+	u8		cdb_status;
+	u8		status_code;	/* FCP I/O status */
+	/* bit 3 Underrun bit 2: overrun */
+	u8		scsi_comp_flags;
+	u32		req_flags;	/* bit 0: read bit:1 write */
+	u32		scsi_resid;	/* residule length */
+
+	struct fc_rport	*rport;		/* remote port pointer */
+	struct fc_seq	*seq_ptr;	/* current sequence pointer */
+	/*
+	 * Error Processing
+	 */
+	u8		recov_retry;	/* count of recovery retries */
+	struct fc_seq	*recov_seq;	/* sequence for REC or SRR */
+};
+
+/*
+ * Structure and function definitions for managing Fibre Channel Exchanges
+ * and Sequences
+ *
+ * fc_exch holds state for one exchange and links to its active sequence.
+ *
+ * fc_seq holds the state for an individual sequence.
+ */
+
+struct fc_exch_mgr;
+
+/*
+ * Sequence.
+ */
+struct fc_seq {
+	u8	id;		/* seq ID */
+	u16	ssb_stat;	/* status flags for sequence status block */
+	u16	cnt;		/* frames sent so far on sequence */
+	u32	rec_data;	/* FC-4 value for REC */
+};
+
+#define FC_EX_DONE		(1 << 0) /* ep is completed */
+#define FC_EX_RST_CLEANUP	(1 << 1) /* reset is forcing completion */
+
+/*
+ * Exchange.
+ *
+ * Locking notes: The ex_lock protects following items:
+ *	state, esb_stat, f_ctl, seq.ssb_stat
+ *	seq_id
+ *	sequence allocation
+ */
+struct fc_exch {
+	struct fc_exch_mgr *em;		/* exchange manager */
+	u32		state;		/* internal driver state */
+	u16		xid;		/* our exchange ID */
+	struct list_head	ex_list;	/* free or busy list linkage */
+	spinlock_t	ex_lock;	/* lock covering exchange state */
+	atomic_t	ex_refcnt;	/* reference counter */
+	struct delayed_work timeout_work; /* timer for upper level protocols */
+	struct fc_lport	*lp;		/* fc device instance */
+	u16		oxid;		/* originator's exchange ID */
+	u16		rxid;		/* responder's exchange ID */
+	u32		oid;		/* originator's FCID */
+	u32		sid;		/* source FCID */
+	u32		did;		/* destination FCID */
+	u32		esb_stat;	/* exchange status for ESB */
+	u32		r_a_tov;	/* r_a_tov from rport (msec) */
+	u8		seq_id;		/* next sequence ID to use */
+	u32		f_ctl;		/* F_CTL flags for sequences */
+	u8		fh_type;	/* frame type */
+	enum fc_class	class;		/* class of service */
+	struct fc_seq	seq;		/* single sequence */
+	/*
+	 * Handler for responses to this current exchange.
+	 */
+	void		(*resp)(struct fc_seq *, struct fc_frame *, void *);
+	void		(*destructor)(struct fc_seq *, void *);
+	/*
+	 * arg is passed as void pointer to exchange
+	 * resp and destructor handlers
+	 */
+	void		*arg;
+};
+#define	fc_seq_exch(sp) container_of(sp, struct fc_exch, seq)
+
+struct libfc_function_template {
+
+	/**
+	 * Mandatory Fields
+	 *
+	 * These handlers must be implemented by the LLD.
+	 */
+
+	/*
+	 * Interface to send a FC frame
+	 */
+	int (*frame_send)(struct fc_lport *lp, struct fc_frame *fp);
+
+	/**
+	 * Optional Fields
+	 *
+	 * The LLD may choose to implement any of the following handlers.
+	 * If LLD doesn't specify hander and leaves its pointer NULL then
+	 * the default libfc function will be used for that handler.
+	 */
+
+	/**
+	 * ELS/CT interfaces
+	 */
+
+	/*
+	 * elsct_send - sends ELS/CT frame
+	 */
+	struct fc_seq *(*elsct_send)(struct fc_lport *lport,
+				     struct fc_rport *rport,
+				     struct fc_frame *fp,
+				     unsigned int op,
+				     void (*resp)(struct fc_seq *,
+					     struct fc_frame *fp,
+					     void *arg),
+				     void *arg, u32 timer_msec);
+	/**
+	 * Exhance Manager interfaces
+	 */
+
+	/*
+	 * Send the FC frame payload using a new exchange and sequence.
+	 *
+	 * The frame pointer with some of the header's fields must be
+	 * filled before calling exch_seq_send(), those fields are,
+	 *
+	 * - routing control
+	 * - FC port did
+	 * - FC port sid
+	 * - FC header type
+	 * - frame control
+	 * - parameter or relative offset
+	 *
+	 * The exchange response handler is set in this routine to resp()
+	 * function pointer. It can be called in two scenarios: if a timeout
+	 * occurs or if a response frame is received for the exchange. The
+	 * fc_frame pointer in response handler will also indicate timeout
+	 * as error using IS_ERR related macros.
+	 *
+	 * The exchange destructor handler is also set in this routine.
+	 * The destructor handler is invoked by EM layer when exchange
+	 * is about to free, this can be used by caller to free its
+	 * resources along with exchange free.
+	 *
+	 * The arg is passed back to resp and destructor handler.
+	 *
+	 * The timeout value (in msec) for an exchange is set if non zero
+	 * timer_msec argument is specified. The timer is canceled when
+	 * it fires or when the exchange is done. The exchange timeout handler
+	 * is registered by EM layer.
+	 */
+	struct fc_seq *(*exch_seq_send)(struct fc_lport *lp,
+					struct fc_frame *fp,
+					void (*resp)(struct fc_seq *sp,
+						     struct fc_frame *fp,
+						     void *arg),
+					void (*destructor)(struct fc_seq *sp,
+							   void *arg),
+					void *arg, unsigned int timer_msec);
+
+	/*
+	 * send a frame using existing sequence and exchange.
+	 */
+	int (*seq_send)(struct fc_lport *lp, struct fc_seq *sp,
+			struct fc_frame *fp);
+
+	/*
+	 * Send ELS response using mainly infomation
+	 * in exchange and sequence in EM layer.
+	 */
+	void (*seq_els_rsp_send)(struct fc_seq *sp, enum fc_els_cmd els_cmd,
+				 struct fc_seq_els_data *els_data);
+
+	/*
+	 * Abort an exchange and sequence. Generally called because of a
+	 * exchange timeout or an abort from the upper layer.
+	 *
+	 * A timer_msec can be specified for abort timeout, if non-zero
+	 * timer_msec value is specified then exchange resp handler
+	 * will be called with timeout error if no response to abort.
+	 */
+	int (*seq_exch_abort)(const struct fc_seq *req_sp,
+			      unsigned int timer_msec);
+
+	/*
+	 * Indicate that an exchange/sequence tuple is complete and the memory
+	 * allocated for the related objects may be freed.
+	 */
+	void (*exch_done)(struct fc_seq *sp);
+
+	/*
+	 * Assigns a EM and a free XID for an new exchange and then
+	 * allocates a new exchange and sequence pair.
+	 * The fp can be used to determine free XID.
+	 */
+	struct fc_exch *(*exch_get)(struct fc_lport *lp, struct fc_frame *fp);
+
+	/*
+	 * Release previously assigned XID by exch_get API.
+	 * The LLD may implement this if XID is assigned by LLD
+	 * in exch_get().
+	 */
+	void (*exch_put)(struct fc_lport *lp, struct fc_exch_mgr *mp,
+			 u16 ex_id);
+
+	/*
+	 * Start a new sequence on the same exchange/sequence tuple.
+	 */
+	struct fc_seq *(*seq_start_next)(struct fc_seq *sp);
+
+	/*
+	 * Reset an exchange manager, completing all sequences and exchanges.
+	 * If s_id is non-zero, reset only exchanges originating from that FID.
+	 * If d_id is non-zero, reset only exchanges sending to that FID.
+	 */
+	void (*exch_mgr_reset)(struct fc_exch_mgr *,
+			       u32 s_id, u32 d_id);
+
+	void (*rport_flush_queue)(void);
+	/**
+	 * Local Port interfaces
+	 */
+
+	/*
+	 * Receive a frame to a local port.
+	 */
+	void (*lport_recv)(struct fc_lport *lp, struct fc_seq *sp,
+			   struct fc_frame *fp);
+
+	int (*lport_reset)(struct fc_lport *);
+
+	/**
+	 * Remote Port interfaces
+	 */
+
+	/*
+	 * Initiates the RP state machine. It is called from the LP module.
+	 * This function will issue the following commands to the N_Port
+	 * identified by the FC ID provided.
+	 *
+	 * - PLOGI
+	 * - PRLI
+	 * - RTV
+	 */
+	int (*rport_login)(struct fc_rport *rport);
+
+	/*
+	 * Logoff, and remove the rport from the transport if
+	 * it had been added. This will send a LOGO to the target.
+	 */
+	int (*rport_logoff)(struct fc_rport *rport);
+
+	/*
+	 * Recieve a request from a remote port.
+	 */
+	void (*rport_recv_req)(struct fc_seq *, struct fc_frame *,
+			       struct fc_rport *);
+
+	struct fc_rport *(*rport_lookup)(const struct fc_lport *, u32);
+
+	/**
+	 * FCP interfaces
+	 */
+
+	/*
+	 * Send a fcp cmd from fsp pkt.
+	 * Called with the SCSI host lock unlocked and irqs disabled.
+	 *
+	 * The resp handler is called when FCP_RSP received.
+	 *
+	 */
+	int (*fcp_cmd_send)(struct fc_lport *lp, struct fc_fcp_pkt *fsp,
+			    void (*resp)(struct fc_seq *, struct fc_frame *fp,
+					 void *arg));
+
+	/*
+	 * Used at least durring linkdown and reset
+	 */
+	void (*fcp_cleanup)(struct fc_lport *lp);
+
+	/*
+	 * Abort all I/O on a local port
+	 */
+	void (*fcp_abort_io)(struct fc_lport *lp);
+
+	/**
+	 * Discovery interfaces
+	 */
+
+	void (*disc_recv_req)(struct fc_seq *,
+			      struct fc_frame *, struct fc_lport *);
+
+	/*
+	 * Start discovery for a local port.
+	 */
+	void (*disc_start)(void (*disc_callback)(struct fc_lport *,
+						 enum fc_disc_event),
+			   struct fc_lport *);
+
+	/*
+	 * Stop discovery for a given lport. This will remove
+	 * all discovered rports
+	 */
+	void (*disc_stop) (struct fc_lport *);
+
+	/*
+	 * Stop discovery for a given lport. This will block
+	 * until all discovered rports are deleted from the
+	 * FC transport class
+	 */
+	void (*disc_stop_final) (struct fc_lport *);
+};
+
+/* information used by the discovery layer */
+struct fc_disc {
+	unsigned char		retry_count;
+	unsigned char		delay;
+	unsigned char		pending;
+	unsigned char		requested;
+	unsigned short		seq_count;
+	unsigned char		buf_len;
+	enum fc_disc_event	event;
+
+	void (*disc_callback)(struct fc_lport *,
+			      enum fc_disc_event);
+
+	struct list_head	 rports;
+	struct fc_lport		*lport;
+	struct mutex		disc_mutex;
+	struct fc_gpn_ft_resp	partial_buf;	/* partial name buffer */
+	struct delayed_work	disc_work;
+};
+
+struct fc_lport {
+	struct list_head list;
+
+	/* Associations */
+	struct Scsi_Host	*host;
+	struct fc_exch_mgr	*emp;
+	struct fc_rport		*dns_rp;
+	struct fc_rport		*ptp_rp;
+	void			*scsi_priv;
+	struct fc_disc          disc;
+
+	/* Operational Information */
+	struct libfc_function_template tt;
+	u16			link_status;
+	enum fc_lport_state	state;
+	unsigned long		boot_time;
+
+	struct fc_host_statistics host_stats;
+	struct fcoe_dev_stats	*dev_stats[NR_CPUS];
+	u64			wwpn;
+	u64			wwnn;
+	u8			retry_count;
+
+	/* Capabilities */
+	u32			sg_supp:1;	/* scatter gather supported */
+	u32			seq_offload:1;	/* seq offload supported */
+	u32			crc_offload:1;	/* crc offload supported */
+	u32			lro_enabled:1;	/* large receive offload */
+	u32			mfs;	        /* max FC payload size */
+	unsigned int		service_params;
+	unsigned int		e_d_tov;
+	unsigned int		r_a_tov;
+	u8			max_retry_count;
+	u16			link_speed;
+	u16			link_supported_speeds;
+	u16			lro_xid;	/* max xid for fcoe lro */
+	struct fc_ns_fts	fcts;	        /* FC-4 type masks */
+	struct fc_els_rnid_gen	rnid_gen;	/* RNID information */
+
+	/* Semaphores */
+	struct mutex lp_mutex;
+
+	/* Miscellaneous */
+	struct delayed_work	retry_work;
+	struct delayed_work	disc_work;
+};
+
+/**
+ * FC_LPORT HELPER FUNCTIONS
+ *****************************/
+static inline void *lport_priv(const struct fc_lport *lp)
+{
+	return (void *)(lp + 1);
+}
+
+static inline int fc_lport_test_ready(struct fc_lport *lp)
+{
+	return lp->state == LPORT_ST_READY;
+}
+
+static inline void fc_set_wwnn(struct fc_lport *lp, u64 wwnn)
+{
+	lp->wwnn = wwnn;
+}
+
+static inline void fc_set_wwpn(struct fc_lport *lp, u64 wwnn)
+{
+	lp->wwpn = wwnn;
+}
+
+static inline void fc_lport_state_enter(struct fc_lport *lp,
+					enum fc_lport_state state)
+{
+	if (state != lp->state)
+		lp->retry_count = 0;
+	lp->state = state;
+}
+
+
+/**
+ * LOCAL PORT LAYER
+ *****************************/
+int fc_lport_init(struct fc_lport *lp);
+
+/*
+ * Destroy the specified local port by finding and freeing all
+ * fc_rports associated with it and then by freeing the fc_lport
+ * itself.
+ */
+int fc_lport_destroy(struct fc_lport *lp);
+
+/*
+ * Logout the specified local port from the fabric
+ */
+int fc_fabric_logoff(struct fc_lport *lp);
+
+/*
+ * Initiate the LP state machine. This handler will use fc_host_attr
+ * to store the FLOGI service parameters, so fc_host_attr must be
+ * initialized before calling this handler.
+ */
+int fc_fabric_login(struct fc_lport *lp);
+
+/*
+ * The link is up for the given local port.
+ */
+void fc_linkup(struct fc_lport *);
+
+/*
+ * Link is down for the given local port.
+ */
+void fc_linkdown(struct fc_lport *);
+
+/*
+ * Pause and unpause traffic.
+ */
+void fc_pause(struct fc_lport *);
+void fc_unpause(struct fc_lport *);
+
+/*
+ * Configure the local port.
+ */
+int fc_lport_config(struct fc_lport *);
+
+/*
+ * Reset the local port.
+ */
+int fc_lport_reset(struct fc_lport *);
+
+/*
+ * Set the mfs or reset
+ */
+int fc_set_mfs(struct fc_lport *lp, u32 mfs);
+
+
+/**
+ * REMOTE PORT LAYER
+ *****************************/
+int fc_rport_init(struct fc_lport *lp);
+void fc_rport_terminate_io(struct fc_rport *rp);
+
+/**
+ * DISCOVERY LAYER
+ *****************************/
+int fc_disc_init(struct fc_lport *lp);
+
+
+/**
+ * SCSI LAYER
+ *****************************/
+/*
+ * Initialize the SCSI block of libfc
+ */
+int fc_fcp_init(struct fc_lport *);
+
+/*
+ * This section provides an API which allows direct interaction
+ * with the SCSI-ml. Each of these functions satisfies a function
+ * pointer defined in Scsi_Host and therefore is always called
+ * directly from the SCSI-ml.
+ */
+int fc_queuecommand(struct scsi_cmnd *sc_cmd,
+		    void (*done)(struct scsi_cmnd *));
+
+/*
+ * complete processing of a fcp packet
+ *
+ * This function may sleep if a fsp timer is pending.
+ * The host lock must not be held by caller.
+ */
+void fc_fcp_complete(struct fc_fcp_pkt *fsp);
+
+/*
+ * Send an ABTS frame to the target device. The sc_cmd argument
+ * is a pointer to the SCSI command to be aborted.
+ */
+int fc_eh_abort(struct scsi_cmnd *sc_cmd);
+
+/*
+ * Reset a LUN by sending send the tm cmd to the target.
+ */
+int fc_eh_device_reset(struct scsi_cmnd *sc_cmd);
+
+/*
+ * Reset the host adapter.
+ */
+int fc_eh_host_reset(struct scsi_cmnd *sc_cmd);
+
+/*
+ * Check rport status.
+ */
+int fc_slave_alloc(struct scsi_device *sdev);
+
+/*
+ * Adjust the queue depth.
+ */
+int fc_change_queue_depth(struct scsi_device *sdev, int qdepth);
+
+/*
+ * Change the tag type.
+ */
+int fc_change_queue_type(struct scsi_device *sdev, int tag_type);
+
+/*
+ * Free memory pools used by the FCP layer.
+ */
+void fc_fcp_destroy(struct fc_lport *);
+
+/**
+ * ELS/CT interface
+ *****************************/
+/*
+ * Initializes ELS/CT interface
+ */
+int fc_elsct_init(struct fc_lport *lp);
+
+
+/**
+ * EXCHANGE MANAGER LAYER
+ *****************************/
+/*
+ * Initializes Exchange Manager related
+ * function pointers in struct libfc_function_template.
+ */
+int fc_exch_init(struct fc_lport *lp);
+
+/*
+ * Allocates an Exchange Manager (EM).
+ *
+ * The EM manages exchanges for their allocation and
+ * free, also allows exchange lookup for received
+ * frame.
+ *
+ * The class is used for initializing FC class of
+ * allocated exchange from EM.
+ *
+ * The min_xid and max_xid will limit new
+ * exchange ID (XID) within this range for
+ * a new exchange.
+ * The LLD may choose to have multiple EMs,
+ * e.g. one EM instance per CPU receive thread in LLD.
+ * The LLD can use exch_get() of struct libfc_function_template
+ * to specify XID for a new exchange within
+ * a specified EM instance.
+ *
+ * The em_idx to uniquely identify an EM instance.
+ */
+struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp,
+				      enum fc_class class,
+				      u16 min_xid,
+				      u16 max_xid);
+
+/*
+ * Free an exchange manager.
+ */
+void fc_exch_mgr_free(struct fc_exch_mgr *mp);
+
+/*
+ * Receive a frame on specified local port and exchange manager.
+ */
+void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp,
+		  struct fc_frame *fp);
+
+/*
+ * This function is for exch_seq_send function pointer in
+ * struct libfc_function_template, see comment block on
+ * exch_seq_send for description of this function.
+ */
+struct fc_seq *fc_exch_seq_send(struct fc_lport *lp,
+				struct fc_frame *fp,
+				void (*resp)(struct fc_seq *sp,
+					     struct fc_frame *fp,
+					     void *arg),
+				void (*destructor)(struct fc_seq *sp,
+						   void *arg),
+				void *arg, u32 timer_msec);
+
+/*
+ * send a frame using existing sequence and exchange.
+ */
+int fc_seq_send(struct fc_lport *lp, struct fc_seq *sp, struct fc_frame *fp);
+
+/*
+ * Send ELS response using mainly infomation
+ * in exchange and sequence in EM layer.
+ */
+void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd,
+			 struct fc_seq_els_data *els_data);
+
+/*
+ * This function is for seq_exch_abort function pointer in
+ * struct libfc_function_template, see comment block on
+ * seq_exch_abort for description of this function.
+ */
+int fc_seq_exch_abort(const struct fc_seq *req_sp, unsigned int timer_msec);
+
+/*
+ * Indicate that an exchange/sequence tuple is complete and the memory
+ * allocated for the related objects may be freed.
+ */
+void fc_exch_done(struct fc_seq *sp);
+
+/*
+ * Assigns a EM and XID for a frame and then allocates
+ * a new exchange and sequence pair.
+ * The fp can be used to determine free XID.
+ */
+struct fc_exch *fc_exch_get(struct fc_lport *lp, struct fc_frame *fp);
+
+/*
+ * Allocate a new exchange and sequence pair.
+ * if ex_id is zero then next free exchange id
+ * from specified exchange manger mp will be assigned.
+ */
+struct fc_exch *fc_exch_alloc(struct fc_exch_mgr *mp,
+			      struct fc_frame *fp, u16 ex_id);
+/*
+ * Start a new sequence on the same exchange as the supplied sequence.
+ */
+struct fc_seq *fc_seq_start_next(struct fc_seq *sp);
+
+/*
+ * Reset an exchange manager, completing all sequences and exchanges.
+ * If s_id is non-zero, reset only exchanges originating from that FID.
+ * If d_id is non-zero, reset only exchanges sending to that FID.
+ */
+void fc_exch_mgr_reset(struct fc_exch_mgr *, u32 s_id, u32 d_id);
+
+/*
+ * Functions for fc_functions_template
+ */
+void fc_get_host_speed(struct Scsi_Host *shost);
+void fc_get_host_port_type(struct Scsi_Host *shost);
+void fc_get_host_port_state(struct Scsi_Host *shost);
+void fc_set_rport_loss_tmo(struct fc_rport *rport, u32 timeout);
+struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *);
+
+/*
+ * module setup functions.
+ */
+int fc_setup_exch_mgr(void);
+void fc_destroy_exch_mgr(void);
+int fc_setup_rport(void);
+void fc_destroy_rport(void);
+
+#endif /* _LIBFC_H_ */
-- 
cgit v1.2.3


From 85b4aa4926a50210b683ac89326e338e7d131211 Mon Sep 17 00:00:00 2001
From: Robert Love <robert.w.love@intel.com>
Date: Tue, 9 Dec 2008 15:10:24 -0800
Subject: [SCSI] fcoe: Fibre Channel over Ethernet

Encapsulation protocol for running Fibre Channel over Ethernet interfaces.
Creates virtual Fibre Channel host adapters using libfc.

This layer is the LLD to the scsi-ml. It allocates the Scsi_Host, utilizes
libfc for Fibre Channel protocol processing and interacts with netdev to
send/receive Ethernet packets.

Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/Kconfig                  |    7 +
 drivers/scsi/Makefile                 |    1 +
 drivers/scsi/fcoe/Makefile            |    8 +
 drivers/scsi/fcoe/fc_transport_fcoe.c |  446 ++++++++++
 drivers/scsi/fcoe/fcoe_sw.c           |  494 +++++++++++
 drivers/scsi/fcoe/libfcoe.c           | 1510 +++++++++++++++++++++++++++++++++
 include/scsi/fc_transport_fcoe.h      |   54 ++
 include/scsi/libfcoe.h                |  176 ++++
 8 files changed, 2696 insertions(+)
 create mode 100644 drivers/scsi/fcoe/Makefile
 create mode 100644 drivers/scsi/fcoe/fc_transport_fcoe.c
 create mode 100644 drivers/scsi/fcoe/fcoe_sw.c
 create mode 100644 drivers/scsi/fcoe/libfcoe.c
 create mode 100644 include/scsi/fc_transport_fcoe.h
 create mode 100644 include/scsi/libfcoe.h

(limited to 'include')

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 24d762aab7c5..673463e4bbf0 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -609,6 +609,13 @@ config LIBFC
 	---help---
 	  Fibre Channel library module
 
+config FCOE
+	tristate "FCoE module"
+	depends on SCSI
+	select LIBFC
+	---help---
+	  Fibre Channel over Ethernet module
+
 config SCSI_DMX3191D
 	tristate "DMX3191D SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 87355f573d60..07d0f58de9bb 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_SCSI_SRP_ATTRS)	+= scsi_transport_srp.o
 obj-$(CONFIG_SCSI_DH)		+= device_handler/
 
 obj-$(CONFIG_LIBFC)		+= libfc/
+obj-$(CONFIG_FCOE)		+= fcoe/
 obj-$(CONFIG_ISCSI_TCP) 	+= libiscsi.o	libiscsi_tcp.o iscsi_tcp.o
 obj-$(CONFIG_INFINIBAND_ISER) 	+= libiscsi.o
 obj-$(CONFIG_SCSI_A4000T)	+= 53c700.o	a4000t.o
diff --git a/drivers/scsi/fcoe/Makefile b/drivers/scsi/fcoe/Makefile
new file mode 100644
index 000000000000..b78da06d7c0e
--- /dev/null
+++ b/drivers/scsi/fcoe/Makefile
@@ -0,0 +1,8 @@
+# $Id: Makefile
+
+obj-$(CONFIG_FCOE) += fcoe.o
+
+fcoe-y := \
+	libfcoe.o \
+	fcoe_sw.o \
+	fc_transport_fcoe.o
diff --git a/drivers/scsi/fcoe/fc_transport_fcoe.c b/drivers/scsi/fcoe/fc_transport_fcoe.c
new file mode 100644
index 000000000000..bf7fe6fc0820
--- /dev/null
+++ b/drivers/scsi/fcoe/fc_transport_fcoe.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright(c) 2007 - 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#include <linux/pci.h>
+#include <scsi/libfcoe.h>
+#include <scsi/fc_transport_fcoe.h>
+
+/* internal fcoe transport */
+struct fcoe_transport_internal {
+	struct fcoe_transport *t;
+	struct net_device *netdev;
+	struct list_head list;
+};
+
+/* fcoe transports list and its lock */
+static LIST_HEAD(fcoe_transports);
+static DEFINE_MUTEX(fcoe_transports_lock);
+
+/**
+ * fcoe_transport_default - returns ptr to the default transport fcoe_sw
+ **/
+struct fcoe_transport *fcoe_transport_default(void)
+{
+	return &fcoe_sw_transport;
+}
+
+/**
+ * fcoe_transport_to_pcidev - get the pci dev from a netdev
+ * @netdev: the netdev that pci dev will be retrived from
+ *
+ * Returns: NULL or the corrsponding pci_dev
+ **/
+struct pci_dev *fcoe_transport_pcidev(const struct net_device *netdev)
+{
+	if (!netdev->dev.parent)
+		return NULL;
+	return to_pci_dev(netdev->dev.parent);
+}
+
+/**
+ * fcoe_transport_device_lookup - find out netdev is managed by the
+ * transport
+ * assign a transport to a device
+ * @netdev: the netdev the transport to be attached to
+ *
+ * This will look for existing offload driver, if not found, it falls back to
+ * the default sw hba (fcoe_sw) as its fcoe transport.
+ *
+ * Returns: 0 for success
+ **/
+static struct fcoe_transport_internal *fcoe_transport_device_lookup(
+	struct fcoe_transport *t, struct net_device *netdev)
+{
+	struct fcoe_transport_internal *ti;
+
+	/* assign the transpor to this device */
+	mutex_lock(&t->devlock);
+	list_for_each_entry(ti, &t->devlist, list) {
+		if (ti->netdev == netdev) {
+			mutex_unlock(&t->devlock);
+			return ti;
+		}
+	}
+	mutex_unlock(&t->devlock);
+	return NULL;
+}
+/**
+ * fcoe_transport_device_add - assign a transport to a device
+ * @netdev: the netdev the transport to be attached to
+ *
+ * This will look for existing offload driver, if not found, it falls back to
+ * the default sw hba (fcoe_sw) as its fcoe transport.
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_transport_device_add(struct fcoe_transport *t,
+				     struct net_device *netdev)
+{
+	struct fcoe_transport_internal *ti;
+
+	ti = fcoe_transport_device_lookup(t, netdev);
+	if (ti) {
+		printk(KERN_DEBUG "fcoe_transport_device_add:"
+		       "device %s is already added to transport %s\n",
+		       netdev->name, t->name);
+		return -EEXIST;
+	}
+	/* allocate an internal struct to host the netdev and the list */
+	ti = kzalloc(sizeof(*ti), GFP_KERNEL);
+	if (!ti)
+		return -ENOMEM;
+
+	ti->t = t;
+	ti->netdev = netdev;
+	INIT_LIST_HEAD(&ti->list);
+	dev_hold(ti->netdev);
+
+	mutex_lock(&t->devlock);
+	list_add(&ti->list, &t->devlist);
+	mutex_unlock(&t->devlock);
+
+	printk(KERN_DEBUG "fcoe_transport_device_add:"
+		       "device %s added to transport %s\n",
+		       netdev->name, t->name);
+
+	return 0;
+}
+
+/**
+ * fcoe_transport_device_remove - remove a device from its transport
+ * @netdev: the netdev the transport to be attached to
+ *
+ * this removes the device from the transport so the given transport will
+ * not manage this device any more
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_transport_device_remove(struct fcoe_transport *t,
+					struct net_device *netdev)
+{
+	struct fcoe_transport_internal *ti;
+
+	ti = fcoe_transport_device_lookup(t, netdev);
+	if (!ti) {
+		printk(KERN_DEBUG "fcoe_transport_device_remove:"
+		       "device %s is not managed by transport %s\n",
+		       netdev->name, t->name);
+		return -ENODEV;
+	}
+	mutex_lock(&t->devlock);
+	list_del(&ti->list);
+	mutex_unlock(&t->devlock);
+	printk(KERN_DEBUG "fcoe_transport_device_remove:"
+	       "device %s removed from transport %s\n",
+	       netdev->name, t->name);
+	dev_put(ti->netdev);
+	kfree(ti);
+	return 0;
+}
+
+/**
+ * fcoe_transport_device_remove_all - remove all from transport devlist
+ *
+ * this removes the device from the transport so the given transport will
+ * not manage this device any more
+ *
+ * Returns: 0 for success
+ **/
+static void fcoe_transport_device_remove_all(struct fcoe_transport *t)
+{
+	struct fcoe_transport_internal *ti, *tmp;
+
+	mutex_lock(&t->devlock);
+	list_for_each_entry_safe(ti, tmp, &t->devlist, list) {
+		list_del(&ti->list);
+		kfree(ti);
+	}
+	mutex_unlock(&t->devlock);
+}
+
+/**
+ * fcoe_transport_match - use the bus device match function to match the hw
+ * @t: the fcoe transport
+ * @netdev:
+ *
+ * This function is used to check if the givne transport wants to manage the
+ * input netdev. if the transports implements the match function, it will be
+ * called, o.w. we just compare the pci vendor and device id.
+ *
+ * Returns: true for match up
+ **/
+static bool fcoe_transport_match(struct fcoe_transport *t,
+				struct net_device *netdev)
+{
+	/* match transport by vendor and device id */
+	struct pci_dev *pci;
+
+	pci = fcoe_transport_pcidev(netdev);
+
+	if (pci) {
+		printk(KERN_DEBUG "fcoe_transport_match:"
+		       "%s:%x:%x -- %s:%x:%x\n",
+		       t->name, t->vendor, t->device,
+		       netdev->name, pci->vendor, pci->device);
+
+		/* if transport supports match */
+		if (t->match)
+			return t->match(netdev);
+
+		/* else just compare the vendor and device id: pci only */
+		return (t->vendor == pci->vendor) && (t->device == pci->device);
+	}
+	return false;
+}
+
+/**
+ * fcoe_transport_lookup - check if the transport is already registered
+ * @t: the transport to be looked up
+ *
+ * This compares the parent device (pci) vendor and device id
+ *
+ * Returns: NULL if not found
+ *
+ * TODO - return default sw transport if no other transport is found
+ **/
+static struct fcoe_transport *fcoe_transport_lookup(
+	struct net_device *netdev)
+{
+	struct fcoe_transport *t;
+
+	mutex_lock(&fcoe_transports_lock);
+	list_for_each_entry(t, &fcoe_transports, list) {
+		if (fcoe_transport_match(t, netdev)) {
+			mutex_unlock(&fcoe_transports_lock);
+			return t;
+		}
+	}
+	mutex_unlock(&fcoe_transports_lock);
+
+	printk(KERN_DEBUG "fcoe_transport_lookup:"
+	       "use default transport for %s\n", netdev->name);
+	return fcoe_transport_default();
+}
+
+/**
+ * fcoe_transport_register - adds a fcoe transport to the fcoe transports list
+ * @t: ptr to the fcoe transport to be added
+ *
+ * Returns: 0 for success
+ **/
+int fcoe_transport_register(struct fcoe_transport *t)
+{
+	struct fcoe_transport *tt;
+
+	/* TODO - add fcoe_transport specific initialization here */
+	mutex_lock(&fcoe_transports_lock);
+	list_for_each_entry(tt, &fcoe_transports, list) {
+		if (tt == t) {
+			mutex_unlock(&fcoe_transports_lock);
+			return -EEXIST;
+		}
+	}
+	list_add_tail(&t->list, &fcoe_transports);
+	mutex_unlock(&fcoe_transports_lock);
+
+	mutex_init(&t->devlock);
+	INIT_LIST_HEAD(&t->devlist);
+
+	printk(KERN_DEBUG "fcoe_transport_register:%s\n", t->name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_transport_register);
+
+/**
+ * fcoe_transport_unregister - remove the tranport fro the fcoe transports list
+ * @t: ptr to the fcoe transport to be removed
+ *
+ * Returns: 0 for success
+ **/
+int fcoe_transport_unregister(struct fcoe_transport *t)
+{
+	struct fcoe_transport *tt, *tmp;
+
+	mutex_lock(&fcoe_transports_lock);
+	list_for_each_entry_safe(tt, tmp, &fcoe_transports, list) {
+		if (tt == t) {
+			list_del(&t->list);
+			mutex_unlock(&fcoe_transports_lock);
+			fcoe_transport_device_remove_all(t);
+			printk(KERN_DEBUG "fcoe_transport_unregister:%s\n",
+			       t->name);
+			return 0;
+		}
+	}
+	mutex_unlock(&fcoe_transports_lock);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(fcoe_transport_unregister);
+
+/*
+ * fcoe_load_transport_driver - load an offload driver by alias name
+ * @netdev: the target net device
+ *
+ * Requests for an offload driver module as the fcoe transport, if fails, it
+ * falls back to use the SW HBA (fcoe_sw) as its transport
+ *
+ * TODO -
+ * 	1. supports only PCI device
+ * 	2. needs fix for VLAn and bonding
+ * 	3. pure hw fcoe hba may not have netdev
+ *
+ * Returns: 0 for success
+ **/
+int fcoe_load_transport_driver(struct net_device *netdev)
+{
+	struct pci_dev *pci;
+	struct device *dev = netdev->dev.parent;
+
+	if (fcoe_transport_lookup(netdev)) {
+		/* load default transport */
+		printk(KERN_DEBUG "fcoe: already loaded transport for %s\n",
+		       netdev->name);
+		return -EEXIST;
+	}
+
+	pci = to_pci_dev(dev);
+	if (dev->bus != &pci_bus_type) {
+		printk(KERN_DEBUG "fcoe: support noly PCI device\n");
+		return -ENODEV;
+	}
+	printk(KERN_DEBUG "fcoe: loading driver fcoe-pci-0x%04x-0x%04x\n",
+	       pci->vendor, pci->device);
+
+	return request_module("fcoe-pci-0x%04x-0x%04x",
+			      pci->vendor, pci->device);
+
+}
+EXPORT_SYMBOL_GPL(fcoe_load_transport_driver);
+
+/**
+ * fcoe_transport_attach - load transport to fcoe
+ * @netdev: the netdev the transport to be attached to
+ *
+ * This will look for existing offload driver, if not found, it falls back to
+ * the default sw hba (fcoe_sw) as its fcoe transport.
+ *
+ * Returns: 0 for success
+ **/
+int fcoe_transport_attach(struct net_device *netdev)
+{
+	struct fcoe_transport *t;
+
+	/* find the corresponding transport */
+	t = fcoe_transport_lookup(netdev);
+	if (!t) {
+		printk(KERN_DEBUG "fcoe_transport_attach"
+		       ":no transport for %s:use %s\n",
+		       netdev->name, t->name);
+		return -ENODEV;
+	}
+	/* add to the transport */
+	if (fcoe_transport_device_add(t, netdev)) {
+		printk(KERN_DEBUG "fcoe_transport_attach"
+		       ":failed to add %s to tramsport %s\n",
+		       netdev->name, t->name);
+		return -EIO;
+	}
+	/* transport create function */
+	if (t->create)
+		t->create(netdev);
+
+	printk(KERN_DEBUG "fcoe_transport_attach:transport %s for %s\n",
+	       t->name, netdev->name);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_transport_attach);
+
+/**
+ * fcoe_transport_release - unload transport from fcoe
+ * @netdev: the net device on which fcoe is to be released
+ *
+ * Returns: 0 for success
+ **/
+int fcoe_transport_release(struct net_device *netdev)
+{
+	struct fcoe_transport *t;
+
+	/* find the corresponding transport */
+	t = fcoe_transport_lookup(netdev);
+	if (!t) {
+		printk(KERN_DEBUG "fcoe_transport_release:"
+		       "no transport for %s:use %s\n",
+		       netdev->name, t->name);
+		return -ENODEV;
+	}
+	/* remove the device from the transport */
+	if (fcoe_transport_device_remove(t, netdev)) {
+		printk(KERN_DEBUG "fcoe_transport_release:"
+		       "failed to add %s to tramsport %s\n",
+		       netdev->name, t->name);
+		return -EIO;
+	}
+	/* transport destroy function */
+	if (t->destroy)
+		t->destroy(netdev);
+
+	printk(KERN_DEBUG "fcoe_transport_release:"
+	       "device %s dettached from transport %s\n",
+	       netdev->name, t->name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_transport_release);
+
+/**
+ * fcoe_transport_init - initializes fcoe transport layer
+ *
+ * This prepares for the fcoe transport layer
+ *
+ * Returns: none
+ **/
+int __init fcoe_transport_init(void)
+{
+	INIT_LIST_HEAD(&fcoe_transports);
+	mutex_init(&fcoe_transports_lock);
+	return 0;
+}
+
+/**
+ * fcoe_transport_exit - cleans up the fcoe transport layer
+ * This cleans up the fcoe transport layer. removing any transport on the list,
+ * note that the transport destroy func is not called here.
+ *
+ * Returns: none
+ **/
+int __exit fcoe_transport_exit(void)
+{
+	struct fcoe_transport *t, *tmp;
+
+	mutex_lock(&fcoe_transports_lock);
+	list_for_each_entry_safe(t, tmp, &fcoe_transports, list) {
+		list_del(&t->list);
+		mutex_unlock(&fcoe_transports_lock);
+		fcoe_transport_device_remove_all(t);
+		mutex_lock(&fcoe_transports_lock);
+	}
+	mutex_unlock(&fcoe_transports_lock);
+	return 0;
+}
diff --git a/drivers/scsi/fcoe/fcoe_sw.c b/drivers/scsi/fcoe/fcoe_sw.c
new file mode 100644
index 000000000000..dc4cd5e25760
--- /dev/null
+++ b/drivers/scsi/fcoe/fcoe_sw.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright(c) 2007 - 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <net/rtnetlink.h>
+
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_encaps.h>
+#include <scsi/fc/fc_fs.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include <scsi/libfc.h>
+#include <scsi/libfcoe.h>
+#include <scsi/fc_transport_fcoe.h>
+
+#define FCOE_SW_VERSION	"0.1"
+#define	FCOE_SW_NAME	"fcoesw"
+#define	FCOE_SW_VENDOR	"Open-FCoE.org"
+
+#define FCOE_MAX_LUN		255
+#define FCOE_MAX_FCP_TARGET	256
+
+#define FCOE_MAX_OUTSTANDING_COMMANDS	1024
+
+#define FCOE_MIN_XID		0x0001	/* the min xid supported by fcoe_sw */
+#define FCOE_MAX_XID		0x07ef	/* the max xid supported by fcoe_sw */
+
+static struct scsi_transport_template *scsi_transport_fcoe_sw;
+
+struct fc_function_template fcoe_sw_transport_function = {
+	.show_host_node_name = 1,
+	.show_host_port_name = 1,
+	.show_host_supported_classes = 1,
+	.show_host_supported_fc4s = 1,
+	.show_host_active_fc4s = 1,
+	.show_host_maxframe_size = 1,
+
+	.show_host_port_id = 1,
+	.show_host_supported_speeds = 1,
+	.get_host_speed = fc_get_host_speed,
+	.show_host_speed = 1,
+	.show_host_port_type = 1,
+	.get_host_port_state = fc_get_host_port_state,
+	.show_host_port_state = 1,
+	.show_host_symbolic_name = 1,
+
+	.dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
+	.show_rport_maxframe_size = 1,
+	.show_rport_supported_classes = 1,
+
+	.show_host_fabric_name = 1,
+	.show_starget_node_name = 1,
+	.show_starget_port_name = 1,
+	.show_starget_port_id = 1,
+	.set_rport_dev_loss_tmo = fc_set_rport_loss_tmo,
+	.show_rport_dev_loss_tmo = 1,
+	.get_fc_host_stats = fc_get_host_stats,
+	.issue_fc_host_lip = fcoe_reset,
+
+	.terminate_rport_io = fc_rport_terminate_io,
+};
+
+static struct scsi_host_template fcoe_sw_shost_template = {
+	.module = THIS_MODULE,
+	.name = "FCoE Driver",
+	.proc_name = FCOE_SW_NAME,
+	.queuecommand = fc_queuecommand,
+	.eh_abort_handler = fc_eh_abort,
+	.eh_device_reset_handler = fc_eh_device_reset,
+	.eh_host_reset_handler = fc_eh_host_reset,
+	.slave_alloc = fc_slave_alloc,
+	.change_queue_depth = fc_change_queue_depth,
+	.change_queue_type = fc_change_queue_type,
+	.this_id = -1,
+	.cmd_per_lun = 32,
+	.can_queue = FCOE_MAX_OUTSTANDING_COMMANDS,
+	.use_clustering = ENABLE_CLUSTERING,
+	.sg_tablesize = SG_ALL,
+	.max_sectors = 0xffff,
+};
+
+/*
+ * fcoe_sw_lport_config - sets up the fc_lport
+ * @lp: ptr to the fc_lport
+ * @shost: ptr to the parent scsi host
+ *
+ * Returns: 0 for success
+ *
+ */
+static int fcoe_sw_lport_config(struct fc_lport *lp)
+{
+	int i = 0;
+
+	lp->link_status = 0;
+	lp->max_retry_count = 3;
+	lp->e_d_tov = 2 * 1000;	/* FC-FS default */
+	lp->r_a_tov = 2 * 2 * 1000;
+	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
+			      FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL);
+
+	/*
+	 * allocate per cpu stats block
+	 */
+	for_each_online_cpu(i)
+		lp->dev_stats[i] = kzalloc(sizeof(struct fcoe_dev_stats),
+					   GFP_KERNEL);
+
+	/* lport fc_lport related configuration */
+	fc_lport_config(lp);
+
+	return 0;
+}
+
+/*
+ * fcoe_sw_netdev_config - sets up fcoe_softc for lport and network
+ * related properties
+ * @lp : ptr to the fc_lport
+ * @netdev : ptr to the associated netdevice struct
+ *
+ * Must be called after fcoe_sw_lport_config() as it will use lport mutex
+ *
+ * Returns : 0 for success
+ *
+ */
+static int fcoe_sw_netdev_config(struct fc_lport *lp, struct net_device *netdev)
+{
+	u32 mfs;
+	u64 wwnn, wwpn;
+	struct fcoe_softc *fc;
+	u8 flogi_maddr[ETH_ALEN];
+
+	/* Setup lport private data to point to fcoe softc */
+	fc = lport_priv(lp);
+	fc->lp = lp;
+	fc->real_dev = netdev;
+	fc->phys_dev = netdev;
+
+	/* Require support for get_pauseparam ethtool op. */
+	if (netdev->priv_flags & IFF_802_1Q_VLAN)
+		fc->phys_dev = vlan_dev_real_dev(netdev);
+
+	/* Do not support for bonding device */
+	if ((fc->real_dev->priv_flags & IFF_MASTER_ALB) ||
+	    (fc->real_dev->priv_flags & IFF_SLAVE_INACTIVE) ||
+	    (fc->real_dev->priv_flags & IFF_MASTER_8023AD)) {
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Determine max frame size based on underlying device and optional
+	 * user-configured limit.  If the MFS is too low, fcoe_link_ok()
+	 * will return 0, so do this first.
+	 */
+	mfs = fc->real_dev->mtu - (sizeof(struct fcoe_hdr) +
+				   sizeof(struct fcoe_crc_eof));
+	if (fc_set_mfs(lp, mfs))
+		return -EINVAL;
+
+	lp->link_status = ~FC_PAUSE & ~FC_LINK_UP;
+	if (!fcoe_link_ok(lp))
+		lp->link_status |= FC_LINK_UP;
+
+	/* offload features support */
+	if (fc->real_dev->features & NETIF_F_SG)
+		lp->sg_supp = 1;
+
+
+	skb_queue_head_init(&fc->fcoe_pending_queue);
+
+	/* setup Source Mac Address */
+	memcpy(fc->ctl_src_addr, fc->real_dev->dev_addr,
+	       fc->real_dev->addr_len);
+
+	wwnn = fcoe_wwn_from_mac(fc->real_dev->dev_addr, 1, 0);
+	fc_set_wwnn(lp, wwnn);
+	/* XXX - 3rd arg needs to be vlan id */
+	wwpn = fcoe_wwn_from_mac(fc->real_dev->dev_addr, 2, 0);
+	fc_set_wwpn(lp, wwpn);
+
+	/*
+	 * Add FCoE MAC address as second unicast MAC address
+	 * or enter promiscuous mode if not capable of listening
+	 * for multiple unicast MACs.
+	 */
+	rtnl_lock();
+	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
+	dev_unicast_add(fc->real_dev, flogi_maddr, ETH_ALEN);
+	rtnl_unlock();
+
+	/*
+	 * setup the receive function from ethernet driver
+	 * on the ethertype for the given device
+	 */
+	fc->fcoe_packet_type.func = fcoe_rcv;
+	fc->fcoe_packet_type.type = __constant_htons(ETH_P_FCOE);
+	fc->fcoe_packet_type.dev = fc->real_dev;
+	dev_add_pack(&fc->fcoe_packet_type);
+
+	return 0;
+}
+
+/*
+ * fcoe_sw_shost_config - sets up fc_lport->host
+ * @lp : ptr to the fc_lport
+ * @shost : ptr to the associated scsi host
+ * @dev : device associated to scsi host
+ *
+ * Must be called after fcoe_sw_lport_config) and fcoe_sw_netdev_config()
+ *
+ * Returns : 0 for success
+ *
+ */
+static int fcoe_sw_shost_config(struct fc_lport *lp, struct Scsi_Host *shost,
+				struct device *dev)
+{
+	int rc = 0;
+
+	/* lport scsi host config */
+	lp->host = shost;
+
+	lp->host->max_lun = FCOE_MAX_LUN;
+	lp->host->max_id = FCOE_MAX_FCP_TARGET;
+	lp->host->max_channel = 0;
+	lp->host->transportt = scsi_transport_fcoe_sw;
+
+	/* add the new host to the SCSI-ml */
+	rc = scsi_add_host(lp->host, dev);
+	if (rc) {
+		FC_DBG("fcoe_sw_shost_config:error on scsi_add_host\n");
+		return rc;
+	}
+	sprintf(fc_host_symbolic_name(lp->host), "%s v%s over %s",
+		FCOE_SW_NAME, FCOE_SW_VERSION,
+		fcoe_netdev(lp)->name);
+
+	return 0;
+}
+
+/*
+ * fcoe_sw_em_config - allocates em for this lport
+ * @lp: the port that em is to allocated for
+ *
+ * Returns : 0 on success
+ */
+static inline int fcoe_sw_em_config(struct fc_lport *lp)
+{
+	BUG_ON(lp->emp);
+
+	lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3,
+				    FCOE_MIN_XID, FCOE_MAX_XID);
+	if (!lp->emp)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * fcoe_sw_destroy - FCoE software HBA tear-down function
+ * @netdev: ptr to the associated net_device
+ *
+ * Returns: 0 if link is OK for use by FCoE.
+ */
+static int fcoe_sw_destroy(struct net_device *netdev)
+{
+	int cpu;
+	struct fc_lport *lp = NULL;
+	struct fcoe_softc *fc;
+	u8 flogi_maddr[ETH_ALEN];
+
+	BUG_ON(!netdev);
+
+	printk(KERN_DEBUG "fcoe_sw_destroy:interface on %s\n",
+	       netdev->name);
+
+	lp = fcoe_hostlist_lookup(netdev);
+	if (!lp)
+		return -ENODEV;
+
+	fc = fcoe_softc(lp);
+
+	/* Logout of the fabric */
+	fc_fabric_logoff(lp);
+
+	/* Remove the instance from fcoe's list */
+	fcoe_hostlist_remove(lp);
+
+	/* Don't listen for Ethernet packets anymore */
+	dev_remove_pack(&fc->fcoe_packet_type);
+
+	/* Cleanup the fc_lport */
+	fc_lport_destroy(lp);
+	fc_fcp_destroy(lp);
+
+	/* Detach from the scsi-ml */
+	fc_remove_host(lp->host);
+	scsi_remove_host(lp->host);
+
+	/* There are no more rports or I/O, free the EM */
+	if (lp->emp)
+		fc_exch_mgr_free(lp->emp);
+
+	/* Delete secondary MAC addresses */
+	rtnl_lock();
+	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
+	dev_unicast_delete(fc->real_dev, flogi_maddr, ETH_ALEN);
+	if (compare_ether_addr(fc->data_src_addr, (u8[6]) { 0 }))
+		dev_unicast_delete(fc->real_dev, fc->data_src_addr, ETH_ALEN);
+	rtnl_unlock();
+
+	/* Free the per-CPU revieve threads */
+	fcoe_percpu_clean(lp);
+
+	/* Free existing skbs */
+	fcoe_clean_pending_queue(lp);
+
+	/* Free memory used by statistical counters */
+	for_each_online_cpu(cpu)
+		kfree(lp->dev_stats[cpu]);
+
+	/* Release the net_device and Scsi_Host */
+	dev_put(fc->real_dev);
+	scsi_host_put(lp->host);
+
+	return 0;
+}
+
+static struct libfc_function_template fcoe_sw_libfc_fcn_templ = {
+	.frame_send = fcoe_xmit,
+};
+
+/*
+ * fcoe_sw_create - this function creates the fcoe interface
+ * @netdev: pointer the associated netdevice
+ *
+ * Creates fc_lport struct and scsi_host for lport, configures lport
+ * and starts fabric login.
+ *
+ * Returns : 0 on success
+ */
+static int fcoe_sw_create(struct net_device *netdev)
+{
+	int rc;
+	struct fc_lport *lp = NULL;
+	struct fcoe_softc *fc;
+	struct Scsi_Host *shost;
+
+	BUG_ON(!netdev);
+
+	printk(KERN_DEBUG "fcoe_sw_create:interface on %s\n",
+	       netdev->name);
+
+	lp = fcoe_hostlist_lookup(netdev);
+	if (lp)
+		return -EEXIST;
+
+	shost = fcoe_host_alloc(&fcoe_sw_shost_template,
+				sizeof(struct fcoe_softc));
+	if (!shost) {
+		FC_DBG("Could not allocate host structure\n");
+		return -ENOMEM;
+	}
+	lp = shost_priv(shost);
+	fc = lport_priv(lp);
+
+	/* configure fc_lport, e.g., em */
+	rc = fcoe_sw_lport_config(lp);
+	if (rc) {
+		FC_DBG("Could not configure lport\n");
+		goto out_host_put;
+	}
+
+	/* configure lport network properties */
+	rc = fcoe_sw_netdev_config(lp, netdev);
+	if (rc) {
+		FC_DBG("Could not configure netdev for lport\n");
+		goto out_host_put;
+	}
+
+	/* configure lport scsi host properties */
+	rc = fcoe_sw_shost_config(lp, shost, &netdev->dev);
+	if (rc) {
+		FC_DBG("Could not configure shost for lport\n");
+		goto out_host_put;
+	}
+
+	/* lport exch manager allocation */
+	rc = fcoe_sw_em_config(lp);
+	if (rc) {
+		FC_DBG("Could not configure em for lport\n");
+		goto out_host_put;
+	}
+
+	/* Initialize the library */
+	rc = fcoe_libfc_config(lp, &fcoe_sw_libfc_fcn_templ);
+	if (rc) {
+		FC_DBG("Could not configure libfc for lport!\n");
+		goto out_lp_destroy;
+	}
+
+	/* add to lports list */
+	fcoe_hostlist_add(lp);
+
+	lp->boot_time = jiffies;
+
+	fc_fabric_login(lp);
+
+	dev_hold(netdev);
+
+	return rc;
+
+out_lp_destroy:
+	fc_exch_mgr_free(lp->emp); /* Free the EM */
+out_host_put:
+	scsi_host_put(lp->host);
+	return rc;
+}
+
+/*
+ * fcoe_sw_match - the fcoe sw transport match function
+ *
+ * Returns : false always
+ */
+static bool fcoe_sw_match(struct net_device *netdev)
+{
+	/* FIXME - for sw transport, always return false */
+	return false;
+}
+
+/* the sw hba fcoe transport */
+struct fcoe_transport fcoe_sw_transport = {
+	.name = "fcoesw",
+	.create = fcoe_sw_create,
+	.destroy = fcoe_sw_destroy,
+	.match = fcoe_sw_match,
+	.vendor = 0x0,
+	.device = 0xffff,
+};
+
+/*
+ * fcoe_sw_init - registers fcoe_sw_transport
+ *
+ * Returns : 0 on success
+ */
+int __init fcoe_sw_init(void)
+{
+	/* attach to scsi transport */
+	scsi_transport_fcoe_sw =
+		fc_attach_transport(&fcoe_sw_transport_function);
+	if (!scsi_transport_fcoe_sw) {
+		printk(KERN_ERR "fcoe_sw_init:fc_attach_transport() failed\n");
+		return -ENODEV;
+	}
+	/* register sw transport */
+	fcoe_transport_register(&fcoe_sw_transport);
+	return 0;
+}
+
+/*
+ * fcoe_sw_exit - unregisters fcoe_sw_transport
+ *
+ * Returns : 0 on success
+ */
+int __exit fcoe_sw_exit(void)
+{
+	/* dettach the transport */
+	fc_release_transport(scsi_transport_fcoe_sw);
+	fcoe_transport_unregister(&fcoe_sw_transport);
+	return 0;
+}
diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
new file mode 100644
index 000000000000..1cb549c4fac4
--- /dev/null
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -0,0 +1,1510 @@
+/*
+ * Copyright(c) 2007 - 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/kthread.h>
+#include <linux/crc32.h>
+#include <linux/cpu.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsicam.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_fc.h>
+#include <net/rtnetlink.h>
+
+#include <scsi/fc/fc_encaps.h>
+
+#include <scsi/libfc.h>
+#include <scsi/fc_frame.h>
+#include <scsi/libfcoe.h>
+#include <scsi/fc_transport_fcoe.h>
+
+static int debug_fcoe;
+
+#define FCOE_MAX_QUEUE_DEPTH  256
+
+/* destination address mode */
+#define FCOE_GW_ADDR_MODE	    0x00
+#define FCOE_FCOUI_ADDR_MODE	    0x01
+
+#define FCOE_WORD_TO_BYTE  4
+
+MODULE_AUTHOR("Open-FCoE.org");
+MODULE_DESCRIPTION("FCoE");
+MODULE_LICENSE("GPL");
+
+/* fcoe host list */
+LIST_HEAD(fcoe_hostlist);
+DEFINE_RWLOCK(fcoe_hostlist_lock);
+DEFINE_TIMER(fcoe_timer, NULL, 0, 0);
+struct fcoe_percpu_s *fcoe_percpu[NR_CPUS];
+
+
+/* Function Prototyes */
+static int fcoe_check_wait_queue(struct fc_lport *);
+static void fcoe_insert_wait_queue_head(struct fc_lport *, struct sk_buff *);
+static void fcoe_insert_wait_queue(struct fc_lport *, struct sk_buff *);
+static void fcoe_recv_flogi(struct fcoe_softc *, struct fc_frame *, u8 *);
+#ifdef CONFIG_HOTPLUG_CPU
+static int fcoe_cpu_callback(struct notifier_block *, ulong, void *);
+#endif /* CONFIG_HOTPLUG_CPU */
+static int fcoe_device_notification(struct notifier_block *, ulong, void *);
+static void fcoe_dev_setup(void);
+static void fcoe_dev_cleanup(void);
+
+/* notification function from net device */
+static struct notifier_block fcoe_notifier = {
+	.notifier_call = fcoe_device_notification,
+};
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct notifier_block fcoe_cpu_notifier = {
+	.notifier_call = fcoe_cpu_callback,
+};
+
+/**
+ * fcoe_create_percpu_data - creates the associated cpu data
+ * @cpu: index for the cpu where fcoe cpu data will be created
+ *
+ * create percpu stats block, from cpu add notifier
+ *
+ * Returns: none
+ **/
+static void fcoe_create_percpu_data(int cpu)
+{
+	struct fc_lport *lp;
+	struct fcoe_softc *fc;
+
+	write_lock_bh(&fcoe_hostlist_lock);
+	list_for_each_entry(fc, &fcoe_hostlist, list) {
+		lp = fc->lp;
+		if (lp->dev_stats[cpu] == NULL)
+			lp->dev_stats[cpu] =
+				kzalloc(sizeof(struct fcoe_dev_stats),
+					GFP_KERNEL);
+	}
+	write_unlock_bh(&fcoe_hostlist_lock);
+}
+
+/**
+ * fcoe_destroy_percpu_data - destroys the associated cpu data
+ * @cpu: index for the cpu where fcoe cpu data will destroyed
+ *
+ * destroy percpu stats block called by cpu add/remove notifier
+ *
+ * Retuns: none
+ **/
+static void fcoe_destroy_percpu_data(int cpu)
+{
+	struct fc_lport *lp;
+	struct fcoe_softc *fc;
+
+	write_lock_bh(&fcoe_hostlist_lock);
+	list_for_each_entry(fc, &fcoe_hostlist, list) {
+		lp = fc->lp;
+		kfree(lp->dev_stats[cpu]);
+		lp->dev_stats[cpu] = NULL;
+	}
+	write_unlock_bh(&fcoe_hostlist_lock);
+}
+
+/**
+ * fcoe_cpu_callback - fcoe cpu hotplug event callback
+ * @nfb: callback data block
+ * @action: event triggering the callback
+ * @hcpu: index for the cpu of this event
+ *
+ * this creates or destroys per cpu data for fcoe
+ *
+ * Returns NOTIFY_OK always.
+ **/
+static int fcoe_cpu_callback(struct notifier_block *nfb, unsigned long action,
+			     void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		fcoe_create_percpu_data(cpu);
+		break;
+	case CPU_DEAD:
+		fcoe_destroy_percpu_data(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/**
+ * foce_rcv - this is the fcoe receive function called by NET_RX_SOFTIRQ
+ * @skb: the receive skb
+ * @dev: associated net device
+ * @ptype: context
+ * @odldev: last device
+ *
+ * this function will receive the packet and build fc frame and pass it up
+ *
+ * Returns: 0 for success
+ **/
+int fcoe_rcv(struct sk_buff *skb, struct net_device *dev,
+	     struct packet_type *ptype, struct net_device *olddev)
+{
+	struct fc_lport *lp;
+	struct fcoe_rcv_info *fr;
+	struct fcoe_softc *fc;
+	struct fcoe_dev_stats *stats;
+	struct fc_frame_header *fh;
+	unsigned short oxid;
+	int cpu_idx;
+	struct fcoe_percpu_s *fps;
+
+	fc = container_of(ptype, struct fcoe_softc, fcoe_packet_type);
+	lp = fc->lp;
+	if (unlikely(lp == NULL)) {
+		FC_DBG("cannot find hba structure");
+		goto err2;
+	}
+
+	if (unlikely(debug_fcoe)) {
+		FC_DBG("skb_info: len:%d data_len:%d head:%p data:%p tail:%p "
+		       "end:%p sum:%d dev:%s", skb->len, skb->data_len,
+		       skb->head, skb->data, skb_tail_pointer(skb),
+		       skb_end_pointer(skb), skb->csum,
+		       skb->dev ? skb->dev->name : "<NULL>");
+
+	}
+
+	/* check for FCOE packet type */
+	if (unlikely(eth_hdr(skb)->h_proto != htons(ETH_P_FCOE))) {
+		FC_DBG("wrong FC type frame");
+		goto err;
+	}
+
+	/*
+	 * Check for minimum frame length, and make sure required FCoE
+	 * and FC headers are pulled into the linear data area.
+	 */
+	if (unlikely((skb->len < FCOE_MIN_FRAME) ||
+	    !pskb_may_pull(skb, FCOE_HEADER_LEN)))
+		goto err;
+
+	skb_set_transport_header(skb, sizeof(struct fcoe_hdr));
+	fh = (struct fc_frame_header *) skb_transport_header(skb);
+
+	oxid = ntohs(fh->fh_ox_id);
+
+	fr = fcoe_dev_from_skb(skb);
+	fr->fr_dev = lp;
+	fr->ptype = ptype;
+	cpu_idx = 0;
+#ifdef CONFIG_SMP
+	/*
+	 * The incoming frame exchange id(oxid) is ANDed with num of online
+	 * cpu bits to get cpu_idx and then this cpu_idx is used for selecting
+	 * a per cpu kernel thread from fcoe_percpu. In case the cpu is
+	 * offline or no kernel thread for derived cpu_idx then cpu_idx is
+	 * initialize to first online cpu index.
+	 */
+	cpu_idx = oxid & (num_online_cpus() - 1);
+	if (!fcoe_percpu[cpu_idx] || !cpu_online(cpu_idx))
+		cpu_idx = first_cpu(cpu_online_map);
+#endif
+	fps = fcoe_percpu[cpu_idx];
+
+	spin_lock_bh(&fps->fcoe_rx_list.lock);
+	__skb_queue_tail(&fps->fcoe_rx_list, skb);
+	if (fps->fcoe_rx_list.qlen == 1)
+		wake_up_process(fps->thread);
+
+	spin_unlock_bh(&fps->fcoe_rx_list.lock);
+
+	return 0;
+err:
+#ifdef CONFIG_SMP
+	stats = lp->dev_stats[smp_processor_id()];
+#else
+	stats = lp->dev_stats[0];
+#endif
+	if (stats)
+		stats->ErrorFrames++;
+
+err2:
+	kfree_skb(skb);
+	return -1;
+}
+EXPORT_SYMBOL_GPL(fcoe_rcv);
+
+/**
+ * fcoe_start_io - pass to netdev to start xmit for fcoe
+ * @skb: the skb to be xmitted
+ *
+ * Returns: 0 for success
+ **/
+static inline int fcoe_start_io(struct sk_buff *skb)
+{
+	int rc;
+
+	skb_get(skb);
+	rc = dev_queue_xmit(skb);
+	if (rc != 0)
+		return rc;
+	kfree_skb(skb);
+	return 0;
+}
+
+/**
+ * fcoe_get_paged_crc_eof - in case we need alloc a page for crc_eof
+ * @skb: the skb to be xmitted
+ * @tlen: total len
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_get_paged_crc_eof(struct sk_buff *skb, int tlen)
+{
+	struct fcoe_percpu_s *fps;
+	struct page *page;
+	int cpu_idx;
+
+	cpu_idx = get_cpu();
+	fps = fcoe_percpu[cpu_idx];
+	page = fps->crc_eof_page;
+	if (!page) {
+		page = alloc_page(GFP_ATOMIC);
+		if (!page) {
+			put_cpu();
+			return -ENOMEM;
+		}
+		fps->crc_eof_page = page;
+		WARN_ON(fps->crc_eof_offset != 0);
+	}
+
+	get_page(page);
+	skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page,
+			   fps->crc_eof_offset, tlen);
+	skb->len += tlen;
+	skb->data_len += tlen;
+	skb->truesize += tlen;
+	fps->crc_eof_offset += sizeof(struct fcoe_crc_eof);
+
+	if (fps->crc_eof_offset >= PAGE_SIZE) {
+		fps->crc_eof_page = NULL;
+		fps->crc_eof_offset = 0;
+		put_page(page);
+	}
+	put_cpu();
+	return 0;
+}
+
+/**
+ * fcoe_fc_crc - calculates FC CRC in this fcoe skb
+ * @fp: the fc_frame containg data to be checksummed
+ *
+ * This uses crc32() to calculate the crc for fc frame
+ * Return   : 32 bit crc
+ *
+ **/
+u32 fcoe_fc_crc(struct fc_frame *fp)
+{
+	struct sk_buff *skb = fp_skb(fp);
+	struct skb_frag_struct *frag;
+	unsigned char *data;
+	unsigned long off, len, clen;
+	u32 crc;
+	unsigned i;
+
+	crc = crc32(~0, skb->data, skb_headlen(skb));
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+		off = frag->page_offset;
+		len = frag->size;
+		while (len > 0) {
+			clen = min(len, PAGE_SIZE - (off & ~PAGE_MASK));
+			data = kmap_atomic(frag->page + (off >> PAGE_SHIFT),
+					   KM_SKB_DATA_SOFTIRQ);
+			crc = crc32(crc, data + (off & ~PAGE_MASK), clen);
+			kunmap_atomic(data, KM_SKB_DATA_SOFTIRQ);
+			off += clen;
+			len -= clen;
+		}
+	}
+	return crc;
+}
+EXPORT_SYMBOL_GPL(fcoe_fc_crc);
+
+/**
+ * fcoe_xmit - FCoE frame transmit function
+ * @lp:	the associated local port
+ * @fp: the fc_frame to be transmitted
+ *
+ * Return   : 0 for success
+ *
+ **/
+int fcoe_xmit(struct fc_lport *lp, struct fc_frame *fp)
+{
+	int wlen, rc = 0;
+	u32 crc;
+	struct ethhdr *eh;
+	struct fcoe_crc_eof *cp;
+	struct sk_buff *skb;
+	struct fcoe_dev_stats *stats;
+	struct fc_frame_header *fh;
+	unsigned int hlen;		/* header length implies the version */
+	unsigned int tlen;		/* trailer length */
+	unsigned int elen;		/* eth header, may include vlan */
+	int flogi_in_progress = 0;
+	struct fcoe_softc *fc;
+	u8 sof, eof;
+	struct fcoe_hdr *hp;
+
+	WARN_ON((fr_len(fp) % sizeof(u32)) != 0);
+
+	fc = fcoe_softc(lp);
+	/*
+	 * if it is a flogi then we need to learn gw-addr
+	 * and my own fcid
+	 */
+	fh = fc_frame_header_get(fp);
+	if (unlikely(fh->fh_r_ctl == FC_RCTL_ELS_REQ)) {
+		if (fc_frame_payload_op(fp) == ELS_FLOGI) {
+			fc->flogi_oxid = ntohs(fh->fh_ox_id);
+			fc->address_mode = FCOE_FCOUI_ADDR_MODE;
+			fc->flogi_progress = 1;
+			flogi_in_progress = 1;
+		} else if (fc->flogi_progress && ntoh24(fh->fh_s_id) != 0) {
+			/*
+			 * Here we must've gotten an SID by accepting an FLOGI
+			 * from a point-to-point connection.  Switch to using
+			 * the source mac based on the SID.  The destination
+			 * MAC in this case would have been set by receving the
+			 * FLOGI.
+			 */
+			fc_fcoe_set_mac(fc->data_src_addr, fh->fh_s_id);
+			fc->flogi_progress = 0;
+		}
+	}
+
+	skb = fp_skb(fp);
+	sof = fr_sof(fp);
+	eof = fr_eof(fp);
+
+	elen = (fc->real_dev->priv_flags & IFF_802_1Q_VLAN) ?
+		sizeof(struct vlan_ethhdr) : sizeof(struct ethhdr);
+	hlen = sizeof(struct fcoe_hdr);
+	tlen = sizeof(struct fcoe_crc_eof);
+	wlen = (skb->len - tlen + sizeof(crc)) / FCOE_WORD_TO_BYTE;
+
+	/* crc offload */
+	if (likely(lp->crc_offload)) {
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum_start = skb_headroom(skb);
+		skb->csum_offset = skb->len;
+		crc = 0;
+	} else {
+		skb->ip_summed = CHECKSUM_NONE;
+		crc = fcoe_fc_crc(fp);
+	}
+
+	/* copy fc crc and eof to the skb buff */
+	if (skb_is_nonlinear(skb)) {
+		skb_frag_t *frag;
+		if (fcoe_get_paged_crc_eof(skb, tlen)) {
+			kfree(skb);
+			return -ENOMEM;
+		}
+		frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
+		cp = kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ)
+			+ frag->page_offset;
+	} else {
+		cp = (struct fcoe_crc_eof *)skb_put(skb, tlen);
+	}
+
+	memset(cp, 0, sizeof(*cp));
+	cp->fcoe_eof = eof;
+	cp->fcoe_crc32 = cpu_to_le32(~crc);
+
+	if (skb_is_nonlinear(skb)) {
+		kunmap_atomic(cp, KM_SKB_DATA_SOFTIRQ);
+		cp = NULL;
+	}
+
+	/* adjust skb netowrk/transport offsets to match mac/fcoe/fc */
+	skb_push(skb, elen + hlen);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb->mac_len = elen;
+	skb->protocol = htons(ETH_P_802_3);
+	skb->dev = fc->real_dev;
+
+	/* fill up mac and fcoe headers */
+	eh = eth_hdr(skb);
+	eh->h_proto = htons(ETH_P_FCOE);
+	if (fc->address_mode == FCOE_FCOUI_ADDR_MODE)
+		fc_fcoe_set_mac(eh->h_dest, fh->fh_d_id);
+	else
+		/* insert GW address */
+		memcpy(eh->h_dest, fc->dest_addr, ETH_ALEN);
+
+	if (unlikely(flogi_in_progress))
+		memcpy(eh->h_source, fc->ctl_src_addr, ETH_ALEN);
+	else
+		memcpy(eh->h_source, fc->data_src_addr, ETH_ALEN);
+
+	hp = (struct fcoe_hdr *)(eh + 1);
+	memset(hp, 0, sizeof(*hp));
+	if (FC_FCOE_VER)
+		FC_FCOE_ENCAPS_VER(hp, FC_FCOE_VER);
+	hp->fcoe_sof = sof;
+
+	/* update tx stats: regardless if LLD fails */
+	stats = lp->dev_stats[smp_processor_id()];
+	if (stats) {
+		stats->TxFrames++;
+		stats->TxWords += wlen;
+	}
+
+	/* send down to lld */
+	fr_dev(fp) = lp;
+	if (fc->fcoe_pending_queue.qlen)
+		rc = fcoe_check_wait_queue(lp);
+
+	if (rc == 0)
+		rc = fcoe_start_io(skb);
+
+	if (rc) {
+		fcoe_insert_wait_queue(lp, skb);
+		if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
+			fc_pause(lp);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_xmit);
+
+/*
+ * fcoe_percpu_receive_thread - recv thread per cpu
+ * @arg: ptr to the fcoe per cpu struct
+ *
+ * Return: 0 for success
+ *
+ */
+int fcoe_percpu_receive_thread(void *arg)
+{
+	struct fcoe_percpu_s *p = arg;
+	u32 fr_len;
+	struct fc_lport *lp;
+	struct fcoe_rcv_info *fr;
+	struct fcoe_dev_stats *stats;
+	struct fc_frame_header *fh;
+	struct sk_buff *skb;
+	struct fcoe_crc_eof crc_eof;
+	struct fc_frame *fp;
+	u8 *mac = NULL;
+	struct fcoe_softc *fc;
+	struct fcoe_hdr *hp;
+
+	set_user_nice(current, 19);
+
+	while (!kthread_should_stop()) {
+
+		spin_lock_bh(&p->fcoe_rx_list.lock);
+		while ((skb = __skb_dequeue(&p->fcoe_rx_list)) == NULL) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			spin_unlock_bh(&p->fcoe_rx_list.lock);
+			schedule();
+			set_current_state(TASK_RUNNING);
+			if (kthread_should_stop())
+				return 0;
+			spin_lock_bh(&p->fcoe_rx_list.lock);
+		}
+		spin_unlock_bh(&p->fcoe_rx_list.lock);
+		fr = fcoe_dev_from_skb(skb);
+		lp = fr->fr_dev;
+		if (unlikely(lp == NULL)) {
+			FC_DBG("invalid HBA Structure");
+			kfree_skb(skb);
+			continue;
+		}
+
+		stats = lp->dev_stats[smp_processor_id()];
+
+		if (unlikely(debug_fcoe)) {
+			FC_DBG("skb_info: len:%d data_len:%d head:%p data:%p "
+			       "tail:%p end:%p sum:%d dev:%s",
+			       skb->len, skb->data_len,
+			       skb->head, skb->data, skb_tail_pointer(skb),
+			       skb_end_pointer(skb), skb->csum,
+			       skb->dev ? skb->dev->name : "<NULL>");
+		}
+
+		/*
+		 * Save source MAC address before discarding header.
+		 */
+		fc = lport_priv(lp);
+		if (unlikely(fc->flogi_progress))
+			mac = eth_hdr(skb)->h_source;
+
+		if (skb_is_nonlinear(skb))
+			skb_linearize(skb);	/* not ideal */
+
+		/*
+		 * Frame length checks and setting up the header pointers
+		 * was done in fcoe_rcv already.
+		 */
+		hp = (struct fcoe_hdr *) skb_network_header(skb);
+		fh = (struct fc_frame_header *) skb_transport_header(skb);
+
+		if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
+			if (stats) {
+				if (stats->ErrorFrames < 5)
+					FC_DBG("unknown FCoE version %x",
+					       FC_FCOE_DECAPS_VER(hp));
+				stats->ErrorFrames++;
+			}
+			kfree_skb(skb);
+			continue;
+		}
+
+		skb_pull(skb, sizeof(struct fcoe_hdr));
+		fr_len = skb->len - sizeof(struct fcoe_crc_eof);
+
+		if (stats) {
+			stats->RxFrames++;
+			stats->RxWords += fr_len / FCOE_WORD_TO_BYTE;
+		}
+
+		fp = (struct fc_frame *)skb;
+		fc_frame_init(fp);
+		fr_dev(fp) = lp;
+		fr_sof(fp) = hp->fcoe_sof;
+
+		/* Copy out the CRC and EOF trailer for access */
+		if (skb_copy_bits(skb, fr_len, &crc_eof, sizeof(crc_eof))) {
+			kfree_skb(skb);
+			continue;
+		}
+		fr_eof(fp) = crc_eof.fcoe_eof;
+		fr_crc(fp) = crc_eof.fcoe_crc32;
+		if (pskb_trim(skb, fr_len)) {
+			kfree_skb(skb);
+			continue;
+		}
+
+		/*
+		 * We only check CRC if no offload is available and if it is
+		 * it's solicited data, in which case, the FCP layer would
+		 * check it during the copy.
+		 */
+		if (lp->crc_offload)
+			fr_flags(fp) &= ~FCPHF_CRC_UNCHECKED;
+		else
+			fr_flags(fp) |= FCPHF_CRC_UNCHECKED;
+
+		fh = fc_frame_header_get(fp);
+		if (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA &&
+		    fh->fh_type == FC_TYPE_FCP) {
+			fc_exch_recv(lp, lp->emp, fp);
+			continue;
+		}
+		if (fr_flags(fp) & FCPHF_CRC_UNCHECKED) {
+			if (le32_to_cpu(fr_crc(fp)) !=
+			    ~crc32(~0, skb->data, fr_len)) {
+				if (debug_fcoe || stats->InvalidCRCCount < 5)
+					printk(KERN_WARNING "fcoe: dropping "
+					       "frame with CRC error\n");
+				stats->InvalidCRCCount++;
+				stats->ErrorFrames++;
+				fc_frame_free(fp);
+				continue;
+			}
+			fr_flags(fp) &= ~FCPHF_CRC_UNCHECKED;
+		}
+		/* non flogi and non data exchanges are handled here */
+		if (unlikely(fc->flogi_progress))
+			fcoe_recv_flogi(fc, fp, mac);
+		fc_exch_recv(lp, lp->emp, fp);
+	}
+	return 0;
+}
+
+/**
+ * fcoe_recv_flogi - flogi receive function
+ * @fc: associated fcoe_softc
+ * @fp: the recieved frame
+ * @sa: the source address of this flogi
+ *
+ * This is responsible to parse the flogi response and sets the corresponding
+ * mac address for the initiator, eitehr OUI based or GW based.
+ *
+ * Returns: none
+ **/
+static void fcoe_recv_flogi(struct fcoe_softc *fc, struct fc_frame *fp, u8 *sa)
+{
+	struct fc_frame_header *fh;
+	u8 op;
+
+	fh = fc_frame_header_get(fp);
+	if (fh->fh_type != FC_TYPE_ELS)
+		return;
+	op = fc_frame_payload_op(fp);
+	if (op == ELS_LS_ACC && fh->fh_r_ctl == FC_RCTL_ELS_REP &&
+	    fc->flogi_oxid == ntohs(fh->fh_ox_id)) {
+		/*
+		 * FLOGI accepted.
+		 * If the src mac addr is FC_OUI-based, then we mark the
+		 * address_mode flag to use FC_OUI-based Ethernet DA.
+		 * Otherwise we use the FCoE gateway addr
+		 */
+		if (!compare_ether_addr(sa, (u8[6]) FC_FCOE_FLOGI_MAC)) {
+			fc->address_mode = FCOE_FCOUI_ADDR_MODE;
+		} else {
+			memcpy(fc->dest_addr, sa, ETH_ALEN);
+			fc->address_mode = FCOE_GW_ADDR_MODE;
+		}
+
+		/*
+		 * Remove any previously-set unicast MAC filter.
+		 * Add secondary FCoE MAC address filter for our OUI.
+		 */
+		rtnl_lock();
+		if (compare_ether_addr(fc->data_src_addr, (u8[6]) { 0 }))
+			dev_unicast_delete(fc->real_dev, fc->data_src_addr,
+					   ETH_ALEN);
+		fc_fcoe_set_mac(fc->data_src_addr, fh->fh_d_id);
+		dev_unicast_add(fc->real_dev, fc->data_src_addr, ETH_ALEN);
+		rtnl_unlock();
+
+		fc->flogi_progress = 0;
+	} else if (op == ELS_FLOGI && fh->fh_r_ctl == FC_RCTL_ELS_REQ && sa) {
+		/*
+		 * Save source MAC for point-to-point responses.
+		 */
+		memcpy(fc->dest_addr, sa, ETH_ALEN);
+		fc->address_mode = FCOE_GW_ADDR_MODE;
+	}
+}
+
+/**
+ * fcoe_watchdog - fcoe timer callback
+ * @vp:
+ *
+ * This checks the pending queue length for fcoe and put fcoe to be paused state
+ * if the FCOE_MAX_QUEUE_DEPTH is reached. This is done for all fc_lport on the
+ * fcoe_hostlist.
+ *
+ * Returns: 0 for success
+ **/
+void fcoe_watchdog(ulong vp)
+{
+	struct fc_lport *lp;
+	struct fcoe_softc *fc;
+	int paused = 0;
+
+	read_lock(&fcoe_hostlist_lock);
+	list_for_each_entry(fc, &fcoe_hostlist, list) {
+		lp = fc->lp;
+		if (lp) {
+			if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
+				paused = 1;
+			if (fcoe_check_wait_queue(lp) <	 FCOE_MAX_QUEUE_DEPTH) {
+				if (paused)
+					fc_unpause(lp);
+			}
+		}
+	}
+	read_unlock(&fcoe_hostlist_lock);
+
+	fcoe_timer.expires = jiffies + (1 * HZ);
+	add_timer(&fcoe_timer);
+}
+
+
+/**
+ * fcoe_check_wait_queue - put the skb into fcoe pending xmit queue
+ * @lp: the fc_port for this skb
+ * @skb: the associated skb to be xmitted
+ *
+ * This empties the wait_queue, dequeue the head of the wait_queue queue
+ * and calls fcoe_start_io() for each packet, if all skb have been
+ * transmitted, return 0 if a error occurs, then restore wait_queue and
+ * try again later.
+ *
+ * The wait_queue is used when the skb transmit fails. skb will go
+ * in the wait_queue which will be emptied by the time function OR
+ * by the next skb transmit.
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_check_wait_queue(struct fc_lport *lp)
+{
+	int rc, unpause = 0;
+	int paused = 0;
+	struct sk_buff *skb;
+	struct fcoe_softc *fc;
+
+	fc = fcoe_softc(lp);
+	spin_lock_bh(&fc->fcoe_pending_queue.lock);
+
+	/*
+	 * is this interface paused?
+	 */
+	if (fc->fcoe_pending_queue.qlen > FCOE_MAX_QUEUE_DEPTH)
+		paused = 1;
+	if (fc->fcoe_pending_queue.qlen) {
+		while ((skb = __skb_dequeue(&fc->fcoe_pending_queue)) != NULL) {
+			spin_unlock_bh(&fc->fcoe_pending_queue.lock);
+			rc = fcoe_start_io(skb);
+			if (rc) {
+				fcoe_insert_wait_queue_head(lp, skb);
+				return rc;
+			}
+			spin_lock_bh(&fc->fcoe_pending_queue.lock);
+		}
+		if (fc->fcoe_pending_queue.qlen < FCOE_MAX_QUEUE_DEPTH)
+			unpause = 1;
+	}
+	spin_unlock_bh(&fc->fcoe_pending_queue.lock);
+	if ((unpause) && (paused))
+		fc_unpause(lp);
+	return fc->fcoe_pending_queue.qlen;
+}
+
+/**
+ * fcoe_insert_wait_queue_head - puts skb to fcoe pending queue head
+ * @lp: the fc_port for this skb
+ * @skb: the associated skb to be xmitted
+ *
+ * Returns: none
+ **/
+static void fcoe_insert_wait_queue_head(struct fc_lport *lp,
+					struct sk_buff *skb)
+{
+	struct fcoe_softc *fc;
+
+	fc = fcoe_softc(lp);
+	spin_lock_bh(&fc->fcoe_pending_queue.lock);
+	__skb_queue_head(&fc->fcoe_pending_queue, skb);
+	spin_unlock_bh(&fc->fcoe_pending_queue.lock);
+}
+
+/**
+ * fcoe_insert_wait_queue - put the skb into fcoe pending queue tail
+ * @lp: the fc_port for this skb
+ * @skb: the associated skb to be xmitted
+ *
+ * Returns: none
+ **/
+static void fcoe_insert_wait_queue(struct fc_lport *lp,
+				   struct sk_buff *skb)
+{
+	struct fcoe_softc *fc;
+
+	fc = fcoe_softc(lp);
+	spin_lock_bh(&fc->fcoe_pending_queue.lock);
+	__skb_queue_tail(&fc->fcoe_pending_queue, skb);
+	spin_unlock_bh(&fc->fcoe_pending_queue.lock);
+}
+
+/**
+ * fcoe_dev_setup - setup link change notification interface
+ *
+ **/
+static void fcoe_dev_setup(void)
+{
+	/*
+	 * here setup a interface specific wd time to
+	 * monitor the link state
+	 */
+	register_netdevice_notifier(&fcoe_notifier);
+}
+
+/**
+ * fcoe_dev_setup - cleanup link change notification interface
+ **/
+static void fcoe_dev_cleanup(void)
+{
+	unregister_netdevice_notifier(&fcoe_notifier);
+}
+
+/**
+ * fcoe_device_notification - netdev event notification callback
+ * @notifier: context of the notification
+ * @event: type of event
+ * @ptr: fixed array for output parsed ifname
+ *
+ * This function is called by the ethernet driver in case of link change event
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_device_notification(struct notifier_block *notifier,
+				    ulong event, void *ptr)
+{
+	struct fc_lport *lp = NULL;
+	struct net_device *real_dev = ptr;
+	struct fcoe_softc *fc;
+	struct fcoe_dev_stats *stats;
+	u16 new_status;
+	u32 mfs;
+	int rc = NOTIFY_OK;
+
+	read_lock(&fcoe_hostlist_lock);
+	list_for_each_entry(fc, &fcoe_hostlist, list) {
+		if (fc->real_dev == real_dev) {
+			lp = fc->lp;
+			break;
+		}
+	}
+	read_unlock(&fcoe_hostlist_lock);
+	if (lp == NULL) {
+		rc = NOTIFY_DONE;
+		goto out;
+	}
+
+	new_status = lp->link_status;
+	switch (event) {
+	case NETDEV_DOWN:
+	case NETDEV_GOING_DOWN:
+		new_status &= ~FC_LINK_UP;
+		break;
+	case NETDEV_UP:
+	case NETDEV_CHANGE:
+		new_status &= ~FC_LINK_UP;
+		if (!fcoe_link_ok(lp))
+			new_status |= FC_LINK_UP;
+		break;
+	case NETDEV_CHANGEMTU:
+		mfs = fc->real_dev->mtu -
+			(sizeof(struct fcoe_hdr) +
+			 sizeof(struct fcoe_crc_eof));
+		if (mfs >= FC_MIN_MAX_FRAME)
+			fc_set_mfs(lp, mfs);
+		new_status &= ~FC_LINK_UP;
+		if (!fcoe_link_ok(lp))
+			new_status |= FC_LINK_UP;
+		break;
+	case NETDEV_REGISTER:
+		break;
+	default:
+		FC_DBG("unknown event %ld call", event);
+	}
+	if (lp->link_status != new_status) {
+		if ((new_status & FC_LINK_UP) == FC_LINK_UP)
+			fc_linkup(lp);
+		else {
+			stats = lp->dev_stats[smp_processor_id()];
+			if (stats)
+				stats->LinkFailureCount++;
+			fc_linkdown(lp);
+			fcoe_clean_pending_queue(lp);
+		}
+	}
+out:
+	return rc;
+}
+
+/**
+ * fcoe_if_to_netdev - parse a name buffer to get netdev
+ * @ifname: fixed array for output parsed ifname
+ * @buffer: incoming buffer to be copied
+ *
+ * Returns: NULL or ptr to netdeive
+ **/
+static struct net_device *fcoe_if_to_netdev(const char *buffer)
+{
+	char *cp;
+	char ifname[IFNAMSIZ + 2];
+
+	if (buffer) {
+		strlcpy(ifname, buffer, IFNAMSIZ);
+		cp = ifname + strlen(ifname);
+		while (--cp >= ifname && *cp == '\n')
+			*cp = '\0';
+		return dev_get_by_name(&init_net, ifname);
+	}
+	return NULL;
+}
+
+/**
+ * fcoe_netdev_to_module_owner - finds out the nic drive moddule of the netdev
+ * @netdev: the target netdev
+ *
+ * Returns: ptr to the struct module, NULL for failure
+ **/
+static struct module *fcoe_netdev_to_module_owner(
+	const struct net_device *netdev)
+{
+	struct device *dev;
+
+	if (!netdev)
+		return NULL;
+
+	dev = netdev->dev.parent;
+	if (!dev)
+		return NULL;
+
+	if (!dev->driver)
+		return NULL;
+
+	return dev->driver->owner;
+}
+
+/**
+ * fcoe_ethdrv_get - holds the nic driver module by  try_module_get() for
+ * the corresponding netdev.
+ * @netdev: the target netdev
+ *
+ * Returns: 0 for succsss
+ **/
+static int fcoe_ethdrv_get(const struct net_device *netdev)
+{
+	struct module *owner;
+
+	owner = fcoe_netdev_to_module_owner(netdev);
+	if (owner) {
+		printk(KERN_DEBUG "foce:hold driver module %s for %s\n",
+		       owner->name, netdev->name);
+		return  try_module_get(owner);
+	}
+	return -ENODEV;
+}
+
+/**
+ * fcoe_ethdrv_get - releases the nic driver module by module_put for
+ * the corresponding netdev.
+ * @netdev: the target netdev
+ *
+ * Returns: 0 for succsss
+ **/
+static int fcoe_ethdrv_put(const struct net_device *netdev)
+{
+	struct module *owner;
+
+	owner = fcoe_netdev_to_module_owner(netdev);
+	if (owner) {
+		printk(KERN_DEBUG "foce:release driver module %s for %s\n",
+		       owner->name, netdev->name);
+		module_put(owner);
+		return 0;
+	}
+	return -ENODEV;
+}
+
+/**
+ * fcoe_destroy- handles the destroy from sysfs
+ * @buffer: expcted to be a eth if name
+ * @kp: associated kernel param
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_destroy(const char *buffer, struct kernel_param *kp)
+{
+	int rc;
+	struct net_device *netdev;
+
+	netdev = fcoe_if_to_netdev(buffer);
+	if (!netdev) {
+		rc = -ENODEV;
+		goto out_nodev;
+	}
+	/* look for existing lport */
+	if (!fcoe_hostlist_lookup(netdev)) {
+		rc = -ENODEV;
+		goto out_putdev;
+	}
+	/* pass to transport */
+	rc = fcoe_transport_release(netdev);
+	if (rc) {
+		printk(KERN_ERR "fcoe: fcoe_transport_release(%s) failed\n",
+		       netdev->name);
+		rc = -EIO;
+		goto out_putdev;
+	}
+	fcoe_ethdrv_put(netdev);
+	rc = 0;
+out_putdev:
+	dev_put(netdev);
+out_nodev:
+	return rc;
+}
+
+/**
+ * fcoe_create - handles the create call from sysfs
+ * @buffer: expcted to be a eth if name
+ * @kp: associated kernel param
+ *
+ * Returns: 0 for success
+ **/
+static int fcoe_create(const char *buffer, struct kernel_param *kp)
+{
+	int rc;
+	struct net_device *netdev;
+
+	netdev = fcoe_if_to_netdev(buffer);
+	if (!netdev) {
+		rc = -ENODEV;
+		goto out_nodev;
+	}
+	/* look for existing lport */
+	if (fcoe_hostlist_lookup(netdev)) {
+		rc = -EEXIST;
+		goto out_putdev;
+	}
+	fcoe_ethdrv_get(netdev);
+
+	/* pass to transport */
+	rc = fcoe_transport_attach(netdev);
+	if (rc) {
+		printk(KERN_ERR "fcoe: fcoe_transport_attach(%s) failed\n",
+		       netdev->name);
+		fcoe_ethdrv_put(netdev);
+		rc = -EIO;
+		goto out_putdev;
+	}
+	rc = 0;
+out_putdev:
+	dev_put(netdev);
+out_nodev:
+	return rc;
+}
+
+module_param_call(create, fcoe_create, NULL, NULL, S_IWUSR);
+__MODULE_PARM_TYPE(create, "string");
+MODULE_PARM_DESC(create, "Create fcoe port using net device passed in.");
+module_param_call(destroy, fcoe_destroy, NULL, NULL, S_IWUSR);
+__MODULE_PARM_TYPE(destroy, "string");
+MODULE_PARM_DESC(destroy, "Destroy fcoe port");
+
+/*
+ * fcoe_link_ok - check if link is ok for the fc_lport
+ * @lp: ptr to the fc_lport
+ *
+ * Any permanently-disqualifying conditions have been previously checked.
+ * This also updates the speed setting, which may change with link for 100/1000.
+ *
+ * This function should probably be checking for PAUSE support at some point
+ * in the future. Currently Per-priority-pause is not determinable using
+ * ethtool, so we shouldn't be restrictive until that problem is resolved.
+ *
+ * Returns: 0 if link is OK for use by FCoE.
+ *
+ */
+int fcoe_link_ok(struct fc_lport *lp)
+{
+	struct fcoe_softc *fc = fcoe_softc(lp);
+	struct net_device *dev = fc->real_dev;
+	struct ethtool_cmd ecmd = { ETHTOOL_GSET };
+	int rc = 0;
+
+	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev)) {
+		dev = fc->phys_dev;
+		if (dev->ethtool_ops->get_settings) {
+			dev->ethtool_ops->get_settings(dev, &ecmd);
+			lp->link_supported_speeds &=
+				~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT);
+			if (ecmd.supported & (SUPPORTED_1000baseT_Half |
+					      SUPPORTED_1000baseT_Full))
+				lp->link_supported_speeds |= FC_PORTSPEED_1GBIT;
+			if (ecmd.supported & SUPPORTED_10000baseT_Full)
+				lp->link_supported_speeds |=
+					FC_PORTSPEED_10GBIT;
+			if (ecmd.speed == SPEED_1000)
+				lp->link_speed = FC_PORTSPEED_1GBIT;
+			if (ecmd.speed == SPEED_10000)
+				lp->link_speed = FC_PORTSPEED_10GBIT;
+		}
+	} else
+		rc = -1;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(fcoe_link_ok);
+
+/*
+ * fcoe_percpu_clean - frees skb of the corresponding lport from the per
+ * cpu queue.
+ * @lp: the fc_lport
+ */
+void fcoe_percpu_clean(struct fc_lport *lp)
+{
+	int idx;
+	struct fcoe_percpu_s *pp;
+	struct fcoe_rcv_info *fr;
+	struct sk_buff_head *list;
+	struct sk_buff *skb, *next;
+	struct sk_buff *head;
+
+	for (idx = 0; idx < NR_CPUS; idx++) {
+		if (fcoe_percpu[idx]) {
+			pp = fcoe_percpu[idx];
+			spin_lock_bh(&pp->fcoe_rx_list.lock);
+			list = &pp->fcoe_rx_list;
+			head = list->next;
+			for (skb = head; skb != (struct sk_buff *)list;
+			     skb = next) {
+				next = skb->next;
+				fr = fcoe_dev_from_skb(skb);
+				if (fr->fr_dev == lp) {
+					__skb_unlink(skb, list);
+					kfree_skb(skb);
+				}
+			}
+			spin_unlock_bh(&pp->fcoe_rx_list.lock);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(fcoe_percpu_clean);
+
+/**
+ * fcoe_clean_pending_queue - dequeue skb and free it
+ * @lp: the corresponding fc_lport
+ *
+ * Returns: none
+ **/
+void fcoe_clean_pending_queue(struct fc_lport *lp)
+{
+	struct fcoe_softc  *fc = lport_priv(lp);
+	struct sk_buff *skb;
+
+	spin_lock_bh(&fc->fcoe_pending_queue.lock);
+	while ((skb = __skb_dequeue(&fc->fcoe_pending_queue)) != NULL) {
+		spin_unlock_bh(&fc->fcoe_pending_queue.lock);
+		kfree_skb(skb);
+		spin_lock_bh(&fc->fcoe_pending_queue.lock);
+	}
+	spin_unlock_bh(&fc->fcoe_pending_queue.lock);
+}
+EXPORT_SYMBOL_GPL(fcoe_clean_pending_queue);
+
+/**
+ * libfc_host_alloc - allocate a Scsi_Host with room for the fc_lport
+ * @sht: ptr to the scsi host templ
+ * @priv_size: size of private data after fc_lport
+ *
+ * Returns: ptr to Scsi_Host
+ * TODO - to libfc?
+ */
+static inline struct Scsi_Host *libfc_host_alloc(
+	struct scsi_host_template *sht, int priv_size)
+{
+	return scsi_host_alloc(sht, sizeof(struct fc_lport) + priv_size);
+}
+
+/**
+ * fcoe_host_alloc - allocate a Scsi_Host with room for the fcoe_softc
+ * @sht: ptr to the scsi host templ
+ * @priv_size: size of private data after fc_lport
+ *
+ * Returns: ptr to Scsi_Host
+ */
+struct Scsi_Host *fcoe_host_alloc(struct scsi_host_template *sht, int priv_size)
+{
+	return libfc_host_alloc(sht, sizeof(struct fcoe_softc) + priv_size);
+}
+EXPORT_SYMBOL_GPL(fcoe_host_alloc);
+
+/*
+ * fcoe_reset - resets the fcoe
+ * @shost: shost the reset is from
+ *
+ * Returns: always 0
+ */
+int fcoe_reset(struct Scsi_Host *shost)
+{
+	struct fc_lport *lport = shost_priv(shost);
+	fc_lport_reset(lport);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_reset);
+
+/*
+ * fcoe_wwn_from_mac - converts 48-bit IEEE MAC address to 64-bit FC WWN.
+ * @mac: mac address
+ * @scheme: check port
+ * @port: port indicator for converting
+ *
+ * Returns: u64 fc world wide name
+ */
+u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN],
+		      unsigned int scheme, unsigned int port)
+{
+	u64 wwn;
+	u64 host_mac;
+
+	/* The MAC is in NO, so flip only the low 48 bits */
+	host_mac = ((u64) mac[0] << 40) |
+		((u64) mac[1] << 32) |
+		((u64) mac[2] << 24) |
+		((u64) mac[3] << 16) |
+		((u64) mac[4] << 8) |
+		(u64) mac[5];
+
+	WARN_ON(host_mac >= (1ULL << 48));
+	wwn = host_mac | ((u64) scheme << 60);
+	switch (scheme) {
+	case 1:
+		WARN_ON(port != 0);
+		break;
+	case 2:
+		WARN_ON(port >= 0xfff);
+		wwn |= (u64) port << 48;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	return wwn;
+}
+EXPORT_SYMBOL_GPL(fcoe_wwn_from_mac);
+/*
+ * fcoe_hostlist_lookup_softc - find the corresponding lport by a given device
+ * @device: this is currently ptr to net_device
+ *
+ * Returns: NULL or the located fcoe_softc
+ */
+static struct fcoe_softc *fcoe_hostlist_lookup_softc(
+	const struct net_device *dev)
+{
+	struct fcoe_softc *fc;
+
+	read_lock(&fcoe_hostlist_lock);
+	list_for_each_entry(fc, &fcoe_hostlist, list) {
+		if (fc->real_dev == dev) {
+			read_unlock(&fcoe_hostlist_lock);
+			return fc;
+		}
+	}
+	read_unlock(&fcoe_hostlist_lock);
+	return NULL;
+}
+
+/*
+ * fcoe_hostlist_lookup - find the corresponding lport by netdev
+ * @netdev: ptr to net_device
+ *
+ * Returns: 0 for success
+ */
+struct fc_lport *fcoe_hostlist_lookup(const struct net_device *netdev)
+{
+	struct fcoe_softc *fc;
+
+	fc = fcoe_hostlist_lookup_softc(netdev);
+
+	return (fc) ? fc->lp : NULL;
+}
+EXPORT_SYMBOL_GPL(fcoe_hostlist_lookup);
+
+/*
+ * fcoe_hostlist_add - add a lport to lports list
+ * @lp: ptr to the fc_lport to badded
+ *
+ * Returns: 0 for success
+ */
+int fcoe_hostlist_add(const struct fc_lport *lp)
+{
+	struct fcoe_softc *fc;
+
+	fc = fcoe_hostlist_lookup_softc(fcoe_netdev(lp));
+	if (!fc) {
+		fc = fcoe_softc(lp);
+		write_lock_bh(&fcoe_hostlist_lock);
+		list_add_tail(&fc->list, &fcoe_hostlist);
+		write_unlock_bh(&fcoe_hostlist_lock);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_hostlist_add);
+
+/*
+ * fcoe_hostlist_remove - remove a lport from lports list
+ * @lp: ptr to the fc_lport to badded
+ *
+ * Returns: 0 for success
+ */
+int fcoe_hostlist_remove(const struct fc_lport *lp)
+{
+	struct fcoe_softc *fc;
+
+	fc = fcoe_hostlist_lookup_softc(fcoe_netdev(lp));
+	BUG_ON(!fc);
+	write_lock_bh(&fcoe_hostlist_lock);
+	list_del(&fc->list);
+	write_unlock_bh(&fcoe_hostlist_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_hostlist_remove);
+
+/**
+ * fcoe_libfc_config - sets up libfc related properties for lport
+ * @lp: ptr to the fc_lport
+ * @tt: libfc function template
+ *
+ * Returns : 0 for success
+ **/
+int fcoe_libfc_config(struct fc_lport *lp, struct libfc_function_template *tt)
+{
+	/* Set the function pointers set by the LLDD */
+	memcpy(&lp->tt, tt, sizeof(*tt));
+	if (fc_fcp_init(lp))
+		return -ENOMEM;
+	fc_exch_init(lp);
+	fc_elsct_init(lp);
+	fc_lport_init(lp);
+	fc_rport_init(lp);
+	fc_disc_init(lp);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fcoe_libfc_config);
+
+/**
+ * fcoe_init - fcoe module loading initialization
+ *
+ * Initialization routine
+ * 1. Will create fc transport software structure
+ * 2. initialize the link list of port information structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int __init fcoe_init(void)
+{
+	int cpu;
+	struct fcoe_percpu_s *p;
+
+
+	INIT_LIST_HEAD(&fcoe_hostlist);
+	rwlock_init(&fcoe_hostlist_lock);
+
+#ifdef CONFIG_HOTPLUG_CPU
+	register_cpu_notifier(&fcoe_cpu_notifier);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+	/*
+	 * initialize per CPU interrupt thread
+	 */
+	for_each_online_cpu(cpu) {
+		p = kzalloc(sizeof(struct fcoe_percpu_s), GFP_KERNEL);
+		if (p) {
+			p->thread = kthread_create(fcoe_percpu_receive_thread,
+						   (void *)p,
+						   "fcoethread/%d", cpu);
+
+			/*
+			 * if there is no error then bind the thread to the cpu
+			 * initialize the semaphore and skb queue head
+			 */
+			if (likely(!IS_ERR(p->thread))) {
+				p->cpu = cpu;
+				fcoe_percpu[cpu] = p;
+				skb_queue_head_init(&p->fcoe_rx_list);
+				kthread_bind(p->thread, cpu);
+				wake_up_process(p->thread);
+			} else {
+				fcoe_percpu[cpu] = NULL;
+				kfree(p);
+
+			}
+		}
+	}
+
+	/*
+	 * setup link change notification
+	 */
+	fcoe_dev_setup();
+
+	init_timer(&fcoe_timer);
+	fcoe_timer.data = 0;
+	fcoe_timer.function = fcoe_watchdog;
+	fcoe_timer.expires = (jiffies + (10 * HZ));
+	add_timer(&fcoe_timer);
+
+	/* initiatlize the fcoe transport */
+	fcoe_transport_init();
+
+	fcoe_sw_init();
+
+	return 0;
+}
+module_init(fcoe_init);
+
+/**
+ * fcoe_exit - fcoe module unloading cleanup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static void __exit fcoe_exit(void)
+{
+	u32 idx;
+	struct fcoe_softc *fc, *tmp;
+	struct fcoe_percpu_s *p;
+	struct sk_buff *skb;
+
+	/*
+	 * Stop all call back interfaces
+	 */
+#ifdef CONFIG_HOTPLUG_CPU
+	unregister_cpu_notifier(&fcoe_cpu_notifier);
+#endif /* CONFIG_HOTPLUG_CPU */
+	fcoe_dev_cleanup();
+
+	/*
+	 * stop timer
+	 */
+	del_timer_sync(&fcoe_timer);
+
+	/* releases the assocaited fcoe transport for each lport */
+	list_for_each_entry_safe(fc, tmp, &fcoe_hostlist, list)
+		fcoe_transport_release(fc->real_dev);
+
+	for (idx = 0; idx < NR_CPUS; idx++) {
+		if (fcoe_percpu[idx]) {
+			kthread_stop(fcoe_percpu[idx]->thread);
+			p = fcoe_percpu[idx];
+			spin_lock_bh(&p->fcoe_rx_list.lock);
+			while ((skb = __skb_dequeue(&p->fcoe_rx_list)) != NULL)
+				kfree_skb(skb);
+			spin_unlock_bh(&p->fcoe_rx_list.lock);
+			if (fcoe_percpu[idx]->crc_eof_page)
+				put_page(fcoe_percpu[idx]->crc_eof_page);
+			kfree(fcoe_percpu[idx]);
+		}
+	}
+
+	/* remove sw trasnport */
+	fcoe_sw_exit();
+
+	/* detach the transport */
+	fcoe_transport_exit();
+}
+module_exit(fcoe_exit);
diff --git a/include/scsi/fc_transport_fcoe.h b/include/scsi/fc_transport_fcoe.h
new file mode 100644
index 000000000000..8dca2af14ffc
--- /dev/null
+++ b/include/scsi/fc_transport_fcoe.h
@@ -0,0 +1,54 @@
+#ifndef FC_TRANSPORT_FCOE_H
+#define FC_TRANSPORT_FCOE_H
+
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <scsi/scsi_host.h>
+#include <scsi/libfc.h>
+
+/**
+ * struct fcoe_transport - FCoE transport struct for generic transport
+ * for Ethernet devices as well as pure HBAs
+ *
+ * @name: name for thsi transport
+ * @bus: physical bus type (pci_bus_type)
+ * @driver: physical bus driver for network device
+ * @create: entry create function
+ * @destroy: exit destroy function
+ * @list: list of transports
+ */
+struct fcoe_transport {
+	char *name;
+	unsigned short vendor;
+	unsigned short device;
+	struct bus_type *bus;
+	struct device_driver *driver;
+	int (*create)(struct net_device *device);
+	int (*destroy)(struct net_device *device);
+	bool (*match)(struct net_device *device);
+	struct list_head list;
+	struct list_head devlist;
+	struct mutex devlock;
+};
+
+/**
+ * MODULE_ALIAS_FCOE_PCI
+ *
+ * some care must be taken with this, vendor and device MUST be a hex value
+ * preceded with 0x and with letters in lower case (0x12ab, not 0x12AB or 12AB)
+ */
+#define MODULE_ALIAS_FCOE_PCI(vendor, device) \
+	MODULE_ALIAS("fcoe-pci-" __stringify(vendor) "-" __stringify(device))
+
+/* exported funcs */
+int fcoe_transport_attach(struct net_device *netdev);
+int fcoe_transport_release(struct net_device *netdev);
+int fcoe_transport_register(struct fcoe_transport *t);
+int fcoe_transport_unregister(struct fcoe_transport *t);
+int fcoe_load_transport_driver(struct net_device *netdev);
+int __init fcoe_transport_init(void);
+int __exit fcoe_transport_exit(void);
+
+/* fcow_sw is the default transport */
+extern struct fcoe_transport fcoe_sw_transport;
+#endif /* FC_TRANSPORT_FCOE_H */
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
new file mode 100644
index 000000000000..89fdbb9a6a1b
--- /dev/null
+++ b/include/scsi/libfcoe.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright(c) 2007 - 2008 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Maintained at www.Open-FCoE.org
+ */
+
+#ifndef _LIBFCOE_H
+#define _LIBFCOE_H
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <scsi/fc/fc_fcoe.h>
+#include <scsi/libfc.h>
+
+/*
+ * this percpu struct for fcoe
+ */
+struct fcoe_percpu_s {
+	int		cpu;
+	struct task_struct *thread;
+	struct sk_buff_head fcoe_rx_list;
+	struct page *crc_eof_page;
+	int crc_eof_offset;
+};
+
+/*
+ * the fcoe sw transport private data
+ */
+struct fcoe_softc {
+	struct list_head list;
+	struct fc_lport *lp;
+	struct net_device *real_dev;
+	struct net_device *phys_dev;		/* device with ethtool_ops */
+	struct packet_type  fcoe_packet_type;
+	struct sk_buff_head fcoe_pending_queue;
+
+	u8 dest_addr[ETH_ALEN];
+	u8 ctl_src_addr[ETH_ALEN];
+	u8 data_src_addr[ETH_ALEN];
+	/*
+	 * fcoe protocol address learning related stuff
+	 */
+	u16 flogi_oxid;
+	u8 flogi_progress;
+	u8 address_mode;
+};
+
+static inline struct fcoe_softc *fcoe_softc(
+	const struct fc_lport *lp)
+{
+	return (struct fcoe_softc *)lport_priv(lp);
+}
+
+static inline struct net_device *fcoe_netdev(
+	const struct fc_lport *lp)
+{
+	return fcoe_softc(lp)->real_dev;
+}
+
+static inline struct fcoe_hdr *skb_fcoe_header(const struct sk_buff *skb)
+{
+	return (struct fcoe_hdr *)skb_network_header(skb);
+}
+
+static inline int skb_fcoe_offset(const struct sk_buff *skb)
+{
+	return skb_network_offset(skb);
+}
+
+static inline struct fc_frame_header *skb_fc_header(const struct sk_buff *skb)
+{
+	return (struct fc_frame_header *)skb_transport_header(skb);
+}
+
+static inline int skb_fc_offset(const struct sk_buff *skb)
+{
+	return skb_transport_offset(skb);
+}
+
+static inline void skb_reset_fc_header(struct sk_buff *skb)
+{
+	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, skb_network_offset(skb) +
+				 sizeof(struct fcoe_hdr));
+}
+
+static inline bool skb_fc_is_data(const struct sk_buff *skb)
+{
+	return skb_fc_header(skb)->fh_r_ctl == FC_RCTL_DD_SOL_DATA;
+}
+
+static inline bool skb_fc_is_cmd(const struct sk_buff *skb)
+{
+	return skb_fc_header(skb)->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD;
+}
+
+static inline bool skb_fc_has_exthdr(const struct sk_buff *skb)
+{
+	return (skb_fc_header(skb)->fh_r_ctl == FC_RCTL_VFTH) ||
+	    (skb_fc_header(skb)->fh_r_ctl == FC_RCTL_IFRH) ||
+	    (skb_fc_header(skb)->fh_r_ctl == FC_RCTL_ENCH);
+}
+
+static inline bool skb_fc_is_roff(const struct sk_buff *skb)
+{
+	return skb_fc_header(skb)->fh_f_ctl[2] & FC_FC_REL_OFF;
+}
+
+static inline u16 skb_fc_oxid(const struct sk_buff *skb)
+{
+	return be16_to_cpu(skb_fc_header(skb)->fh_ox_id);
+}
+
+static inline u16 skb_fc_rxid(const struct sk_buff *skb)
+{
+	return be16_to_cpu(skb_fc_header(skb)->fh_rx_id);
+}
+
+/* FIXME - DMA_BIDIRECTIONAL ? */
+#define skb_cb(skb)	((struct fcoe_rcv_info *)&((skb)->cb[0]))
+#define skb_cmd(skb)	(skb_cb(skb)->fr_cmd)
+#define skb_dir(skb)	(skb_cmd(skb)->sc_data_direction)
+static inline bool skb_fc_is_read(const struct sk_buff *skb)
+{
+	if (skb_fc_is_cmd(skb) && skb_cmd(skb))
+		return skb_dir(skb) == DMA_FROM_DEVICE;
+	return false;
+}
+
+static inline bool skb_fc_is_write(const struct sk_buff *skb)
+{
+	if (skb_fc_is_cmd(skb) && skb_cmd(skb))
+		return skb_dir(skb) == DMA_TO_DEVICE;
+	return false;
+}
+
+/* libfcoe funcs */
+int fcoe_reset(struct Scsi_Host *shost);
+u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN],
+		      unsigned int scheme, unsigned int port);
+
+u32 fcoe_fc_crc(struct fc_frame *fp);
+int fcoe_xmit(struct fc_lport *, struct fc_frame *);
+int fcoe_rcv(struct sk_buff *, struct net_device *,
+	     struct packet_type *, struct net_device *);
+
+int fcoe_percpu_receive_thread(void *arg);
+void fcoe_clean_pending_queue(struct fc_lport *lp);
+void fcoe_percpu_clean(struct fc_lport *lp);
+void fcoe_watchdog(ulong vp);
+int fcoe_link_ok(struct fc_lport *lp);
+
+struct fc_lport *fcoe_hostlist_lookup(const struct net_device *);
+int fcoe_hostlist_add(const struct fc_lport *);
+int fcoe_hostlist_remove(const struct fc_lport *);
+
+struct Scsi_Host *fcoe_host_alloc(struct scsi_host_template *, int);
+int fcoe_libfc_config(struct fc_lport *, struct libfc_function_template *);
+
+/* fcoe sw hba */
+int __init fcoe_sw_init(void);
+int __exit fcoe_sw_exit(void);
+#endif /* _LIBFCOE_H */
-- 
cgit v1.2.3


From 2a2ca6a96194c4744a2adeefbc09ce881f3c5abe Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:31 +0100
Subject: ide: replace the global ide_lock spinlock by per-hwgroup spinlocks
 (v2)

Now that (almost) all host drivers have been fixed not to abuse ide_lock
and core code usage of ide_lock has been sanitized we may safely replace
ide_lock by per-hwgroup locks.

This patch is partially based on earlier patch from Ravikiran G Thirumalai.

While at it:
- don't use deprecated HWIF() and HWGROUP() macros
- update locking documentation in ide.h

v2:
Add missing spin_lock_init(&hwgroup->lock).  (Noticed by Elias Oltmanns)

Cc: Vaibhav V. Nivargi <vaibhav.nivargi@gmail.com>
Cc: Alok N. Kataria <alokk@calsoftinc.com>
Cc: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 33 ++++++++++++++++-----------------
 drivers/ide/ide-iops.c  | 48 +++++++++++++++++++++++-------------------------
 drivers/ide/ide-park.c  | 16 ++++++++--------
 drivers/ide/ide-probe.c | 26 +++++++++++++++-----------
 drivers/ide/ide.c       |  8 +++-----
 drivers/ide/umc8672.c   | 11 +++++++----
 drivers/scsi/ide-scsi.c | 32 +++++++++++++++++++++-----------
 include/linux/ide.h     | 12 +++++++-----
 8 files changed, 100 insertions(+), 86 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 02059e96e6cd..72d0d702d5da 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -907,7 +907,7 @@ repeat:
 
 /*
  * Issue a new request to a drive from hwgroup
- * Caller must have already done spin_lock_irqsave(&ide_lock, ..);
+ * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..);
  *
  * A hwgroup is a serialized group of IDE interfaces.  Usually there is
  * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
@@ -919,7 +919,7 @@ repeat:
  * possibly along with many other devices.  This is especially common in
  * PCI-based systems with off-board IDE controller cards.
  *
- * The IDE driver uses the single global ide_lock spinlock to protect
+ * The IDE driver uses a per-hwgroup spinlock to protect
  * access to the request queues, and to protect the hwgroup->busy flag.
  *
  * The first thread into the driver for a particular hwgroup sets the
@@ -935,7 +935,7 @@ repeat:
  * will start the next request from the queue.  If no more work remains,
  * the driver will clear the hwgroup->busy flag and exit.
  *
- * The ide_lock (spinlock) is used to protect all access to the
+ * The per-hwgroup spinlock is used to protect all access to the
  * hwgroup->busy flag, but is otherwise not needed for most processing in
  * the driver.  This makes the driver much more friendlier to shared IRQs
  * than previous designs, while remaining 100% (?) SMP safe and capable.
@@ -948,7 +948,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 	ide_startstop_t	startstop;
 	int             loops = 0;
 
-	/* caller must own ide_lock */
+	/* caller must own hwgroup->lock */
 	BUG_ON(!irqs_disabled());
 
 	while (!hwgroup->busy) {
@@ -1070,11 +1070,11 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		 */
 		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
 			disable_irq_nosync(hwif->irq);
-		spin_unlock(&ide_lock);
+		spin_unlock(&hwgroup->lock);
 		local_irq_enable_in_hardirq();
 			/* allow other IRQs while we start this request */
 		startstop = start_request(drive, rq);
-		spin_lock_irq(&ide_lock);
+		spin_lock_irq(&hwgroup->lock);
 		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
 			enable_irq(hwif->irq);
 		if (startstop == ide_stopped)
@@ -1172,7 +1172,7 @@ void ide_timer_expiry (unsigned long data)
 	unsigned long	flags;
 	unsigned long	wait = -1;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	if (((handler = hwgroup->handler) == NULL) ||
 	    (hwgroup->req_gen != hwgroup->req_gen_timer)) {
@@ -1205,7 +1205,7 @@ void ide_timer_expiry (unsigned long data)
 					hwgroup->timer.expires  = jiffies + wait;
 					hwgroup->req_gen_timer = hwgroup->req_gen;
 					add_timer(&hwgroup->timer);
-					spin_unlock_irqrestore(&ide_lock, flags);
+					spin_unlock_irqrestore(&hwgroup->lock, flags);
 					return;
 				}
 			}
@@ -1215,7 +1215,7 @@ void ide_timer_expiry (unsigned long data)
 			 * the handler() function, which means we need to
 			 * globally mask the specific IRQ:
 			 */
-			spin_unlock(&ide_lock);
+			spin_unlock(&hwgroup->lock);
 			hwif  = HWIF(drive);
 			/* disable_irq_nosync ?? */
 			disable_irq(hwif->irq);
@@ -1239,14 +1239,14 @@ void ide_timer_expiry (unsigned long data)
 						  hwif->tp_ops->read_status(hwif));
 			}
 			drive->service_time = jiffies - drive->service_start;
-			spin_lock_irq(&ide_lock);
+			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped)
 				hwgroup->busy = 0;
 		}
 	}
 	ide_do_request(hwgroup, IDE_NO_IRQ);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 
 /**
@@ -1339,14 +1339,13 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 {
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
-	ide_hwif_t *hwif;
+	ide_hwif_t *hwif = hwgroup->hwif;
 	ide_drive_t *drive;
 	ide_handler_t *handler;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
 
-	spin_lock_irqsave(&ide_lock, flags);
-	hwif = hwgroup->hwif;
+	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	if (!ide_ack_intr(hwif))
 		goto out;
@@ -1416,7 +1415,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	hwgroup->handler = NULL;
 	hwgroup->req_gen++;
 	del_timer(&hwgroup->timer);
-	spin_unlock(&ide_lock);
+	spin_unlock(&hwgroup->lock);
 
 	if (hwif->port_ops && hwif->port_ops->clear_irq)
 		hwif->port_ops->clear_irq(drive);
@@ -1427,7 +1426,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	/* service this interrupt, may set handler for next interrupt */
 	startstop = handler(drive);
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	/*
 	 * Note that handler() may have set things up for another
 	 * interrupt to occur soon, but it cannot happen until
@@ -1448,7 +1447,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 	return irq_ret;
 }
 
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c41c3b9b6f02..ad8bd6539283 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -835,10 +835,12 @@ static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 		      unsigned int timeout, ide_expiry_t *expiry)
 {
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	unsigned long flags;
-	spin_lock_irqsave(&ide_lock, flags);
+
+	spin_lock_irqsave(&hwgroup->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 
 EXPORT_SYMBOL(ide_set_handler);
@@ -860,10 +862,11 @@ EXPORT_SYMBOL(ide_set_handler);
 void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 			 unsigned timeout, ide_expiry_t *expiry)
 {
+	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
-	ide_hwif_t *hwif = HWIF(drive);
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&hwgroup->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
 	hwif->tp_ops->exec_command(hwif, cmd);
 	/*
@@ -873,19 +876,20 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 	 * FIXME: we could skip this delay with care on non shared devices
 	 */
 	ndelay(400);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 EXPORT_SYMBOL(ide_execute_command);
 
 void ide_execute_pkt_cmd(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&hwgroup->lock, flags);
 	hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET);
 	ndelay(400);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd);
 
@@ -1076,22 +1080,16 @@ static void pre_reset(ide_drive_t *drive)
  */
 static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 {
-	unsigned int unit;
-	unsigned long flags, timeout;
-	ide_hwif_t *hwif;
-	ide_hwgroup_t *hwgroup;
-	struct ide_io_ports *io_ports;
-	const struct ide_tp_ops *tp_ops;
+	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
+	struct ide_io_ports *io_ports = &hwif->io_ports;
+	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	const struct ide_port_ops *port_ops;
+	unsigned long flags, timeout;
+	unsigned int unit;
 	DEFINE_WAIT(wait);
 
-	spin_lock_irqsave(&ide_lock, flags);
-	hwif = HWIF(drive);
-	hwgroup = HWGROUP(drive);
-
-	io_ports = &hwif->io_ports;
-
-	tp_ops = hwif->tp_ops;
+	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	/* We must not reset with running handlers */
 	BUG_ON(hwgroup->handler != NULL);
@@ -1106,7 +1104,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
 		hwgroup->polling = 1;
 		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&hwgroup->lock, flags);
 		return ide_started;
 	}
 
@@ -1129,9 +1127,9 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		if (time_before_eq(timeout, now))
 			break;
 
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&hwgroup->lock, flags);
 		timeout = schedule_timeout_uninterruptible(timeout - now);
-		spin_lock_irqsave(&ide_lock, flags);
+		spin_lock_irqsave(&hwgroup->lock, flags);
 	} while (timeout);
 	finish_wait(&ide_park_wq, &wait);
 
@@ -1143,7 +1141,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		pre_reset(&hwif->drives[unit]);
 
 	if (io_ports->ctl_addr == 0) {
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&hwgroup->lock, flags);
 		ide_complete_drive_reset(drive, -ENXIO);
 		return ide_stopped;
 	}
@@ -1179,7 +1177,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	if (port_ops && port_ops->resetproc)
 		port_ops->resetproc(drive);
 
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 	return ide_started;
 }
 
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 03b00e57e93f..63d01c55f865 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -7,17 +7,16 @@ DECLARE_WAIT_QUEUE_HEAD(ide_park_wq);
 
 static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 {
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	struct request_queue *q = drive->queue;
 	struct request *rq;
 	int rc;
 
 	timeout += jiffies;
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
-		ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-		int reset_timer;
+		int reset_timer = time_before(timeout, drive->sleep);
 
-		reset_timer = time_before(timeout, drive->sleep);
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
 		if (reset_timer && hwgroup->sleeping &&
@@ -26,10 +25,10 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 			hwgroup->busy = 0;
 			blk_start_queueing(q);
 		}
-		spin_unlock_irq(&ide_lock);
+		spin_unlock_irq(&hwgroup->lock);
 		return;
 	}
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 
 	rq = blk_get_request(q, READ, __GFP_WAIT);
 	rq->cmd[0] = REQ_PARK_HEADS;
@@ -62,20 +61,21 @@ ssize_t ide_park_show(struct device *dev, struct device_attribute *attr,
 		      char *buf)
 {
 	ide_drive_t *drive = to_ide_device(dev);
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	unsigned long now;
 	unsigned int msecs;
 
 	if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
 		return -EOPNOTSUPP;
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	now = jiffies;
 	if (drive->dev_flags & IDE_DFLAG_PARKED &&
 	    time_after(drive->sleep, now))
 		msecs = jiffies_to_msecs(drive->sleep - now);
 	else
 		msecs = 0;
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 
 	return snprintf(buf, 20, "%u\n", msecs);
 }
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c55bdbd22314..504bc9480e93 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -906,7 +906,8 @@ static int ide_init_queue(ide_drive_t *drive)
 	 *	do not.
 	 */
 
-	q = blk_init_queue_node(do_ide_request, &ide_lock, hwif_to_node(hwif));
+	q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock,
+				hwif_to_node(hwif));
 	if (!q)
 		return 1;
 
@@ -947,7 +948,7 @@ static void ide_add_drive_to_hwgroup(ide_drive_t *drive)
 {
 	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	if (!hwgroup->drive) {
 		/* first drive for hwgroup. */
 		drive->next = drive;
@@ -957,7 +958,7 @@ static void ide_add_drive_to_hwgroup(ide_drive_t *drive)
 		drive->next = hwgroup->drive->next;
 		hwgroup->drive->next = drive;
 	}
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 }
 
 /*
@@ -1002,7 +1003,7 @@ void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
 
 	ide_ports[hwif->index] = NULL;
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	/*
 	 * Remove us from the hwgroup, and free
 	 * the hwgroup if we were the only member
@@ -1030,7 +1031,7 @@ void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
 		}
 		BUG_ON(hwgroup->hwif == hwif);
 	}
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 }
 
 /*
@@ -1092,17 +1093,19 @@ static int init_irq (ide_hwif_t *hwif)
 		 * linked list, the first entry is the hwif that owns
 		 * hwgroup->handler - do not change that.
 		 */
-		spin_lock_irq(&ide_lock);
+		spin_lock_irq(&hwgroup->lock);
 		hwif->next = hwgroup->hwif->next;
 		hwgroup->hwif->next = hwif;
 		BUG_ON(hwif->next == hwif);
-		spin_unlock_irq(&ide_lock);
+		spin_unlock_irq(&hwgroup->lock);
 	} else {
 		hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO,
 				       hwif_to_node(hwif));
 		if (hwgroup == NULL)
 			goto out_up;
 
+		spin_lock_init(&hwgroup->lock);
+
 		hwif->hwgroup = hwgroup;
 		hwgroup->hwif = hwif->next = hwif;
 
@@ -1263,20 +1266,21 @@ static void ide_remove_drive_from_hwgroup(ide_drive_t *drive)
 static void drive_release_dev (struct device *dev)
 {
 	ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 
 	ide_proc_unregister_device(drive);
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	ide_remove_drive_from_hwgroup(drive);
 	kfree(drive->id);
 	drive->id = NULL;
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 	/* Messed up locking ... */
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 	blk_cleanup_queue(drive->queue);
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	drive->queue = NULL;
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 
 	complete(&drive->gendev_rel_comp);
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index bca6877ee6a5..41d042053548 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -74,9 +74,6 @@ static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
 
 DEFINE_MUTEX(ide_cfg_mtx);
 
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
-EXPORT_SYMBOL(ide_lock);
-
 static void ide_port_init_devices_data(ide_hwif_t *);
 
 /*
@@ -333,6 +330,7 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio)
 static int set_pio_mode(ide_drive_t *drive, int arg)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 
 	if (arg < 0 || arg > 255)
@@ -347,9 +345,9 @@ static int set_pio_mode(ide_drive_t *drive, int arg)
 			unsigned long flags;
 
 			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
-			spin_lock_irqsave(&ide_lock, flags);
+			spin_lock_irqsave(&hwgroup->lock, flags);
 			port_ops->set_pio_mode(drive, arg);
-			spin_unlock_irqrestore(&ide_lock, flags);
+			spin_unlock_irqrestore(&hwgroup->lock, flags);
 		} else
 			port_ops->set_pio_mode(drive, arg);
 	} else {
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 1da076e0c917..e29978cf6197 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -107,18 +107,21 @@ static void umc_set_speeds(u8 speeds[])
 static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	unsigned long flags;
+	ide_hwgroup_t *mate_hwgroup = hwif->mate ? hwif->mate->hwgroup : NULL;
+	unsigned long uninitialized_var(flags);
 
 	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
 		drive->name, pio, pio_to_umc[pio]);
-	spin_lock_irqsave(&ide_lock, flags);
-	if (hwif->mate && hwif->mate->hwgroup->handler) {
+	if (mate_hwgroup)
+		spin_lock_irqsave(&mate_hwgroup->lock, flags);
+	if (mate_hwgroup && mate_hwgroup->handler) {
 		printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n");
 	} else {
 		current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio];
 		umc_set_speeds(current_speeds);
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
+	if (mate_hwgroup)
+		spin_unlock_irqrestore(&mate_hwgroup->lock, flags);
 }
 
 static const struct ide_port_ops umc8672_port_ops = {
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 2370fd82ebfe..c24140aff8e7 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -578,6 +578,8 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 {
 	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
 	ide_drive_t    *drive = scsi->drive;
+	ide_hwif_t     *hwif;
+	ide_hwgroup_t  *hwgroup;
 	int		busy;
 	int             ret   = FAILED;
 
@@ -594,13 +596,16 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 		goto no_drive;
 	}
 
-	/* First give it some more time, how much is "right" is hard to say :-( */
+	hwif = drive->hwif;
+	hwgroup = hwif->hwgroup;
 
-	busy = ide_wait_not_busy(HWIF(drive), 100);	/* FIXME - uses mdelay which causes latency? */
+	/* First give it some more time, how much is "right" is hard to say :-(
+	   FIXME - uses mdelay which causes latency? */
+	busy = ide_wait_not_busy(hwif, 100);
 	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
 		printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":"");
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 
 	/* If there is no pc running we're done (our interrupt took care of it) */
 	pc = drive->pc;
@@ -629,7 +634,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 	}
 
 ide_unlock:
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 no_drive:
 	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
 		printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed");
@@ -642,6 +647,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 	struct request *req;
 	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
 	ide_drive_t    *drive = scsi->drive;
+	ide_hwgroup_t  *hwgroup;
 	int             ready = 0;
 	int             ret   = SUCCESS;
 
@@ -658,14 +664,18 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 		return FAILED;
 	}
 
+	hwgroup = drive->hwif->hwgroup;
+
 	spin_lock_irq(cmd->device->host->host_lock);
-	spin_lock(&ide_lock);
+	spin_lock(&hwgroup->lock);
 
 	pc = drive->pc;
+	if (pc)
+		req = pc->rq;
 
-	if (pc == NULL || (req = pc->rq) != HWGROUP(drive)->rq || !HWGROUP(drive)->handler) {
+	if (pc == NULL || req != hwgroup->rq || hwgroup->handler == NULL) {
 		printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n");
-		spin_unlock(&ide_lock);
+		spin_unlock(&hwgroup->lock);
 		spin_unlock_irq(cmd->device->host->host_lock);
 		return FAILED;
 	}
@@ -685,10 +695,10 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 			BUG();
 	}
 
-	HWGROUP(drive)->rq = NULL;
-	HWGROUP(drive)->handler = NULL;
-	HWGROUP(drive)->busy = 1;		/* will set this to zero when ide reset finished */
-	spin_unlock(&ide_lock);
+	hwgroup->rq = NULL;
+	hwgroup->handler = NULL;
+	hwgroup->busy = 1; /* will set this to zero when ide reset finished */
+	spin_unlock(&hwgroup->lock);
 
 	ide_do_reset(drive);
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 010fb26a1579..c871d325cedb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -909,6 +909,8 @@ typedef struct hwgroup_s {
 
 	int req_gen;
 	int req_gen_timer;
+
+	spinlock_t lock;
 } ide_hwgroup_t;
 
 typedef struct ide_driver_s ide_driver_t;
@@ -1610,13 +1612,13 @@ extern struct mutex ide_cfg_mtx;
 /*
  * Structure locking:
  *
- * ide_cfg_mtx and ide_lock together protect changes to
- * ide_hwif_t->{next,hwgroup}
+ * ide_cfg_mtx and hwgroup->lock together protect changes to
+ * ide_hwif_t->next
  * ide_drive_t->next
  *
- * ide_hwgroup_t->busy: ide_lock
- * ide_hwgroup_t->hwif: ide_lock
- * ide_hwif_t->mate: constant, no locking
+ * ide_hwgroup_t->busy: hwgroup->lock
+ * ide_hwgroup_t->hwif: hwgroup->lock
+ * ide_hwif_t->{hwgroup,mate}: constant, no locking
  * ide_drive_t->hwif: constant, no locking
  */
 
-- 
cgit v1.2.3


From 27c01c2db05c3cf8824975e50403cd4fd9356dca Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:32 +0100
Subject: ide-cd: remove obsolete seek optimization

It doesn't make much sense nowadays and is problematic on some drives.

Cc: Borislav Petkov <petkovbb@googlemail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 89 ++++------------------------------------------------
 drivers/ide/ide-cd.h |  2 --
 include/linux/ide.h  |  4 ---
 3 files changed, 6 insertions(+), 89 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 692fd4570df6..ea35e94d0980 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -774,52 +774,6 @@ static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
 	return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
 }
 
-#define IDECD_SEEK_THRESHOLD	(1000)			/* 1000 blocks */
-#define IDECD_SEEK_TIMER	(5 * WAIT_MIN_SLEEP)	/* 100 ms */
-#define IDECD_SEEK_TIMEOUT	(2 * WAIT_CMD)		/* 20 sec */
-
-static ide_startstop_t cdrom_seek_intr(ide_drive_t *drive)
-{
-	struct cdrom_info *info = drive->driver_data;
-	int stat;
-	static int retry = 10;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (cdrom_decode_status(drive, 0, &stat))
-		return ide_stopped;
-
-	drive->atapi_flags |= IDE_AFLAG_SEEKING;
-
-	if (retry && time_after(jiffies, info->start_seek + IDECD_SEEK_TIMER)) {
-		if (--retry == 0)
-			drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
-	}
-	return ide_stopped;
-}
-
-static void ide_cd_prepare_seek_request(ide_drive_t *drive, struct request *rq)
-{
-	sector_t frame = rq->sector;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS);
-
-	memset(rq->cmd, 0, BLK_MAX_CDB);
-	rq->cmd[0] = GPCMD_SEEK;
-	put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]);
-
-	rq->timeout = ATAPI_WAIT_PC;
-}
-
-static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->hwgroup->rq;
-
-	return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr);
-}
-
 /*
  * Fix up a possibly partially-processed request so that we can start it over
  * entirely, or even put it back on the request queue.
@@ -1260,7 +1214,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
-	struct cdrom_info *info = drive->driver_data;
 	ide_handler_t *fn;
 	int xferlen;
 
@@ -1270,44 +1223,14 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
-		if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
-			ide_hwif_t *hwif = drive->hwif;
-			unsigned long elapsed = jiffies - info->start_seek;
-			int stat = hwif->tp_ops->read_status(hwif);
-
-			if ((stat & ATA_DSC) != ATA_DSC) {
-				if (elapsed < IDECD_SEEK_TIMEOUT) {
-					ide_stall_queue(drive,
-							IDECD_SEEK_TIMER);
-					return ide_stopped;
-				}
-				printk(KERN_ERR PFX "%s: DSC timeout\n",
-						drive->name);
-			}
-			drive->atapi_flags &= ~IDE_AFLAG_SEEKING;
-		}
-		if (rq_data_dir(rq) == READ &&
-		    IDE_LARGE_SEEK(info->last_block, block,
-			    IDECD_SEEK_THRESHOLD) &&
-		    (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP)) {
-			xferlen = 0;
-			fn = cdrom_start_seek_continuation;
-
-			drive->dma = 0;
-			info->start_seek = jiffies;
-
-			ide_cd_prepare_seek_request(drive, rq);
-		} else {
-			xferlen = 32768;
-			fn = cdrom_start_rw_cont;
+		xferlen = 32768;
+		fn = cdrom_start_rw_cont;
 
-			if (cdrom_start_rw(drive, rq) == ide_stopped)
-				return ide_stopped;
+		if (cdrom_start_rw(drive, rq) == ide_stopped)
+			return ide_stopped;
 
-			if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped)
-				return ide_stopped;
-		}
-		info->last_block = block;
+		if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped)
+			return ide_stopped;
 	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 		   rq->cmd_type == REQ_TYPE_ATA_PC) {
 		xferlen = rq->data_len;
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 5882b9a9ea8b..d5ce3362dbd1 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -88,8 +88,6 @@ struct cdrom_info {
 	struct request_sense sense_data;
 
 	struct request request_sense_request;
-	unsigned long last_block;
-	unsigned long start_seek;
 
 	u8 max_speed;		/* Max speed of the drive. */
 	u8 current_speed;	/* Current speed of the drive. */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c871d325cedb..0b8af0535d85 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -122,8 +122,6 @@ struct ide_io_ports {
 #define MAX_DRIVES	2	/* per interface; 2 assumed by lots of code */
 #define SECTOR_SIZE	512
 
-#define IDE_LARGE_SEEK(b1,b2,t)	(((b1) > (b2) + (t)) || ((b2) > (b1) + (t)))
-
 /*
  * Timeouts for various operations:
  */
@@ -496,8 +494,6 @@ enum {
 	 * when more than one interrupt is needed.
 	 */
 	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 7),
-	/* Seeking in progress. */
-	IDE_AFLAG_SEEKING		= (1 << 8),
 	/* Saved TOC information is current. */
 	IDE_AFLAG_TOC_VALID		= (1 << 9),
 	/* We think that the drive door is locked. */
-- 
cgit v1.2.3


From 6b5cde3629701258004b94cde75dd1089b556b02 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:32 +0100
Subject: cmd64x: set IDE_HFLAG_SERIALIZE explictly for CMD646

* Set IDE_HFLAG_SERIALIZE explictly for CMD646.

* Remove no longer needed ide_cmd646 chipset type (which has
  a nice side-effect of fixing handling of unexpected IRQs).

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/cmd64x.c    | 4 ++--
 drivers/ide/ide-probe.c | 2 +-
 drivers/ide/ide-proc.c  | 1 -
 include/linux/ide.h     | 2 +-
 4 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 935385c77e06..3623bf013bcf 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -424,10 +424,10 @@ static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
-		.chipset	= ide_cmd646,
 		.port_ops	= &cmd64x_port_ops,
 		.dma_ops	= &cmd648_dma_ops,
-		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH,
+		.host_flags	= IDE_HFLAG_SERIALIZE |
+				  IDE_HFLAG_ABUSE_PREFETCH,
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA2,
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index b0f9ededfad7..a0eb72e2a026 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1125,7 +1125,7 @@ static int init_irq (ide_hwif_t *hwif)
 		sa = IRQF_SHARED;
 #endif /* __mc68000__ */
 
-		if (hwif->chipset == ide_pci || hwif->chipset == ide_cmd646)
+		if (hwif->chipset == ide_pci)
 			sa = IRQF_SHARED;
 
 		if (io_ports->ctl_addr)
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index dd899e1f3841..e41669eec2c2 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -48,7 +48,6 @@ static int proc_ide_read_imodel
 	case ide_ht6560b:	name = "ht6560b";	break;
 	case ide_rz1000:	name = "rz1000";	break;
 	case ide_trm290:	name = "trm290";	break;
-	case ide_cmd646:	name = "cmd646";	break;
 	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
 	case ide_pmac:		name = "mac-io";	break;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 0b8af0535d85..150e42311ee0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -171,7 +171,7 @@ enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
 		ide_rz1000,	ide_trm290,
-		ide_cmd646,	ide_cy82c693,	ide_4drives,
+		ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
-- 
cgit v1.2.3


From f58c1ab8deebc2360cef998f169a6727c288210f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:33 +0100
Subject: ide: always set nIEN on idle devices

* Set nIEN for previous port/device in ide_do_request()
  also if port uses a non-shared IRQ.

* Remove no longer needed ide_hwif_t.sharing_irq.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 2 +-
 drivers/ide/ide-probe.c | 4 +---
 include/linux/ide.h     | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ca51460af756..f776bd475010 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1003,7 +1003,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		}
 	again:
 		hwif = HWIF(drive);
-		if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
+		if (hwif != hwgroup->hwif) {
 			/*
 			 * set nIEN for previous hwif, drives in the
 			 * quirk_list may not like intr setups/cleanups
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a0eb72e2a026..81f61e8ea97f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1060,7 +1060,6 @@ static int init_irq (ide_hwif_t *hwif)
 
 		if (h && h->hwgroup) {  /* scan only initialized ports */
 			if (hwif->irq == h->irq) {
-				hwif->sharing_irq = h->sharing_irq = 1;
 				if (hwif->chipset != ide_pci ||
 				    h->chipset != ide_pci) {
 					save_match(hwif, h, &match);
@@ -1152,8 +1151,7 @@ static int init_irq (ide_hwif_t *hwif)
 		io_ports->data_addr, hwif->irq);
 #endif /* __mc68000__ */
 	if (match)
-		printk(KERN_CONT " (%sed with %s)",
-			hwif->sharing_irq ? "shar" : "serializ", match->name);
+		printk(KERN_CONT " (serialized with %s)", match->name);
 	printk(KERN_CONT "\n");
 
 	mutex_unlock(&ide_cfg_mtx);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 150e42311ee0..1d28006aec68 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -842,7 +842,6 @@ typedef struct hwif_s {
 
 	unsigned	present    : 1;	/* this interface exists */
 	unsigned	serialized : 1;	/* serialized all channel operation */
-	unsigned	sharing_irq: 1;	/* 1 = sharing irq with another hwif */
 	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
 
 	struct device		gendev;
-- 
cgit v1.2.3


From 7f1ac8c4b9dadc55ec656b368f5f470f2cbe3083 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:33 +0100
Subject: rz1000: apply chipset quirks early (v2)

* Use pci_name(dev) instead of hwif->name in init_hwif_rz1000().

* init_hwif_rz1000() -> rz1000_init_chipset().  Update rz1000_init_one()
  to use rz1000_init_chipset() and add now required rz1000_remove().

* Remove superfluous ide_rz1000 chipset type.

v2:
* unsigned int rz1000_init_chipset() -> int rz1000_disable_readahead()
  per Sergei's suggestion.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-proc.c |  1 -
 drivers/ide/rz1000.c   | 36 +++++++++++++++++++++++++-----------
 include/linux/ide.h    |  3 +--
 3 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index e41669eec2c2..c2e6b8927bdc 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -46,7 +46,6 @@ static int proc_ide_read_imodel
 	case ide_qd65xx:	name = "qd65xx";	break;
 	case ide_umc8672:	name = "umc8672";	break;
 	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_rz1000:	name = "rz1000";	break;
 	case ide_trm290:	name = "trm290";	break;
 	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
diff --git a/drivers/ide/rz1000.c b/drivers/ide/rz1000.c
index 7daf0135cbac..a6414a884eb1 100644
--- a/drivers/ide/rz1000.c
+++ b/drivers/ide/rz1000.c
@@ -22,34 +22,48 @@
 
 #define DRV_NAME "rz1000"
 
-static void __devinit init_hwif_rz1000 (ide_hwif_t *hwif)
+static int __devinit rz1000_disable_readahead(struct pci_dev *dev)
 {
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u16 reg;
 
 	if (!pci_read_config_word (dev, 0x40, &reg) &&
 	    !pci_write_config_word(dev, 0x40, reg & 0xdfff)) {
 		printk(KERN_INFO "%s: disabled chipset read-ahead "
-			"(buggy RZ1000/RZ1001)\n", hwif->name);
+			"(buggy RZ1000/RZ1001)\n", pci_name(dev));
+		return 0;
 	} else {
-		if (hwif->mate)
-			hwif->mate->serialized = hwif->serialized = 1;
-		hwif->host_flags |= IDE_HFLAG_NO_UNMASK_IRQS;
 		printk(KERN_INFO "%s: serialized, disabled unmasking "
-			"(buggy RZ1000/RZ1001)\n", hwif->name);
+			"(buggy RZ1000/RZ1001)\n", pci_name(dev));
+		return 1;
 	}
 }
 
 static const struct ide_port_info rz1000_chipset __devinitdata = {
 	.name		= DRV_NAME,
-	.init_hwif	= init_hwif_rz1000,
-	.chipset	= ide_rz1000,
 	.host_flags	= IDE_HFLAG_NO_DMA,
 };
 
 static int __devinit rz1000_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_pci_init_one(dev, &rz1000_chipset, NULL);
+	struct ide_port_info d = rz1000_chipset;
+	int rc;
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	if (rz1000_disable_readahead(dev)) {
+		d.host_flags |= IDE_HFLAG_SERIALIZE;
+		d.host_flags |= IDE_HFLAG_NO_UNMASK_IRQS;
+	}
+
+	return ide_pci_init_one(dev, &d, NULL);
+}
+
+static void rz1000_remove(struct pci_dev *dev)
+{
+	ide_pci_remove(dev);
+	pci_disable_device(dev);
 }
 
 static const struct pci_device_id rz1000_pci_tbl[] = {
@@ -63,7 +77,7 @@ static struct pci_driver rz1000_pci_driver = {
 	.name		= "RZ1000_IDE",
 	.id_table	= rz1000_pci_tbl,
 	.probe		= rz1000_init_one,
-	.remove		= ide_pci_remove,
+	.remove		= rz1000_remove,
 };
 
 static int __init rz1000_ide_init(void)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1d28006aec68..fc1a966c7b7d 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -170,8 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *);
 enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_rz1000,	ide_trm290,
-		ide_cy82c693,	ide_4drives,
+		ide_trm290,	ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
-- 
cgit v1.2.3


From 6b4924962c49655494d2c8e9d3faab0e349a3062 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:34 +0100
Subject: ide: add ->max_sectors field to struct ide_port_info

* Add ->max_sectors field to struct ide_port_info to allow host drivers
  to specify value used for hwif->rqsize (if smaller than the default).

* Convert pdc202xx_old to use ->max_sectors and remove no longer needed
  IDE_HFLAG_RQSIZE_256 flag.

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtyltov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c    | 4 ++--
 drivers/ide/pdc202xx_old.c | 9 +++++----
 include/linux/ide.h        | 5 +++--
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 81f61e8ea97f..f76c22c45086 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1442,8 +1442,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 			hwif->mate->serialized = hwif->serialized = 1;
 	}
 
-	if (d->host_flags & IDE_HFLAG_RQSIZE_256)
-		hwif->rqsize = 256;
+	if (d->max_sectors)
+		hwif->rqsize = d->max_sectors;
 
 	/* call chipset specific routine for each enabled port */
 	if (d->init_hwif)
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 799557c25eef..624e62e5cc9a 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -350,16 +350,17 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_timeout		= pdc202xx_dma_timeout,
 };
 
-#define DECLARE_PDC2026X_DEV(udma, extra_flags) \
+#define DECLARE_PDC2026X_DEV(udma, sectors) \
 	{ \
 		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_pdc202xx, \
 		.port_ops	= &pdc2026x_port_ops, \
 		.dma_ops	= &pdc2026x_dma_ops, \
-		.host_flags	= IDE_HFLAGS_PDC202XX | extra_flags, \
+		.host_flags	= IDE_HFLAGS_PDC202XX, \
 		.pio_mask	= ATA_PIO4, \
 		.mwdma_mask	= ATA_MWDMA2, \
 		.udma_mask	= udma, \
+		.max_sectors	= sectors, \
 	}
 
 static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
@@ -376,8 +377,8 @@ static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 
 	/* 1: PDC2026{2,3} */
 	DECLARE_PDC2026X_DEV(ATA_UDMA4, 0),
-	/* 2: PDC2026{5,7} */
-	DECLARE_PDC2026X_DEV(ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
+	/* 2: PDC2026{5,7}: UDMA5, limit LBA48 requests to 256 sectors */
+	DECLARE_PDC2026X_DEV(ATA_UDMA5, 256),
 };
 
 /**
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fc1a966c7b7d..2574dda4a3e7 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1372,8 +1372,6 @@ enum {
 	IDE_HFLAG_LEGACY_IRQS		= (1 << 21),
 	/* force use of legacy IRQs */
 	IDE_HFLAG_FORCE_LEGACY_IRQS	= (1 << 22),
-	/* limit LBA48 requests to 256 sectors */
-	IDE_HFLAG_RQSIZE_256		= (1 << 23),
 	/* use 32-bit I/O ops */
 	IDE_HFLAG_IO_32BIT		= (1 << 24),
 	/* unmask IRQs */
@@ -1411,6 +1409,9 @@ struct ide_port_info {
 
 	ide_pci_enablebit_t	enablebits[2];
 	hwif_chipset_t		chipset;
+
+	u16			max_sectors;	/* if < than the default one */
+
 	u32			host_flags;
 	u8			pio_mask;
 	u8			swdma_mask;
-- 
cgit v1.2.3


From 1f66019bdf902cb59adf959e462bcd3f4c01f683 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:34 +0100
Subject: trm290: add IDE_HFLAG_TRM290 host flag

* Add IDE_HFLAG_TRM290 host flag and use it in ide_build_dmatable().

* Remove no longer needed ide_trm290 chipset type.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-dma-sff.c | 2 +-
 drivers/ide/ide-proc.c    | 1 -
 drivers/ide/trm290.c      | 4 ++--
 include/linux/ide.h       | 4 +++-
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index cac431f0df17..1f2a5f56f81c 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -98,10 +98,10 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	__le32 *table = (__le32 *)hwif->dmatable_cpu;
-	unsigned int is_trm290	= (hwif->chipset == ide_trm290) ? 1 : 0;
 	unsigned int count = 0;
 	int i;
 	struct scatterlist *sg;
+	u8 is_trm290 = !!(hwif->host_flags & IDE_HFLAG_TRM290);
 
 	hwif->sg_nents = ide_build_sglist(drive, rq);
 	if (hwif->sg_nents == 0)
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index c2e6b8927bdc..066d2317545b 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -46,7 +46,6 @@ static int proc_ide_read_imodel
 	case ide_qd65xx:	name = "qd65xx";	break;
 	case ide_umc8672:	name = "umc8672";	break;
 	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_trm290:	name = "trm290";	break;
 	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
 	case ide_pmac:		name = "mac-io";	break;
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
index 75ea61526566..2a5ea90cf8b8 100644
--- a/drivers/ide/trm290.c
+++ b/drivers/ide/trm290.c
@@ -328,10 +328,10 @@ static struct ide_dma_ops trm290_dma_ops = {
 static const struct ide_port_info trm290_chipset __devinitdata = {
 	.name		= DRV_NAME,
 	.init_hwif	= init_hwif_trm290,
-	.chipset	= ide_trm290,
 	.port_ops	= &trm290_port_ops,
 	.dma_ops	= &trm290_dma_ops,
-	.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
+	.host_flags	= IDE_HFLAG_TRM290 |
+			  IDE_HFLAG_NO_ATAPI_DMA |
 #if 0 /* play it safe for now */
 			  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 #endif
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 2574dda4a3e7..f62d35a5fb71 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -170,7 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *);
 enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_trm290,	ide_cy82c693,	ide_4drives,
+		ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
@@ -1372,6 +1372,8 @@ enum {
 	IDE_HFLAG_LEGACY_IRQS		= (1 << 21),
 	/* force use of legacy IRQs */
 	IDE_HFLAG_FORCE_LEGACY_IRQS	= (1 << 22),
+	/* host is TRM290 */
+	IDE_HFLAG_TRM290		= (1 << 23),
 	/* use 32-bit I/O ops */
 	IDE_HFLAG_IO_32BIT		= (1 << 24),
 	/* unmask IRQs */
-- 
cgit v1.2.3


From b7876a6fb6e9bf6cbcf7b40cf034aa4138d7978f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:34 +0100
Subject: cy82c693: remove superfluous ide_cy82c693 chipset type

Since CY82C693 doesn't require serialization we may as well
use the default ide_pci chipset type.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/cy82c693.c | 1 -
 drivers/ide/ide-proc.c | 1 -
 include/linux/ide.h    | 3 +--
 3 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index 5297f07d2933..d37baf8ecc5f 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -292,7 +292,6 @@ static const struct ide_port_info cy82c693_chipset __devinitdata = {
 	.name		= DRV_NAME,
 	.init_iops	= init_iops_cy82c693,
 	.port_ops	= &cy82c693_port_ops,
-	.chipset	= ide_cy82c693,
 	.host_flags	= IDE_HFLAG_SINGLE,
 	.pio_mask	= ATA_PIO4,
 	.swdma_mask	= ATA_SWDMA2,
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 066d2317545b..a14e2938e4f3 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -46,7 +46,6 @@ static int proc_ide_read_imodel
 	case ide_qd65xx:	name = "qd65xx";	break;
 	case ide_umc8672:	name = "umc8672";	break;
 	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
 	case ide_pmac:		name = "mac-io";	break;
 	case ide_au1xxx:	name = "au1xxx";	break;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f62d35a5fb71..f89d6d69a386 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -170,8 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *);
 enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_cy82c693,	ide_4drives,
-		ide_pmac,	ide_acorn,
+		ide_4drives,	ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
 
-- 
cgit v1.2.3


From 702c026be87ef8374ae58122969a4b0b081ce6f2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:36 +0100
Subject: ide: rework handling of serialized ports (v2)

* hpt366: set IDE_HFLAG_SERIALIZE in ->host_flags if needed
  in init_hwif_hpt366().  Remove HPT_SERIALIZE_IO while at it.

* Set IDE_HFLAG_SERIALIZE in ->host_flags if needed in
  ide_init_port().

* Convert init_irq() to use IDE_HFLAG_SERIALIZE together with
  hwif->host to find out ports which need to be serialized.

* Remove no longer needed save_match() and ide_hwif_t.serialized.

v2:
* Set host's ->host_flags field instead of port's copy.

This patch should fix the incorrect grouping of port(s) from
host(s) that need serialization with port(s) that happen to use
the same IRQ(s) but are from the host(s) that don't need it.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/hpt366.c    |  8 +-------
 drivers/ide/ide-probe.c | 50 +++++--------------------------------------------
 include/linux/ide.h     |  1 -
 3 files changed, 6 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index f5afd46ed51c..b18e10d99d2e 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -135,7 +135,6 @@
 /* various tuning parameters */
 #define HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
-#define HPT_SERIALIZE_IO	0
 
 static const char *quirk_drives[] = {
 	"QUANTUM FIREBALLlct08 08",
@@ -1288,7 +1287,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-	int serialize		= HPT_SERIALIZE_IO;
 	u8  chip_type		= info->chip_type;
 
 	/* Cache the channel's MISC. control registers' offset */
@@ -1305,13 +1303,9 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		 * Clock is shared between the channels,
 		 * so we'll have to serialize them... :-(
 		 */
-		serialize = 1;
+		hwif->host->host_flags |= IDE_HFLAG_SERIALIZE;
 		hwif->rw_disk = &hpt3xxn_rw_disk;
 	}
-
-	/* Serialize access to this device if needed */
-	if (serialize && hwif->mate)
-		hwif->serialized = hwif->mate->serialized = 1;
 }
 
 static int __devinit init_dma_hpt366(ide_hwif_t *hwif,
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f76c22c45086..c1cd1af2646b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -863,31 +863,6 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 }
 
-/*
- * save_match() is used to simplify logic in init_irq() below.
- *
- * A loophole here is that we may not know about a particular
- * hwif's irq until after that hwif is actually probed/initialized..
- * This could be a problem for the case where an hwif is on a
- * dual interface that requires serialization (eg. cmd640) and another
- * hwif using one of the same irqs is initialized beforehand.
- *
- * This routine detects and reports such situations, but does not fix them.
- */
-static void save_match(ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
-{
-	ide_hwif_t *m = *match;
-
-	if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
-		if (!new->hwgroup)
-			return;
-		printk(KERN_WARNING "%s: potential IRQ problem with %s and %s\n",
-			hwif->name, new->name, m->name);
-	}
-	if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
-		*match = new;
-}
-
 /*
  * init request queue
  */
@@ -1052,26 +1027,13 @@ static int init_irq (ide_hwif_t *hwif)
 	mutex_lock(&ide_cfg_mtx);
 	hwif->hwgroup = NULL;
 
-	/*
-	 * Group up with any other hwifs that share our irq(s).
-	 */
 	for (index = 0; index < MAX_HWIFS; index++) {
 		ide_hwif_t *h = ide_ports[index];
 
 		if (h && h->hwgroup) {  /* scan only initialized ports */
-			if (hwif->irq == h->irq) {
-				if (hwif->chipset != ide_pci ||
-				    h->chipset != ide_pci) {
-					save_match(hwif, h, &match);
-				}
-			}
-			if (hwif->serialized) {
-				if (hwif->mate && hwif->mate->irq == h->irq)
-					save_match(hwif, h, &match);
-			}
-			if (h->serialized) {
-				if (h->mate && hwif->irq == h->mate->irq)
-					save_match(hwif, h, &match);
+			if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
+				if (hwif->host == h->host)
+					match = h;
 			}
 		}
 	}
@@ -1437,10 +1399,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 	}
 
 	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
-	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base)) {
-		if (hwif->mate)
-			hwif->mate->serialized = hwif->serialized = 1;
-	}
+	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base))
+		hwif->host->host_flags |= IDE_HFLAG_SERIALIZE;
 
 	if (d->max_sectors)
 		hwif->rqsize = d->max_sectors;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f89d6d69a386..9b89cab6d493 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -839,7 +839,6 @@ typedef struct hwif_s {
 	unsigned	extra_ports;	/* number of extra dma ports */
 
 	unsigned	present    : 1;	/* this interface exists */
-	unsigned	serialized : 1;	/* serialized all channel operation */
 	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
 
 	struct device		gendev;
-- 
cgit v1.2.3


From e2984c628c924442132304ae662da433f41c05c9 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:37 +0100
Subject: ide: move Power Management support to ide-pm.c

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile |   2 +-
 drivers/ide/ide-io.c | 159 ----------------------------------
 drivers/ide/ide-pm.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide.c    |  74 ----------------
 include/linux/ide.h  |   8 ++
 5 files changed, 244 insertions(+), 234 deletions(-)
 create mode 100644 drivers/ide/ide-pm.c

(limited to 'include')

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 7818d402b188..bdc7b81bc044 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -5,7 +5,7 @@
 EXTRA_CFLAGS				+= -Idrivers/ide
 
 ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
-	      ide-taskfile.o ide-park.o ide-pio-blacklist.o
+	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o
 
 # core IDE code
 ide-core-$(CONFIG_IDE_TIMINGS)		+= ide-timings.o
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 26d3f3cacc8a..ecacc008fdaf 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -120,98 +120,6 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 }
 EXPORT_SYMBOL(ide_end_request);
 
-static void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
-{
-	struct request_pm_state *pm = rq->data;
-
-#ifdef DEBUG_PM
-	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
-		drive->name, pm->pm_step);
-#endif
-	if (drive->media != ide_disk)
-		return;
-
-	switch (pm->pm_step) {
-	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
-		if (pm->pm_state == PM_EVENT_FREEZE)
-			pm->pm_step = IDE_PM_COMPLETED;
-		else
-			pm->pm_step = IDE_PM_STANDBY;
-		break;
-	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		pm->pm_step = IDE_PM_COMPLETED;
-		break;
-	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
-		pm->pm_step = IDE_PM_IDLE;
-		break;
-	case IDE_PM_IDLE:		/* Resume step 2 (idle)*/
-		pm->pm_step = IDE_PM_RESTORE_DMA;
-		break;
-	}
-}
-
-static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
-{
-	struct request_pm_state *pm = rq->data;
-	ide_task_t *args = rq->special;
-
-	memset(args, 0, sizeof(*args));
-
-	switch (pm->pm_step) {
-	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
-		if (drive->media != ide_disk)
-			break;
-		/* Not supported? Switch to next step now. */
-		if (ata_id_flush_enabled(drive->id) == 0 ||
-		    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) {
-			ide_complete_power_step(drive, rq);
-			return ide_stopped;
-		}
-		if (ata_id_flush_ext_enabled(drive->id))
-			args->tf.command = ATA_CMD_FLUSH_EXT;
-		else
-			args->tf.command = ATA_CMD_FLUSH;
-		goto out_do_tf;
-	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		args->tf.command = ATA_CMD_STANDBYNOW1;
-		goto out_do_tf;
-	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
-		ide_set_max_pio(drive);
-		/*
-		 * skip IDE_PM_IDLE for ATAPI devices
-		 */
-		if (drive->media != ide_disk)
-			pm->pm_step = IDE_PM_RESTORE_DMA;
-		else
-			ide_complete_power_step(drive, rq);
-		return ide_stopped;
-	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		args->tf.command = ATA_CMD_IDLEIMMEDIATE;
-		goto out_do_tf;
-	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
-		/*
-		 * Right now, all we do is call ide_set_dma(drive),
-		 * we could be smarter and check for current xfer_speed
-		 * in struct drive etc...
-		 */
-		if (drive->hwif->dma_ops == NULL)
-			break;
-		/*
-		 * TODO: respect IDE_DFLAG_USING_DMA
-		 */
-		ide_set_dma(drive);
-		break;
-	}
-
-	pm->pm_step = IDE_PM_COMPLETED;
-	return ide_stopped;
-
-out_do_tf:
-	args->tf_flags	 = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
-	args->data_phase = TASKFILE_NO_DATA;
-	return do_rw_taskfile(drive, args);
-}
-
 /**
  *	ide_end_dequeued_request	-	complete an IDE I/O
  *	@drive: IDE device for the I/O
@@ -236,39 +144,6 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 }
 EXPORT_SYMBOL_GPL(ide_end_dequeued_request);
 
-
-/**
- *	ide_complete_pm_request - end the current Power Management request
- *	@drive: target drive
- *	@rq: request
- *
- *	This function cleans up the current PM request and stops the queue
- *	if necessary.
- */
-static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
-{
-	struct request_queue *q = drive->queue;
-	unsigned long flags;
-
-#ifdef DEBUG_PM
-	printk("%s: completing PM request, %s\n", drive->name,
-	       blk_pm_suspend_request(rq) ? "suspend" : "resume");
-#endif
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (blk_pm_suspend_request(rq)) {
-		blk_stop_queue(q);
-	} else {
-		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
-		blk_start_queue(q);
-	}
-	spin_unlock_irqrestore(q->queue_lock, flags);
-
-	drive->hwif->hwgroup->rq = NULL;
-
-	if (blk_end_request(rq, 0, 0))
-		BUG();
-}
-
 /**
  *	ide_end_drive_cmd	-	end an explicit drive command
  *	@drive: command 
@@ -697,40 +572,6 @@ static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
 	}
 }
 
-static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
-{
-	struct request_pm_state *pm = rq->data;
-
-	if (blk_pm_suspend_request(rq) &&
-	    pm->pm_step == IDE_PM_START_SUSPEND)
-		/* Mark drive blocked when starting the suspend sequence. */
-		drive->dev_flags |= IDE_DFLAG_BLOCKED;
-	else if (blk_pm_resume_request(rq) &&
-		 pm->pm_step == IDE_PM_START_RESUME) {
-		/* 
-		 * The first thing we do on wakeup is to wait for BSY bit to
-		 * go away (with a looong timeout) as a drive on this hwif may
-		 * just be POSTing itself.
-		 * We do that before even selecting as the "other" device on
-		 * the bus may be broken enough to walk on our toes at this
-		 * point.
-		 */
-		ide_hwif_t *hwif = drive->hwif;
-		int rc;
-#ifdef DEBUG_PM
-		printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
-#endif
-		rc = ide_wait_not_busy(hwif, 35000);
-		if (rc)
-			printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
-		SELECT_DRIVE(drive);
-		hwif->tp_ops->set_irq(hwif, 1);
-		rc = ide_wait_not_busy(hwif, 100000);
-		if (rc)
-			printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
-	}
-}
-
 /**
  *	start_request	-	start of I/O and command issuing for IDE
  *
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
new file mode 100644
index 000000000000..8282c6086e6a
--- /dev/null
+++ b/drivers/ide/ide-pm.c
@@ -0,0 +1,235 @@
+#include <linux/kernel.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+int generic_ide_suspend(struct device *dev, pm_message_t mesg)
+{
+	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
+	ide_hwif_t *hwif = HWIF(drive);
+	struct request *rq;
+	struct request_pm_state rqpm;
+	ide_task_t args;
+	int ret;
+
+	/* call ACPI _GTM only once */
+	if ((drive->dn & 1) == 0 || pair == NULL)
+		ide_acpi_get_timing(hwif);
+
+	memset(&rqpm, 0, sizeof(rqpm));
+	memset(&args, 0, sizeof(args));
+	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
+	rq->special = &args;
+	rq->data = &rqpm;
+	rqpm.pm_step = IDE_PM_START_SUSPEND;
+	if (mesg.event == PM_EVENT_PRETHAW)
+		mesg.event = PM_EVENT_FREEZE;
+	rqpm.pm_state = mesg.event;
+
+	ret = blk_execute_rq(drive->queue, NULL, rq, 0);
+	blk_put_request(rq);
+
+	/* call ACPI _PS3 only after both devices are suspended */
+	if (ret == 0 && ((drive->dn & 1) || pair == NULL))
+		ide_acpi_set_state(hwif, 0);
+
+	return ret;
+}
+
+int generic_ide_resume(struct device *dev)
+{
+	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
+	ide_hwif_t *hwif = HWIF(drive);
+	struct request *rq;
+	struct request_pm_state rqpm;
+	ide_task_t args;
+	int err;
+
+	/* call ACPI _PS0 / _STM only once */
+	if ((drive->dn & 1) == 0 || pair == NULL) {
+		ide_acpi_set_state(hwif, 1);
+		ide_acpi_push_timing(hwif);
+	}
+
+	ide_acpi_exec_tfs(drive);
+
+	memset(&rqpm, 0, sizeof(rqpm));
+	memset(&args, 0, sizeof(args));
+	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq->cmd_type = REQ_TYPE_PM_RESUME;
+	rq->cmd_flags |= REQ_PREEMPT;
+	rq->special = &args;
+	rq->data = &rqpm;
+	rqpm.pm_step = IDE_PM_START_RESUME;
+	rqpm.pm_state = PM_EVENT_ON;
+
+	err = blk_execute_rq(drive->queue, NULL, rq, 1);
+	blk_put_request(rq);
+
+	if (err == 0 && dev->driver) {
+		ide_driver_t *drv = to_ide_driver(dev->driver);
+
+		if (drv->resume)
+			drv->resume(drive);
+	}
+
+	return err;
+}
+
+void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
+{
+	struct request_pm_state *pm = rq->data;
+
+#ifdef DEBUG_PM
+	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
+		drive->name, pm->pm_step);
+#endif
+	if (drive->media != ide_disk)
+		return;
+
+	switch (pm->pm_step) {
+	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
+		if (pm->pm_state == PM_EVENT_FREEZE)
+			pm->pm_step = IDE_PM_COMPLETED;
+		else
+			pm->pm_step = IDE_PM_STANDBY;
+		break;
+	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
+		pm->pm_step = IDE_PM_COMPLETED;
+		break;
+	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
+		pm->pm_step = IDE_PM_IDLE;
+		break;
+	case IDE_PM_IDLE:		/* Resume step 2 (idle)*/
+		pm->pm_step = IDE_PM_RESTORE_DMA;
+		break;
+	}
+}
+
+ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
+{
+	struct request_pm_state *pm = rq->data;
+	ide_task_t *args = rq->special;
+
+	memset(args, 0, sizeof(*args));
+
+	switch (pm->pm_step) {
+	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
+		if (drive->media != ide_disk)
+			break;
+		/* Not supported? Switch to next step now. */
+		if (ata_id_flush_enabled(drive->id) == 0 ||
+		    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) {
+			ide_complete_power_step(drive, rq);
+			return ide_stopped;
+		}
+		if (ata_id_flush_ext_enabled(drive->id))
+			args->tf.command = ATA_CMD_FLUSH_EXT;
+		else
+			args->tf.command = ATA_CMD_FLUSH;
+		goto out_do_tf;
+	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
+		args->tf.command = ATA_CMD_STANDBYNOW1;
+		goto out_do_tf;
+	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
+		ide_set_max_pio(drive);
+		/*
+		 * skip IDE_PM_IDLE for ATAPI devices
+		 */
+		if (drive->media != ide_disk)
+			pm->pm_step = IDE_PM_RESTORE_DMA;
+		else
+			ide_complete_power_step(drive, rq);
+		return ide_stopped;
+	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
+		args->tf.command = ATA_CMD_IDLEIMMEDIATE;
+		goto out_do_tf;
+	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
+		/*
+		 * Right now, all we do is call ide_set_dma(drive),
+		 * we could be smarter and check for current xfer_speed
+		 * in struct drive etc...
+		 */
+		if (drive->hwif->dma_ops == NULL)
+			break;
+		/*
+		 * TODO: respect IDE_DFLAG_USING_DMA
+		 */
+		ide_set_dma(drive);
+		break;
+	}
+
+	pm->pm_step = IDE_PM_COMPLETED;
+	return ide_stopped;
+
+out_do_tf:
+	args->tf_flags	 = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
+	args->data_phase = TASKFILE_NO_DATA;
+	return do_rw_taskfile(drive, args);
+}
+
+/**
+ *	ide_complete_pm_request - end the current Power Management request
+ *	@drive: target drive
+ *	@rq: request
+ *
+ *	This function cleans up the current PM request and stops the queue
+ *	if necessary.
+ */
+void ide_complete_pm_request(ide_drive_t *drive, struct request *rq)
+{
+	struct request_queue *q = drive->queue;
+	unsigned long flags;
+
+#ifdef DEBUG_PM
+	printk("%s: completing PM request, %s\n", drive->name,
+	       blk_pm_suspend_request(rq) ? "suspend" : "resume");
+#endif
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (blk_pm_suspend_request(rq)) {
+		blk_stop_queue(q);
+	} else {
+		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
+		blk_start_queue(q);
+	}
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	drive->hwif->hwgroup->rq = NULL;
+
+	if (blk_end_request(rq, 0, 0))
+		BUG();
+}
+
+void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
+{
+	struct request_pm_state *pm = rq->data;
+
+	if (blk_pm_suspend_request(rq) &&
+	    pm->pm_step == IDE_PM_START_SUSPEND)
+		/* Mark drive blocked when starting the suspend sequence. */
+		drive->dev_flags |= IDE_DFLAG_BLOCKED;
+	else if (blk_pm_resume_request(rq) &&
+		 pm->pm_step == IDE_PM_START_RESUME) {
+		/*
+		 * The first thing we do on wakeup is to wait for BSY bit to
+		 * go away (with a looong timeout) as a drive on this hwif may
+		 * just be POSTing itself.
+		 * We do that before even selecting as the "other" device on
+		 * the bus may be broken enough to walk on our toes at this
+		 * point.
+		 */
+		ide_hwif_t *hwif = drive->hwif;
+		int rc;
+#ifdef DEBUG_PM
+		printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
+#endif
+		rc = ide_wait_not_busy(hwif, 35000);
+		if (rc)
+			printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
+		SELECT_DRIVE(drive);
+		hwif->tp_ops->set_irq(hwif, 1);
+		rc = ide_wait_not_busy(hwif, 100000);
+		if (rc)
+			printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
+	}
+}
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 41d042053548..f0f09f702e9c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -388,80 +388,6 @@ ide_ext_devset_rw_sync(unmaskirq, unmaskirq);
 ide_ext_devset_rw_sync(using_dma, using_dma);
 __IDE_DEVSET(pio_mode, DS_SYNC, NULL, set_pio_mode);
 
-static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
-{
-	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
-	struct request *rq;
-	struct request_pm_state rqpm;
-	ide_task_t args;
-	int ret;
-
-	/* call ACPI _GTM only once */
-	if ((drive->dn & 1) == 0 || pair == NULL)
-		ide_acpi_get_timing(hwif);
-
-	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&args, 0, sizeof(args));
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-	rq->special = &args;
-	rq->data = &rqpm;
-	rqpm.pm_step = IDE_PM_START_SUSPEND;
-	if (mesg.event == PM_EVENT_PRETHAW)
-		mesg.event = PM_EVENT_FREEZE;
-	rqpm.pm_state = mesg.event;
-
-	ret = blk_execute_rq(drive->queue, NULL, rq, 0);
-	blk_put_request(rq);
-
-	/* call ACPI _PS3 only after both devices are suspended */
-	if (ret == 0 && ((drive->dn & 1) || pair == NULL))
-		ide_acpi_set_state(hwif, 0);
-
-	return ret;
-}
-
-static int generic_ide_resume(struct device *dev)
-{
-	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
-	struct request *rq;
-	struct request_pm_state rqpm;
-	ide_task_t args;
-	int err;
-
-	/* call ACPI _PS0 / _STM only once */
-	if ((drive->dn & 1) == 0 || pair == NULL) {
-		ide_acpi_set_state(hwif, 1);
-		ide_acpi_push_timing(hwif);
-	}
-
-	ide_acpi_exec_tfs(drive);
-
-	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&args, 0, sizeof(args));
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_PM_RESUME;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = &args;
-	rq->data = &rqpm;
-	rqpm.pm_step = IDE_PM_START_RESUME;
-	rqpm.pm_state = PM_EVENT_ON;
-
-	err = blk_execute_rq(drive->queue, NULL, rq, 1);
-	blk_put_request(rq);
-
-	if (err == 0 && dev->driver) {
-		ide_driver_t *drv = to_ide_driver(dev->driver);
-
-		if (drv->resume)
-			drv->resume(drive);
-	}
-
-	return err;
-}
-
 /**
  * ide_device_get	-	get an additional reference to a ide_drive_t
  * @drive:	device to get a reference to
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9b89cab6d493..e99c56de7f56 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1116,6 +1116,14 @@ enum {
 	IDE_PM_COMPLETED,
 };
 
+int generic_ide_suspend(struct device *, pm_message_t);
+int generic_ide_resume(struct device *);
+
+void ide_complete_power_step(ide_drive_t *, struct request *);
+ide_startstop_t ide_start_power_step(ide_drive_t *, struct request *);
+void ide_complete_pm_request(ide_drive_t *, struct request *);
+void ide_check_pm_state(ide_drive_t *, struct request *);
+
 /*
  * Subdrivers support.
  *
-- 
cgit v1.2.3


From 08a66aea55154b50f9e9e2e89cc85d8b75121568 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Fri, 24 Oct 2008 15:43:07 -0300
Subject: V4L/DVB (9488): Add ov772x driver

This patch adds ov772x driver that use soc_camera framework.
It was tested on SH Migo-r board.

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig     |   6 +
 drivers/media/video/Makefile    |   1 +
 drivers/media/video/ov772x.c    | 966 ++++++++++++++++++++++++++++++++++++++++
 include/media/ov772x.h          |  21 +
 include/media/v4l2-chip-ident.h |   1 +
 5 files changed, 995 insertions(+)
 create mode 100644 drivers/media/video/ov772x.c
 create mode 100644 include/media/ov772x.h

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 057fd7e160c4..af951bdc111d 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -750,6 +750,12 @@ config SOC_CAMERA_PLATFORM
 	help
 	  This is a generic SoC camera platform driver, useful for testing
 
+config SOC_CAMERA_OV772X
+	tristate "ov772x camera support"
+	depends on SOC_CAMERA && I2C
+	help
+	  This is a ov772x camera driver
+
 config VIDEO_PXA27x
 	tristate "PXA27x Quick Capture Interface driver"
 	depends on VIDEO_DEV && PXA27x && SOC_CAMERA
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 52f2e958db77..e5b801379003 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -133,6 +133,7 @@ obj-$(CONFIG_SOC_CAMERA)	+= soc_camera.o
 obj-$(CONFIG_SOC_CAMERA_MT9M001)	+= mt9m001.o
 obj-$(CONFIG_SOC_CAMERA_MT9M111)	+= mt9m111.o
 obj-$(CONFIG_SOC_CAMERA_MT9V022)	+= mt9v022.o
+obj-$(CONFIG_SOC_CAMERA_OV772X)		+= ov772x.o
 obj-$(CONFIG_SOC_CAMERA_PLATFORM)	+= soc_camera_platform.o
 
 obj-$(CONFIG_VIDEO_AU0828) += au0828/
diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
new file mode 100644
index 000000000000..6206dff8d02d
--- /dev/null
+++ b/drivers/media/video/ov772x.c
@@ -0,0 +1,966 @@
+/*
+ * ov772x Camera Driver
+ *
+ * Copyright (C) 2008 Renesas Solutions Corp.
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on ov7670 and soc_camera_platform driver,
+ *
+ * Copyright 2006-7 Jonathan Corbet <corbet@lwn.net>
+ * Copyright (C) 2008 Magnus Damm
+ * Copyright (C) 2008, Guennadi Liakhovetski <kernel@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-chip-ident.h>
+#include <media/v4l2-common.h>
+#include <media/soc_camera.h>
+#include <media/ov772x.h>
+
+/*
+ * register offset
+ */
+#define GAIN        0x00 /* AGC - Gain control gain setting */
+#define BLUE        0x01 /* AWB - Blue channel gain setting */
+#define RED         0x02 /* AWB - Red   channel gain setting */
+#define GREEN       0x03 /* AWB - Green channel gain setting */
+#define COM1        0x04 /* Common control 1 */
+#define BAVG        0x05 /* U/B Average Level */
+#define GAVG        0x06 /* Y/Gb Average Level */
+#define RAVG        0x07 /* V/R Average Level */
+#define AECH        0x08 /* Exposure Value - AEC MSBs */
+#define COM2        0x09 /* Common control 2 */
+#define PID         0x0A /* Product ID Number MSB */
+#define VER         0x0B /* Product ID Number LSB */
+#define COM3        0x0C /* Common control 3 */
+#define COM4        0x0D /* Common control 4 */
+#define COM5        0x0E /* Common control 5 */
+#define COM6        0x0F /* Common control 6 */
+#define AEC         0x10 /* Exposure Value */
+#define CLKRC       0x11 /* Internal clock */
+#define COM7        0x12 /* Common control 7 */
+#define COM8        0x13 /* Common control 8 */
+#define COM9        0x14 /* Common control 9 */
+#define COM10       0x15 /* Common control 10 */
+#define HSTART      0x17 /* Horizontal sensor size */
+#define HSIZE       0x18 /* Horizontal frame (HREF column) end high 8-bit */
+#define VSTART      0x19 /* Vertical frame (row) start high 8-bit */
+#define VSIZE       0x1A /* Vertical sensor size */
+#define PSHFT       0x1B /* Data format - pixel delay select */
+#define MIDH        0x1C /* Manufacturer ID byte - high */
+#define MIDL        0x1D /* Manufacturer ID byte - low  */
+#define LAEC        0x1F /* Fine AEC value */
+#define COM11       0x20 /* Common control 11 */
+#define BDBASE      0x22 /* Banding filter Minimum AEC value */
+#define DBSTEP      0x23 /* Banding filter Maximum Setp */
+#define AEW         0x24 /* AGC/AEC - Stable operating region (upper limit) */
+#define AEB         0x25 /* AGC/AEC - Stable operating region (lower limit) */
+#define VPT         0x26 /* AGC/AEC Fast mode operating region */
+#define HOUTSIZE    0x29 /* Horizontal data output size MSBs */
+#define EXHCH       0x2A /* Dummy pixel insert MSB */
+#define EXHCL       0x2B /* Dummy pixel insert LSB */
+#define VOUTSIZE    0x2C /* Vertical data output size MSBs */
+#define ADVFL       0x2D /* LSB of insert dummy lines in Vertical direction */
+#define ADVFH       0x2E /* MSG of insert dummy lines in Vertical direction */
+#define YAVE        0x2F /* Y/G Channel Average value */
+#define LUMHTH      0x30 /* Histogram AEC/AGC Luminance high level threshold */
+#define LUMLTH      0x31 /* Histogram AEC/AGC Luminance low  level threshold */
+#define HREF        0x32 /* Image start and size control */
+#define DM_LNL      0x33 /* Dummy line low  8 bits */
+#define DM_LNH      0x34 /* Dummy line high 8 bits */
+#define ADOFF_B     0x35 /* AD offset compensation value for B  channel */
+#define ADOFF_R     0x36 /* AD offset compensation value for R  channel */
+#define ADOFF_GB    0x37 /* AD offset compensation value for Gb channel */
+#define ADOFF_GR    0x38 /* AD offset compensation value for Gr channel */
+#define OFF_B       0x39 /* Analog process B  channel offset value */
+#define OFF_R       0x3A /* Analog process R  channel offset value */
+#define OFF_GB      0x3B /* Analog process Gb channel offset value */
+#define OFF_GR      0x3C /* Analog process Gr channel offset value */
+#define COM12       0x3D /* Common control 12 */
+#define COM13       0x3E /* Common control 13 */
+#define COM14       0x3F /* Common control 14 */
+#define COM15       0x40 /* Common control 15*/
+#define COM16       0x41 /* Common control 16 */
+#define TGT_B       0x42 /* BLC blue channel target value */
+#define TGT_R       0x43 /* BLC red  channel target value */
+#define TGT_GB      0x44 /* BLC Gb   channel target value */
+#define TGT_GR      0x45 /* BLC Gr   channel target value */
+#define LCC0        0x46 /* Lens correction control 0 */
+#define LCC1        0x47 /* Lens correction option 1 - X coordinate */
+#define LCC2        0x48 /* Lens correction option 2 - Y coordinate */
+#define LCC3        0x49 /* Lens correction option 3 */
+#define LCC4        0x4A /* Lens correction option 4 - radius of the circular */
+#define LCC5        0x4B /* Lens correction option 5 */
+#define LCC6        0x4C /* Lens correction option 6 */
+#define FIXGAIN     0x4D /* Analog fix gain amplifer */
+#define AREF0       0x4E /* Sensor reference control */
+#define AREF1       0x4F /* Sensor reference current control */
+#define AREF2       0x50 /* Analog reference control */
+#define AREF3       0x51 /* ADC    reference control */
+#define AREF4       0x52 /* ADC    reference control */
+#define AREF5       0x53 /* ADC    reference control */
+#define AREF6       0x54 /* Analog reference control */
+#define AREF7       0x55 /* Analog reference control */
+#define UFIX        0x60 /* U channel fixed value output */
+#define VFIX        0x61 /* V channel fixed value output */
+#define AW_BB_BLK   0x62 /* AWB option for advanced AWB */
+#define AW_B_CTRL0  0x63 /* AWB control byte 0 */
+#define DSP_CTRL1   0x64 /* DSP control byte 1 */
+#define DSP_CTRL2   0x65 /* DSP control byte 2 */
+#define DSP_CTRL3   0x66 /* DSP control byte 3 */
+#define DSP_CTRL4   0x67 /* DSP control byte 4 */
+#define AW_B_BIAS   0x68 /* AWB BLC level clip */
+#define AW_BCTRL1   0x69 /* AWB control  1 */
+#define AW_BCTRL2   0x6A /* AWB control  2 */
+#define AW_BCTRL3   0x6B /* AWB control  3 */
+#define AW_BCTRL4   0x6C /* AWB control  4 */
+#define AW_BCTRL5   0x6D /* AWB control  5 */
+#define AW_BCTRL6   0x6E /* AWB control  6 */
+#define AW_BCTRL7   0x6F /* AWB control  7 */
+#define AW_BCTRL8   0x70 /* AWB control  8 */
+#define AW_BCTRL9   0x71 /* AWB control  9 */
+#define AW_BCTRL10  0x72 /* AWB control 10 */
+#define AW_BCTRL11  0x73 /* AWB control 11 */
+#define AW_BCTRL12  0x74 /* AWB control 12 */
+#define AW_BCTRL13  0x75 /* AWB control 13 */
+#define AW_BCTRL14  0x76 /* AWB control 14 */
+#define AW_BCTRL15  0x77 /* AWB control 15 */
+#define AW_BCTRL16  0x78 /* AWB control 16 */
+#define AW_BCTRL17  0x79 /* AWB control 17 */
+#define AW_BCTRL18  0x7A /* AWB control 18 */
+#define AW_BCTRL19  0x7B /* AWB control 19 */
+#define AW_BCTRL20  0x7C /* AWB control 20 */
+#define AW_BCTRL21  0x7D /* AWB control 21 */
+#define GAM1        0x7E /* Gamma Curve  1st segment input end point */
+#define GAM2        0x7F /* Gamma Curve  2nd segment input end point */
+#define GAM3        0x80 /* Gamma Curve  3rd segment input end point */
+#define GAM4        0x81 /* Gamma Curve  4th segment input end point */
+#define GAM5        0x82 /* Gamma Curve  5th segment input end point */
+#define GAM6        0x83 /* Gamma Curve  6th segment input end point */
+#define GAM7        0x84 /* Gamma Curve  7th segment input end point */
+#define GAM8        0x85 /* Gamma Curve  8th segment input end point */
+#define GAM9        0x86 /* Gamma Curve  9th segment input end point */
+#define GAM10       0x87 /* Gamma Curve 10th segment input end point */
+#define GAM11       0x88 /* Gamma Curve 11th segment input end point */
+#define GAM12       0x89 /* Gamma Curve 12th segment input end point */
+#define GAM13       0x8A /* Gamma Curve 13th segment input end point */
+#define GAM14       0x8B /* Gamma Curve 14th segment input end point */
+#define GAM15       0x8C /* Gamma Curve 15th segment input end point */
+#define SLOP        0x8D /* Gamma curve highest segment slope */
+#define DNSTH       0x8E /* De-noise threshold */
+#define EDGE0       0x8F /* Edge enhancement control 0 */
+#define EDGE1       0x90 /* Edge enhancement control 1 */
+#define DNSOFF      0x91 /* Auto De-noise threshold control */
+#define EDGE2       0x92 /* Edge enhancement strength low  point control */
+#define EDGE3       0x93 /* Edge enhancement strength high point control */
+#define MTX1        0x94 /* Matrix coefficient 1 */
+#define MTX2        0x95 /* Matrix coefficient 2 */
+#define MTX3        0x96 /* Matrix coefficient 3 */
+#define MTX4        0x97 /* Matrix coefficient 4 */
+#define MTX5        0x98 /* Matrix coefficient 5 */
+#define MTX6        0x99 /* Matrix coefficient 6 */
+#define MTX_CTRL    0x9A /* Matrix control */
+#define BRIGHT      0x9B /* Brightness control */
+#define CNTRST      0x9C /* Contrast contrast */
+#define CNTRST_CTRL 0x9D /* Contrast contrast center */
+#define UVAD_J0     0x9E /* Auto UV adjust contrast 0 */
+#define UVAD_J1     0x9F /* Auto UV adjust contrast 1 */
+#define SCAL0       0xA0 /* Scaling control 0 */
+#define SCAL1       0xA1 /* Scaling control 1 */
+#define SCAL2       0xA2 /* Scaling control 2 */
+#define FIFODLYM    0xA3 /* FIFO manual mode delay control */
+#define FIFODLYA    0xA4 /* FIFO auto   mode delay control */
+#define SDE         0xA6 /* Special digital effect control */
+#define USAT        0xA7 /* U component saturation control */
+#define VSAT        0xA8 /* V component saturation control */
+#define HUE0        0xA9 /* Hue control 0 */
+#define HUE1        0xAA /* Hue control 1 */
+#define SIGN        0xAB /* Sign bit for Hue and contrast */
+#define DSPAUTO     0xAC /* DSP auto function ON/OFF control */
+
+/*
+ * register detail
+ */
+
+/* COM2 */
+#define SOFT_SLEEP_MODE 0x10	/* Soft sleep mode */
+				/* Output drive capability */
+#define OCAP_1x         0x00	/* 1x */
+#define OCAP_2x         0x01	/* 2x */
+#define OCAP_3x         0x02	/* 3x */
+#define OCAP_4x         0x03	/* 4x */
+
+/* COM3 */
+#define SWAP_MASK       0x38
+
+#define VFIMG_ON_OFF    0x80	/* Vertical flip image ON/OFF selection */
+#define HMIMG_ON_OFF    0x40	/* Horizontal mirror image ON/OFF selection */
+#define SWAP_RGB        0x20	/* Swap B/R  output sequence in RGB mode */
+#define SWAP_YUV        0x10	/* Swap Y/UV output sequence in YUV mode */
+#define SWAP_ML         0x08	/* Swap output MSB/LSB */
+				/* Tri-state option for output clock */
+#define NOTRI_CLOCK     0x04	/*   0: Tri-state    at this period */
+				/*   1: No tri-state at this period */
+				/* Tri-state option for output data */
+#define NOTRI_DATA      0x02	/*   0: Tri-state    at this period */
+				/*   1: No tri-state at this period */
+#define SCOLOR_TEST     0x01	/* Sensor color bar test pattern */
+
+/* COM4 */
+				/* PLL frequency control */
+#define PLL_BYPASS      0x00	/*  00: Bypass PLL */
+#define PLL_4x          0x40	/*  01: PLL 4x */
+#define PLL_6x          0x80	/*  10: PLL 6x */
+#define PLL_8x          0xc0	/*  11: PLL 8x */
+				/* AEC evaluate window */
+#define AEC_FULL        0x00	/*  00: Full window */
+#define AEC_1p2         0x10	/*  01: 1/2  window */
+#define AEC_1p4         0x20	/*  10: 1/4  window */
+#define AEC_2p3         0x30	/*  11: Low 2/3 window */
+
+/* COM5 */
+#define AFR_ON_OFF      0x80	/* Auto frame rate control ON/OFF selection */
+#define AFR_SPPED       0x40	/* Auto frame rate control speed slection */
+				/* Auto frame rate max rate control */
+#define AFR_NO_RATE     0x00	/*     No  reduction of frame rate */
+#define AFR_1p2         0x10	/*     Max reduction to 1/2 frame rate */
+#define AFR_1p4         0x20	/*     Max reduction to 1/4 frame rate */
+#define AFR_1p8         0x30	/* Max reduction to 1/8 frame rate */
+				/* Auto frame rate active point control */
+#define AF_2x           0x00	/*     Add frame when AGC reaches  2x gain */
+#define AF_4x           0x04	/*     Add frame when AGC reaches  4x gain */
+#define AF_8x           0x08	/*     Add frame when AGC reaches  8x gain */
+#define AF_16x          0x0c	/* Add frame when AGC reaches 16x gain */
+				/* AEC max step control */
+#define AEC_NO_LIMIT    0x01	/*   0 : AEC incease step has limit */
+				/*   1 : No limit to AEC increase step */
+
+/* COM7 */
+				/* SCCB Register Reset */
+#define SCCB_RESET      0x80	/*   0 : No change */
+				/*   1 : Resets all registers to default */
+				/* Resolution selection */
+#define SLCT_MASK       0x40	/*   Mask of VGA or QVGA */
+#define SLCT_VGA        0x00	/*   0 : VGA */
+#define SLCT_QVGA       0x40	/*   1 : QVGA */
+#define ITU656_ON_OFF   0x20	/* ITU656 protocol ON/OFF selection */
+				/* RGB output format control */
+#define FMT_GBR422      0x00	/*      00 : GBR 4:2:2 */
+#define FMT_RGB565      0x04	/*      01 : RGB 565 */
+#define FMT_RGB555      0x08	/*      10 : RGB 555 */
+#define FMT_RGB444      0x0c	/* 11 : RGB 444 */
+				/* Output format control */
+#define OFMT_YUV        0x00	/*      00 : YUV */
+#define OFMT_P_BRAW     0x01	/*      01 : Processed Bayer RAW */
+#define OFMT_RGB        0x02	/*      10 : RGB */
+#define OFMT_BRAW       0x03	/* 11 : Bayer RAW */
+
+/* COM8 */
+#define FAST_ALGO       0x80	/* Enable fast AGC/AEC algorithm */
+				/* AEC Setp size limit */
+#define UNLMT_STEP      0x40	/*   0 : Step size is limited */
+				/*   1 : Unlimited step size */
+#define BNDF_ON_OFF     0x20	/* Banding filter ON/OFF */
+#define AEC_BND         0x10	/* Enable AEC below banding value */
+#define AEC_ON_OFF      0x08	/* Fine AEC ON/OFF control */
+#define AGC_ON          0x04	/* AGC Enable */
+#define AWB_ON          0x02	/* AWB Enable */
+#define AEC_ON          0x01	/* AEC Enable */
+
+/* COM9 */
+#define BASE_AECAGC     0x80	/* Histogram or average based AEC/AGC */
+				/* Automatic gain ceiling - maximum AGC value */
+#define GAIN_2x         0x00	/*    000 :   2x */
+#define GAIN_4x         0x10	/*    001 :   4x */
+#define GAIN_8x         0x20	/*    010 :   8x */
+#define GAIN_16x        0x30	/* 011 :  16x */
+#define GAIN_32x        0x40	/*    100 :  32x */
+#define GAIN_64x        0x50	/* 101 :  64x */
+#define GAIN_128x       0x60	/* 110 : 128x */
+#define DROP_VSYNC      0x04	/* Drop VSYNC output of corrupt frame */
+#define DROP_HREF       0x02	/* Drop HREF  output of corrupt frame */
+
+/* COM11 */
+#define SGLF_ON_OFF     0x02	/* Single frame ON/OFF selection */
+#define SGLF_TRIG       0x01	/* Single frame transfer trigger */
+
+/* EXHCH */
+#define VSIZE_LSB       0x04	/* Vertical data output size LSB */
+
+/* DSP_CTRL1 */
+#define FIFO_ON         0x80	/* FIFO enable/disable selection */
+#define UV_ON_OFF       0x40	/* UV adjust function ON/OFF selection */
+#define YUV444_2_422    0x20	/* YUV444 to 422 UV channel option selection */
+#define CLR_MTRX_ON_OFF 0x10	/* Color matrix ON/OFF selection */
+#define INTPLT_ON_OFF   0x08	/* Interpolation ON/OFF selection */
+#define GMM_ON_OFF      0x04	/* Gamma function ON/OFF selection */
+#define AUTO_BLK_ON_OFF 0x02	/* Black defect auto correction ON/OFF */
+#define AUTO_WHT_ON_OFF 0x01	/* White define auto correction ON/OFF */
+
+/* DSP_CTRL3 */
+#define UV_MASK         0x80	/* UV output sequence option */
+#define UV_ON           0x80	/*   ON */
+#define UV_OFF          0x00	/*   OFF */
+#define CBAR_MASK       0x20	/* DSP Color bar mask */
+#define CBAR_ON         0x20	/*   ON */
+#define CBAR_OFF        0x00	/*   OFF */
+
+/* HSTART */
+#define HST_VGA         0x23
+#define HST_QVGA        0x3F
+
+/* HSIZE */
+#define HSZ_VGA         0xA0
+#define HSZ_QVGA        0x50
+
+/* VSTART */
+#define VST_VGA         0x07
+#define VST_QVGA        0x03
+
+/* VSIZE */
+#define VSZ_VGA         0xF0
+#define VSZ_QVGA        0x78
+
+/* HOUTSIZE */
+#define HOSZ_VGA        0xA0
+#define HOSZ_QVGA       0x50
+
+/* VOUTSIZE */
+#define VOSZ_VGA        0xF0
+#define VOSZ_QVGA       0x78
+
+/*
+ * bit configure (32 bit)
+ * this is used in struct ov772x_color_format :: option
+ */
+#define OP_UV       0x00000001
+#define OP_SWAP_RGB 0x00000002
+
+/*
+ * struct
+ */
+struct regval_list {
+	unsigned char reg_num;
+	unsigned char value;
+};
+
+struct ov772x_color_format {
+	char                     *name;
+	__u32                     fourcc;
+	const struct regval_list *regs;
+	unsigned int              option;
+};
+
+struct ov772x_win_size {
+	char                     *name;
+	__u32                     width;
+	__u32                     height;
+	unsigned char             com7_bit;
+	const struct regval_list *regs;
+};
+
+struct ov772x_priv {
+	struct ov772x_camera_info        *info;
+	struct i2c_client                *client;
+	struct soc_camera_device          icd;
+	const struct ov772x_color_format *fmt;
+	const struct ov772x_win_size     *win;
+};
+
+#define ENDMARKER { 0xff, 0xff }
+
+static const struct regval_list ov772x_default_regs[] =
+{
+	{ COM3,  0x00 },
+	{ COM4,  PLL_4x | 0x01 },
+	{ 0x16,  0x00 }, /* Mystery */
+	{ COM11, 0x10 }, /* Mystery */
+	{ 0x28,  0x00 }, /* Mystery */
+	{ HREF,  0x00 },
+	{ COM13, 0xe2 }, /* Mystery */
+	{ AREF0, 0xef },
+	{ AREF2, 0x60 },
+	{ AREF6, 0x7a },
+	ENDMARKER,
+};
+
+/*
+ * register setting for color format
+ */
+static const struct regval_list ov772x_RGB555_regs[] = {
+	{ COM7, FMT_RGB555 | OFMT_RGB },
+	ENDMARKER,
+};
+
+static const struct regval_list ov772x_RGB565_regs[] = {
+	{ COM7, FMT_RGB565 | OFMT_RGB },
+	ENDMARKER,
+};
+
+static const struct regval_list ov772x_YYUV_regs[] = {
+	{ COM3, SWAP_YUV },
+	{ COM7, OFMT_YUV },
+	ENDMARKER,
+};
+
+static const struct regval_list ov772x_UVYY_regs[] = {
+	{ COM7, OFMT_YUV },
+	ENDMARKER,
+};
+
+
+/*
+ * register setting for window size
+ */
+static const struct regval_list ov772x_qvga_regs[] = {
+	{ HSTART,   HST_QVGA },
+	{ HSIZE,    HSZ_QVGA },
+	{ VSTART,   VST_QVGA },
+	{ VSIZE,    VSZ_QVGA  },
+	{ HOUTSIZE, HOSZ_QVGA },
+	{ VOUTSIZE, VOSZ_QVGA },
+	ENDMARKER,
+};
+
+static const struct regval_list ov772x_vga_regs[] = {
+	{ HSTART,   HST_VGA },
+	{ HSIZE,    HSZ_VGA },
+	{ VSTART,   VST_VGA },
+	{ VSIZE,    VSZ_VGA },
+	{ HOUTSIZE, HOSZ_VGA },
+	{ VOUTSIZE, VOSZ_VGA },
+	ENDMARKER,
+};
+
+/*
+ * supported format list
+ */
+
+#define SETFOURCC(type) .name = (#type), .fourcc = (V4L2_PIX_FMT_ ## type)
+static const struct soc_camera_data_format ov772x_fmt_lists[] = {
+	{
+		SETFOURCC(YUYV),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+	},
+	{
+		SETFOURCC(YVYU),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+	},
+	{
+		SETFOURCC(UYVY),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+	},
+	{
+		SETFOURCC(RGB555),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+	},
+	{
+		SETFOURCC(RGB555X),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+	},
+	{
+		SETFOURCC(RGB565),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+	},
+	{
+		SETFOURCC(RGB565X),
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+	},
+};
+
+/*
+ * color format list
+ */
+#define T_YUYV 0
+static const struct ov772x_color_format ov772x_cfmts[] = {
+	[T_YUYV] = {
+		SETFOURCC(YUYV),
+		.regs   = ov772x_YYUV_regs,
+	},
+	{
+		SETFOURCC(YVYU),
+		.regs   = ov772x_YYUV_regs,
+		.option = OP_UV,
+	},
+	{
+		SETFOURCC(UYVY),
+		.regs   = ov772x_UVYY_regs,
+	},
+	{
+		SETFOURCC(RGB555),
+		.regs   = ov772x_RGB555_regs,
+		.option = OP_SWAP_RGB,
+	},
+	{
+		SETFOURCC(RGB555X),
+		.regs   = ov772x_RGB555_regs,
+	},
+	{
+		SETFOURCC(RGB565),
+		.regs   = ov772x_RGB565_regs,
+		.option = OP_SWAP_RGB,
+	},
+	{
+		SETFOURCC(RGB565X),
+		.regs   = ov772x_RGB565_regs,
+	},
+};
+
+
+/*
+ * window size list
+ */
+#define VGA_WIDTH   640
+#define VGA_HEIGHT  480
+#define QVGA_WIDTH  320
+#define QVGA_HEIGHT 240
+#define MAX_WIDTH   VGA_WIDTH
+#define MAX_HEIGHT  VGA_HEIGHT
+
+static const struct ov772x_win_size ov772x_win_vga = {
+	.name     = "VGA",
+	.width    = VGA_WIDTH,
+	.height   = VGA_HEIGHT,
+	.com7_bit = SLCT_VGA,
+	.regs     = ov772x_vga_regs,
+};
+
+static const struct ov772x_win_size ov772x_win_qvga = {
+	.name     = "QVGA",
+	.width    = QVGA_WIDTH,
+	.height   = QVGA_HEIGHT,
+	.com7_bit = SLCT_QVGA,
+	.regs     = ov772x_qvga_regs,
+};
+
+
+/*
+ * general function
+ */
+
+static int ov772x_write_array(struct i2c_client        *client,
+			      const struct regval_list *vals)
+{
+	while (vals->reg_num != 0xff) {
+		int ret = i2c_smbus_write_byte_data(client,
+						    vals->reg_num,
+						    vals->value);
+		if (ret < 0)
+			return ret;
+		vals++;
+	}
+	return 0;
+}
+
+static int ov772x_mask_set(struct i2c_client *client,
+					  u8  command,
+					  u8  mask,
+					  u8  set)
+{
+	s32 val = i2c_smbus_read_byte_data(client, command);
+	val &= ~mask;
+	val |=  set;
+
+	return i2c_smbus_write_byte_data(client, command, val);
+}
+
+static int ov772x_reset(struct i2c_client *client)
+{
+	int ret = i2c_smbus_write_byte_data(client, COM7, SCCB_RESET);
+	msleep(1);
+	return ret;
+}
+
+/*
+ * soc_camera_ops function
+ */
+
+static int ov772x_init(struct soc_camera_device *icd)
+{
+	return 0;
+}
+
+static int ov772x_release(struct soc_camera_device *icd)
+{
+	return 0;
+}
+
+static int ov772x_start_capture(struct soc_camera_device *icd)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+	int                 ret;
+
+
+	if (!priv->win)
+		priv->win = &ov772x_win_vga;
+	if (!priv->fmt)
+		priv->fmt = &ov772x_cfmts[T_YUYV];
+
+	/*
+	 * reset hardware
+	 */
+	ov772x_reset(priv->client);
+	ret = ov772x_write_array(priv->client, ov772x_default_regs);
+	if (ret < 0)
+		goto start_end;
+
+	/*
+	 * set color format
+	 */
+	ret = ov772x_write_array(priv->client, priv->fmt->regs);
+	if (ret < 0)
+		goto start_end;
+
+	/*
+	 * set size format
+	 */
+	ret = ov772x_write_array(priv->client, priv->win->regs);
+	if (ret < 0)
+		goto start_end;
+
+	/*
+	 * set COM7 bit ( QVGA or VGA )
+	 */
+	ret = ov772x_mask_set(priv->client,
+			      COM7, SLCT_MASK, priv->win->com7_bit);
+	if (ret < 0)
+		goto start_end;
+
+	/*
+	 * set UV setting
+	 */
+	if (priv->fmt->option & OP_UV) {
+		ret = ov772x_mask_set(priv->client,
+				      DSP_CTRL3, UV_MASK, UV_ON);
+		if (ret < 0)
+			goto start_end;
+	}
+
+	/*
+	 * set SWAP setting
+	 */
+	if (priv->fmt->option & OP_SWAP_RGB) {
+		ret = ov772x_mask_set(priv->client,
+				      COM3, SWAP_MASK, SWAP_RGB);
+		if (ret < 0)
+			goto start_end;
+	}
+
+	dev_info(&icd->dev,
+		 "format %s, win %s\n", priv->fmt->name, priv->win->name);
+
+start_end:
+	priv->fmt = NULL;
+	priv->win = NULL;
+
+	return ret;
+}
+
+static int ov772x_stop_capture(struct soc_camera_device *icd)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+	ov772x_reset(priv->client);
+	return 0;
+}
+
+static int ov772x_set_bus_param(struct soc_camera_device *icd,
+				unsigned long		  flags)
+{
+	return 0;
+}
+
+static unsigned long ov772x_query_bus_param(struct soc_camera_device *icd)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+
+	return  SOCAM_PCLK_SAMPLE_RISING |
+		SOCAM_HSYNC_ACTIVE_HIGH  |
+		SOCAM_VSYNC_ACTIVE_HIGH  |
+		SOCAM_MASTER             |
+		priv->info->buswidth;
+}
+
+static int ov772x_get_chip_id(struct soc_camera_device *icd,
+			      struct v4l2_chip_ident   *id)
+{
+	id->ident    = V4L2_IDENT_OV772X;
+	id->revision = 0;
+
+	return 0;
+}
+
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+static int ov772x_get_register(struct soc_camera_device *icd,
+			       struct v4l2_register *reg)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+	int                 ret;
+
+	if (reg->reg > 0xff)
+		return -EINVAL;
+
+	ret = i2c_smbus_read_byte_data(priv->client, reg->reg);
+	if (ret < 0)
+		return ret;
+
+	reg->val = (__u64)ret;
+
+	return 0;
+}
+
+static int ov772x_set_register(struct soc_camera_device *icd,
+			       struct v4l2_register *reg)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+
+	if (reg->reg > 0xff ||
+	    reg->val > 0xff)
+		return -EINVAL;
+
+	return i2c_smbus_write_byte_data(priv->client, reg->reg, reg->val);
+}
+#endif
+
+static int ov772x_set_fmt_cap(struct soc_camera_device *icd,
+			      __u32                     pixfmt,
+			      struct v4l2_rect         *rect)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+	int ret = -EINVAL;
+	int i;
+
+	/*
+	 * select format
+	 */
+	priv->fmt = NULL;
+	for (i = 0; i < ARRAY_SIZE(ov772x_cfmts); i++) {
+		if (pixfmt == ov772x_cfmts[i].fourcc) {
+			priv->fmt = ov772x_cfmts + i;
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int ov772x_try_fmt_cap(struct soc_camera_device *icd,
+			      struct v4l2_format       *f)
+{
+	struct v4l2_pix_format *pix  = &f->fmt.pix;
+	struct ov772x_priv     *priv;
+
+	priv = container_of(icd, struct ov772x_priv, icd);
+
+	/* QVGA */
+	if (pix->width  <= ov772x_win_qvga.width ||
+	    pix->height <= ov772x_win_qvga.height) {
+		priv->win   = &ov772x_win_qvga;
+		pix->width  =  ov772x_win_qvga.width;
+		pix->height =  ov772x_win_qvga.height;
+	}
+
+	/* VGA */
+	else if (pix->width  <= ov772x_win_vga.width ||
+		 pix->height <= ov772x_win_vga.height) {
+		priv->win   = &ov772x_win_vga;
+		pix->width  =  ov772x_win_vga.width;
+		pix->height =  ov772x_win_vga.height;
+	}
+
+	pix->field = V4L2_FIELD_NONE;
+
+	return 0;
+}
+
+static int ov772x_video_probe(struct soc_camera_device *icd)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+	u8                  pid, ver;
+
+	/*
+	 * We must have a parent by now. And it cannot be a wrong one.
+	 * So this entire test is completely redundant.
+	 */
+	if (!icd->dev.parent ||
+	    to_soc_camera_host(icd->dev.parent)->nr != icd->iface)
+		return -ENODEV;
+
+	/*
+	 * ov772x only use 8 or 10 bit bus width
+	 */
+	if (SOCAM_DATAWIDTH_10 != priv->info->buswidth &&
+	    SOCAM_DATAWIDTH_8  != priv->info->buswidth) {
+		dev_err(&icd->dev, "bus width error\n");
+		return -ENODEV;
+	}
+
+	icd->formats     = ov772x_fmt_lists;
+	icd->num_formats = ARRAY_SIZE(ov772x_fmt_lists);
+
+	if (priv->info->link.power)
+		priv->info->link.power(&priv->client->dev, 1);
+
+	/*
+	 * check and show product ID and manufacturer ID
+	 */
+	pid = i2c_smbus_read_byte_data(priv->client, PID);
+	ver = i2c_smbus_read_byte_data(priv->client, VER);
+	if (pid != 0x77 ||
+	    ver != 0x21) {
+		if (priv->info->link.power)
+			priv->info->link.power(&priv->client->dev, 0);
+		return -ENODEV;
+	}
+
+	dev_info(&icd->dev,
+		 "ov772x Product ID %0x:%0x Manufacturer ID %x:%x\n",
+		 pid,
+		 ver,
+		 i2c_smbus_read_byte_data(priv->client, MIDH),
+		 i2c_smbus_read_byte_data(priv->client, MIDL));
+
+
+	return soc_camera_video_start(icd);
+}
+
+static void ov772x_video_remove(struct soc_camera_device *icd)
+{
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+
+	soc_camera_video_stop(icd);
+
+	if (priv->info->link.power)
+		priv->info->link.power(&priv->client->dev, 0);
+
+}
+
+static struct soc_camera_ops ov772x_ops = {
+	.owner			= THIS_MODULE,
+	.probe			= ov772x_video_probe,
+	.remove			= ov772x_video_remove,
+	.init			= ov772x_init,
+	.release		= ov772x_release,
+	.start_capture		= ov772x_start_capture,
+	.stop_capture		= ov772x_stop_capture,
+	.set_fmt_cap		= ov772x_set_fmt_cap,
+	.try_fmt_cap		= ov772x_try_fmt_cap,
+	.set_bus_param		= ov772x_set_bus_param,
+	.query_bus_param	= ov772x_query_bus_param,
+	.get_chip_id		= ov772x_get_chip_id,
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+	.get_register		= ov772x_get_register,
+	.set_register		= ov772x_set_register,
+#endif
+};
+
+/*
+ * i2c_driver function
+ */
+
+static int ov772x_probe(struct i2c_client          *client,
+			const struct i2c_device_id *did)
+
+{
+	struct ov772x_priv        *priv;
+	struct ov772x_camera_info *info;
+	struct soc_camera_device  *icd;
+	struct i2c_adapter        *adapter = to_i2c_adapter(client->dev.parent);
+	int                        ret;
+
+	info = client->dev.platform_data;
+	if (!info)
+		return -EINVAL;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&adapter->dev,
+			"I2C-Adapter doesn't support "
+			"I2C_FUNC_SMBUS_BYTE_DATA\n");
+		return -EIO;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->info   = info;
+	priv->client = client;
+	i2c_set_clientdata(client, priv);
+
+	icd             = &priv->icd;
+	icd->ops        = &ov772x_ops;
+	icd->control    = &client->dev;
+	icd->width_max  = MAX_WIDTH;
+	icd->height_max = MAX_HEIGHT;
+	icd->iface      = priv->info->link.bus_id;
+
+	ret = soc_camera_device_register(icd);
+
+	if (ret)
+		kfree(priv);
+
+	return ret;
+}
+
+static int ov772x_remove(struct i2c_client *client)
+{
+	struct ov772x_priv *priv = i2c_get_clientdata(client);
+
+	soc_camera_device_unregister(&priv->icd);
+	kfree(priv);
+	return 0;
+}
+
+static const struct i2c_device_id ov772x_id[] = {
+	{"ov772x", 0},
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ov772x_id);
+
+
+static struct i2c_driver ov772x_i2c_driver = {
+	.driver = {
+		.name = "ov772x",
+	},
+	.probe    = ov772x_probe,
+	.remove   = ov772x_remove,
+	.id_table = ov772x_id,
+};
+
+/*
+ * module function
+ */
+
+static int __init ov772x_module_init(void)
+{
+	printk(KERN_INFO "ov772x driver\n");
+	return i2c_add_driver(&ov772x_i2c_driver);
+}
+
+static void __exit ov772x_module_exit(void)
+{
+	i2c_del_driver(&ov772x_i2c_driver);
+}
+
+module_init(ov772x_module_init);
+module_exit(ov772x_module_exit);
+
+MODULE_DESCRIPTION("SoC Camera driver for ov772x");
+MODULE_AUTHOR("Kuninori Morimoto");
+MODULE_LICENSE("GPL v2");
diff --git a/include/media/ov772x.h b/include/media/ov772x.h
new file mode 100644
index 000000000000..e391d55edb95
--- /dev/null
+++ b/include/media/ov772x.h
@@ -0,0 +1,21 @@
+/* ov772x Camera
+ *
+ * Copyright (C) 2008 Renesas Solutions Corp.
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __OV772X_H__
+#define __OV772X_H__
+
+#include <media/soc_camera.h>
+
+struct ov772x_camera_info {
+	unsigned long          buswidth;
+	struct soc_camera_link link;
+};
+
+#endif /* __OV772X_H__ */
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index d73a8e9028a5..bfe5142e6672 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -60,6 +60,7 @@ enum {
 
 	/* OmniVision sensors: reserved range 250-299 */
 	V4L2_IDENT_OV7670 = 250,
+	V4L2_IDENT_OV772X = 251,
 
 	/* Conexant MPEG encoder/decoders: reserved range 410-420 */
 	V4L2_IDENT_CX23415 = 415,
-- 
cgit v1.2.3


From f473bf76c71ca734a16f9331ce6b6e9603641888 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 1 Nov 2008 08:25:11 -0300
Subject: V4L/DVB (9503): v4l: remove inode argument from video_usercopy

The inode argument was never used. Removing it from video_usercopy
brings the function pointer type of video_usercopy in line with similar
v4l2 functions, thus simplifying several drivers.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/saa7146_fops.c             |  2 +-
 drivers/media/common/saa7146_video.c            | 10 ++--------
 drivers/media/video/arv.c                       |  5 ++---
 drivers/media/video/bw-qcam.c                   |  5 ++---
 drivers/media/video/c-qcam.c                    |  7 +++----
 drivers/media/video/cpia.c                      |  9 ++++-----
 drivers/media/video/cpia2/cpia2_v4l.c           | 13 ++++++-------
 drivers/media/video/ov511.c                     |  5 ++---
 drivers/media/video/pms.c                       |  5 ++---
 drivers/media/video/pvrusb2/pvrusb2-v4l2.c      | 13 +++----------
 drivers/media/video/pwc/pwc-if.c                |  2 +-
 drivers/media/video/pwc/pwc-v4l.c               |  3 +--
 drivers/media/video/pwc/pwc.h                   |  3 +--
 drivers/media/video/saa5246a.c                  |  5 ++---
 drivers/media/video/saa5249.c                   |  5 ++---
 drivers/media/video/se401.c                     |  5 ++---
 drivers/media/video/stv680.c                    |  5 ++---
 drivers/media/video/usbvideo/usbvideo.c         |  5 ++---
 drivers/media/video/usbvision/usbvision-video.c |  4 ++--
 drivers/media/video/uvc/uvc_v4l2.c              | 13 +++----------
 drivers/media/video/v4l2-ioctl.c                |  8 +++-----
 drivers/media/video/vino.c                      |  5 ++---
 drivers/media/video/w9966.c                     |  5 ++---
 drivers/media/video/zoran/zoran_driver.c        |  8 ++------
 include/media/saa7146_vv.h                      |  3 +--
 include/media/v4l2-ioctl.h                      |  6 ++----
 26 files changed, 57 insertions(+), 102 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/saa7146_fops.c b/drivers/media/common/saa7146_fops.c
index 127b0526a727..7d844af88384 100644
--- a/drivers/media/common/saa7146_fops.c
+++ b/drivers/media/common/saa7146_fops.c
@@ -313,7 +313,7 @@ static int fops_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 /*
 	DEB_EE(("inode:%p, file:%p, cmd:%d, arg:%li\n",inode, file, cmd, arg));
 */
-	return video_usercopy(inode, file, cmd, arg, saa7146_video_do_ioctl);
+	return video_usercopy(file, cmd, arg, saa7146_video_do_ioctl);
 }
 
 static int fops_mmap(struct file *file, struct vm_area_struct * vma)
diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index fe0bd55977e3..101b01dbb8ea 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -834,7 +834,7 @@ static int video_end(struct saa7146_fh *fh, struct file *file)
  * copying is done already, arg is a kernel pointer.
  */
 
-static int __saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
+int saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct saa7146_fh *fh  = file->private_data;
 	struct saa7146_dev *dev = fh->dev;
@@ -1216,17 +1216,11 @@ static int __saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *a
 #endif
 	default:
 		return v4l_compat_translate_ioctl(file, cmd, arg,
-						  __saa7146_video_do_ioctl);
+						  saa7146_video_do_ioctl);
 	}
 	return 0;
 }
 
-int saa7146_video_do_ioctl(struct inode *inode, struct file *file,
-				    unsigned int cmd, void *arg)
-{
-	return __saa7146_video_do_ioctl(file, cmd, arg);
-}
-
 /*********************************************************************************/
 /* buffer handling functions                                                  */
 
diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c
index e09b00693230..2ba6abd92b6f 100644
--- a/drivers/media/video/arv.c
+++ b/drivers/media/video/arv.c
@@ -396,8 +396,7 @@ out_up:
 	return ret;
 }
 
-static int ar_do_ioctl(struct inode *inode, struct file *file,
-		       unsigned int cmd, void *arg)
+static int ar_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct ar_device *ar = video_get_drvdata(dev);
@@ -543,7 +542,7 @@ static int ar_do_ioctl(struct inode *inode, struct file *file,
 static int ar_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 		    unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, ar_do_ioctl);
+	return video_usercopy(file, cmd, arg, ar_do_ioctl);
 }
 
 #if USE_INT
diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c
index ace4ff9ea023..17f80d03f38e 100644
--- a/drivers/media/video/bw-qcam.c
+++ b/drivers/media/video/bw-qcam.c
@@ -706,8 +706,7 @@ static long qc_capture(struct qcam_device * q, char __user *buf, unsigned long l
  *	Video4linux interfacing
  */
 
-static int qcam_do_ioctl(struct inode *inode, struct file *file,
-			 unsigned int cmd, void *arg)
+static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam=(struct qcam_device *)dev;
@@ -867,7 +866,7 @@ static int qcam_do_ioctl(struct inode *inode, struct file *file,
 static int qcam_ioctl(struct inode *inode, struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, qcam_do_ioctl);
+	return video_usercopy(file, cmd, arg, qcam_do_ioctl);
 }
 
 static ssize_t qcam_read(struct file *file, char __user *buf,
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index 0f930d351466..21c71eb085db 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -500,8 +500,7 @@ static long qc_capture(struct qcam_device *q, char __user *buf, unsigned long le
  *	Video4linux interfacing
  */
 
-static int qcam_do_ioctl(struct inode *inode, struct file *file,
-			 unsigned int cmd, void *arg)
+static int qcam_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct qcam_device *qcam=(struct qcam_device *)dev;
@@ -667,9 +666,9 @@ static int qcam_do_ioctl(struct inode *inode, struct file *file,
 }
 
 static int qcam_ioctl(struct inode *inode, struct file *file,
-		     unsigned int cmd, unsigned long arg)
+		      unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, qcam_do_ioctl);
+	return video_usercopy(file, cmd, arg, qcam_do_ioctl);
 }
 
 static ssize_t qcam_read(struct file *file, char __user *buf,
diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c
index 16c094f77852..028a400d2453 100644
--- a/drivers/media/video/cpia.c
+++ b/drivers/media/video/cpia.c
@@ -3333,8 +3333,7 @@ static ssize_t cpia_read(struct file *file, char __user *buf,
 	return cam->decompressed_frame.count;
 }
 
-static int cpia_do_ioctl(struct inode *inode, struct file *file,
-			 unsigned int ioctlnr, void *arg)
+static int cpia_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = file->private_data;
 	struct cam_data *cam = video_get_drvdata(dev);
@@ -3347,9 +3346,9 @@ static int cpia_do_ioctl(struct inode *inode, struct file *file,
 	if (mutex_lock_interruptible(&cam->busy_lock))
 		return -EINTR;
 
-	//DBG("cpia_ioctl: %u\n", ioctlnr);
+	/* DBG("cpia_ioctl: %u\n", cmd); */
 
-	switch (ioctlnr) {
+	switch (cmd) {
 	/* query capabilities */
 	case VIDIOCGCAP:
 	{
@@ -3724,7 +3723,7 @@ static int cpia_do_ioctl(struct inode *inode, struct file *file,
 static int cpia_ioctl(struct inode *inode, struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, cpia_do_ioctl);
+	return video_usercopy(file, cmd, arg, cpia_do_ioctl);
 }
 
 
diff --git a/drivers/media/video/cpia2/cpia2_v4l.c b/drivers/media/video/cpia2/cpia2_v4l.c
index d9de2e8e00d4..3c2d7eac1197 100644
--- a/drivers/media/video/cpia2/cpia2_v4l.c
+++ b/drivers/media/video/cpia2/cpia2_v4l.c
@@ -1572,8 +1572,7 @@ static int ioctl_dqbuf(void *arg,struct camera_data *cam, struct file *file)
  *  cpia2_ioctl
  *
  *****************************************************************************/
-static int cpia2_do_ioctl(struct inode *inode, struct file *file,
-			  unsigned int ioctl_nr, void *arg)
+static int cpia2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct camera_data *cam = video_drvdata(file);
 	int retval = 0;
@@ -1591,7 +1590,7 @@ static int cpia2_do_ioctl(struct inode *inode, struct file *file,
 	}
 
 	/* Priority check */
-	switch (ioctl_nr) {
+	switch (cmd) {
 	case VIDIOCSWIN:
 	case VIDIOCMCAPTURE:
 	case VIDIOC_S_FMT:
@@ -1618,7 +1617,7 @@ static int cpia2_do_ioctl(struct inode *inode, struct file *file,
 		break;
 	}
 
-	switch (ioctl_nr) {
+	switch (cmd) {
 	case VIDIOCGCAP:	/* query capabilities */
 		retval = ioctl_cap_query(arg, cam);
 		break;
@@ -1683,7 +1682,7 @@ static int cpia2_do_ioctl(struct inode *inode, struct file *file,
 	case VIDIOC_ENUMINPUT:
 	case VIDIOC_G_INPUT:
 	case VIDIOC_S_INPUT:
-		retval = ioctl_input(ioctl_nr, arg,cam);
+		retval = ioctl_input(cmd, arg, cam);
 		break;
 
 	case VIDIOC_ENUM_FMT:
@@ -1843,9 +1842,9 @@ static int cpia2_do_ioctl(struct inode *inode, struct file *file,
 }
 
 static int cpia2_ioctl(struct inode *inode, struct file *file,
-		       unsigned int ioctl_nr, unsigned long iarg)
+		       unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, ioctl_nr, iarg, cpia2_do_ioctl);
+	return video_usercopy(file, cmd, arg, cpia2_do_ioctl);
 }
 
 /******************************************************************************
diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index 210f1240b331..6ee9b69cc4a9 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c
@@ -4011,8 +4011,7 @@ ov51x_v4l1_close(struct inode *inode, struct file *file)
 
 /* Do not call this function directly! */
 static int
-ov51x_v4l1_ioctl_internal(struct inode *inode, struct file *file,
-			  unsigned int cmd, void *arg)
+ov51x_v4l1_ioctl_internal(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = file->private_data;
 	struct usb_ov511 *ov = video_get_drvdata(vdev);
@@ -4461,7 +4460,7 @@ ov51x_v4l1_ioctl(struct inode *inode, struct file *file,
 	if (mutex_lock_interruptible(&ov->lock))
 		return -EINTR;
 
-	rc = video_usercopy(inode, file, cmd, arg, ov51x_v4l1_ioctl_internal);
+	rc = video_usercopy(file, cmd, arg, ov51x_v4l1_ioctl_internal);
 
 	mutex_unlock(&ov->lock);
 	return rc;
diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c
index 8062d0016997..45730fac1570 100644
--- a/drivers/media/video/pms.c
+++ b/drivers/media/video/pms.c
@@ -680,8 +680,7 @@ static int pms_capture(struct pms_device *dev, char __user *buf, int rgb555, int
  *	Video4linux interfacing
  */
 
-static int pms_do_ioctl(struct inode *inode, struct file *file,
-			unsigned int cmd, void *arg)
+static int pms_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *dev = video_devdata(file);
 	struct pms_device *pd=(struct pms_device *)dev;
@@ -866,7 +865,7 @@ static int pms_do_ioctl(struct inode *inode, struct file *file,
 static int pms_ioctl(struct inode *inode, struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, pms_do_ioctl);
+	return video_usercopy(file, cmd, arg, pms_do_ioctl);
 }
 
 static ssize_t pms_read(struct file *file, char __user *buf,
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index 97ed95957992..52af1c435965 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -168,8 +168,7 @@ static const char *get_v4l_name(int v4l_type)
  * This is part of Video 4 Linux API. The procedure handles ioctl() calls.
  *
  */
-static int __pvr2_v4l2_do_ioctl(struct file *file,
-			      unsigned int cmd, void *arg)
+static int pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct pvr2_v4l2_fh *fh = file->private_data;
 	struct pvr2_v4l2 *vp = fh->vhead;
@@ -864,7 +863,7 @@ static int __pvr2_v4l2_do_ioctl(struct file *file,
 
 	default :
 		ret = v4l_compat_translate_ioctl(file, cmd,
-						 arg, __pvr2_v4l2_do_ioctl);
+						 arg, pvr2_v4l2_do_ioctl);
 	}
 
 	pvr2_hdw_commit_ctl(hdw);
@@ -890,12 +889,6 @@ static int __pvr2_v4l2_do_ioctl(struct file *file,
 	return ret;
 }
 
-static int pvr2_v4l2_do_ioctl(struct inode *inode, struct file *file,
-			      unsigned int cmd, void *arg)
-{
-	return __pvr2_v4l2_do_ioctl(file, cmd, arg);
-}
-
 static void pvr2_v4l2_dev_destroy(struct pvr2_v4l2_dev *dip)
 {
 	int num = dip->devbase.num;
@@ -963,7 +956,7 @@ static int pvr2_v4l2_ioctl(struct inode *inode, struct file *file,
 #define IVTV_IOC_G_CODEC        0xFFEE7703
 #define IVTV_IOC_S_CODEC        0xFFEE7704
 	if (cmd == IVTV_IOC_G_CODEC || cmd == IVTV_IOC_S_CODEC) return 0;
-	return video_usercopy(inode, file, cmd, arg, pvr2_v4l2_do_ioctl);
+	return video_usercopy(file, cmd, arg, pvr2_v4l2_do_ioctl);
 }
 
 
diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c
index f3897a3fdb75..1ce9da167b7e 100644
--- a/drivers/media/video/pwc/pwc-if.c
+++ b/drivers/media/video/pwc/pwc-if.c
@@ -1412,7 +1412,7 @@ static int pwc_video_ioctl(struct inode *inode, struct file *file,
 
 	mutex_lock(&pdev->modlock);
 	if (!pdev->unplugged)
-		r = video_usercopy(inode, file, cmd, arg, pwc_video_do_ioctl);
+		r = video_usercopy(file, cmd, arg, pwc_video_do_ioctl);
 	mutex_unlock(&pdev->modlock);
 out:
 	return r;
diff --git a/drivers/media/video/pwc/pwc-v4l.c b/drivers/media/video/pwc/pwc-v4l.c
index 76a1376c9751..d7c147328e35 100644
--- a/drivers/media/video/pwc/pwc-v4l.c
+++ b/drivers/media/video/pwc/pwc-v4l.c
@@ -337,8 +337,7 @@ static int pwc_vidioc_set_fmt(struct pwc_device *pdev, struct v4l2_format *f)
 
 }
 
-int pwc_video_do_ioctl(struct inode *inode, struct file *file,
-		       unsigned int cmd, void *arg)
+int pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct pwc_device *pdev;
diff --git a/drivers/media/video/pwc/pwc.h b/drivers/media/video/pwc/pwc.h
index 74178754b39b..c046a2535668 100644
--- a/drivers/media/video/pwc/pwc.h
+++ b/drivers/media/video/pwc/pwc.h
@@ -340,8 +340,7 @@ extern int pwc_camera_power(struct pwc_device *pdev, int power);
 extern int pwc_ioctl(struct pwc_device *pdev, unsigned int cmd, void *arg);
 
 /** Functions in pwc-v4l.c */
-extern int pwc_video_do_ioctl(struct inode *inode, struct file *file,
-			      unsigned int cmd, void *arg);
+extern int pwc_video_do_ioctl(struct file *file, unsigned int cmd, void *arg);
 
 /** pwc-uncompress.c */
 /* Expand frame to image, possibly including decompression. Uses read_frame and fill_image */
diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c
index 9d4582651467..f159441e9375 100644
--- a/drivers/media/video/saa5246a.c
+++ b/drivers/media/video/saa5246a.c
@@ -804,8 +804,7 @@ static inline int saa5246a_stop_dau(struct saa5246a_device *t,
  *
  *  Returns 0 if successful
  */
-static int do_saa5246a_ioctl(struct inode *inode, struct file *file,
-			    unsigned int cmd, void *arg)
+static int do_saa5246a_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct saa5246a_device *t = video_drvdata(file);
 
@@ -953,7 +952,7 @@ static int saa5246a_ioctl(struct inode *inode, struct file *file,
 
 	cmd = vtx_fix_command(cmd);
 	mutex_lock(&t->lock);
-	err = video_usercopy(inode, file, cmd, arg, do_saa5246a_ioctl);
+	err = video_usercopy(file, cmd, arg, do_saa5246a_ioctl);
 	mutex_unlock(&t->lock);
 	return err;
 }
diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c
index 1594773e02d0..6ef3affb97f1 100644
--- a/drivers/media/video/saa5249.c
+++ b/drivers/media/video/saa5249.c
@@ -190,8 +190,7 @@ static int i2c_getdata(struct saa5249_device *t, int count, u8 *buf)
  *	Standard character-device-driver functions
  */
 
-static int do_saa5249_ioctl(struct inode *inode, struct file *file,
-			    unsigned int cmd, void *arg)
+static int do_saa5249_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	static int virtual_mode = false;
 	struct saa5249_device *t = video_drvdata(file);
@@ -488,7 +487,7 @@ static int saa5249_ioctl(struct inode *inode, struct file *file,
 
 	cmd = vtx_fix_command(cmd);
 	mutex_lock(&t->lock);
-	err = video_usercopy(inode,file,cmd,arg,do_saa5249_ioctl);
+	err = video_usercopy(file, cmd, arg, do_saa5249_ioctl);
 	mutex_unlock(&t->lock);
 	return err;
 }
diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c
index 044a2e94c34d..d652f25eef0e 100644
--- a/drivers/media/video/se401.c
+++ b/drivers/media/video/se401.c
@@ -975,8 +975,7 @@ static int se401_close(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int se401_do_ioctl(struct inode *inode, struct file *file,
-			  unsigned int cmd, void *arg)
+static int se401_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = file->private_data;
 	struct usb_se401 *se401 = (struct usb_se401 *)vdev;
@@ -1142,7 +1141,7 @@ static int se401_do_ioctl(struct inode *inode, struct file *file,
 static int se401_ioctl(struct inode *inode, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, se401_do_ioctl);
+	return video_usercopy(file, cmd, arg, se401_do_ioctl);
 }
 
 static ssize_t se401_read(struct file *file, char __user *buf,
diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c
index 328c41b1517d..42acc92c182d 100644
--- a/drivers/media/video/stv680.c
+++ b/drivers/media/video/stv680.c
@@ -1132,8 +1132,7 @@ static int stv_close (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int stv680_do_ioctl (struct inode *inode, struct file *file,
-			    unsigned int cmd, void *arg)
+static int stv680_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = file->private_data;
 	struct usb_stv *stv680 = video_get_drvdata(vdev);
@@ -1303,7 +1302,7 @@ static int stv680_do_ioctl (struct inode *inode, struct file *file,
 static int stv680_ioctl(struct inode *inode, struct file *file,
 			unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, stv680_do_ioctl);
+	return video_usercopy(file, cmd, arg, stv680_do_ioctl);
 }
 
 static int stv680_mmap (struct file *file, struct vm_area_struct *vma)
diff --git a/drivers/media/video/usbvideo/usbvideo.c b/drivers/media/video/usbvideo/usbvideo.c
index 7c575bb8184f..a42049de2bb6 100644
--- a/drivers/media/video/usbvideo/usbvideo.c
+++ b/drivers/media/video/usbvideo/usbvideo.c
@@ -1281,8 +1281,7 @@ static int usbvideo_v4l_close(struct inode *inode, struct file *file)
  * History:
  * 22-Jan-2000 Corrected VIDIOCSPICT to reject unsupported settings.
  */
-static int usbvideo_v4l_do_ioctl(struct inode *inode, struct file *file,
-				 unsigned int cmd, void *arg)
+static int usbvideo_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct uvd *uvd = file->private_data;
 
@@ -1505,7 +1504,7 @@ static int usbvideo_v4l_do_ioctl(struct inode *inode, struct file *file,
 static int usbvideo_v4l_ioctl(struct inode *inode, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, usbvideo_v4l_do_ioctl);
+	return video_usercopy(file, cmd, arg, usbvideo_v4l_do_ioctl);
 }
 
 /*
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index d185b57fdcd0..062819cabec4 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -1278,7 +1278,7 @@ static int usbvision_vbi_close(struct inode *inode, struct file *file)
 	return -ENODEV;
 }
 
-static int usbvision_do_vbi_ioctl(struct inode *inode, struct file *file,
+static int usbvision_do_vbi_ioctl(struct file *file,
 				 unsigned int cmd, void *arg)
 {
 	/* TODO */
@@ -1288,7 +1288,7 @@ static int usbvision_do_vbi_ioctl(struct inode *inode, struct file *file,
 static int usbvision_vbi_ioctl(struct inode *inode, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, usbvision_do_vbi_ioctl);
+	return video_usercopy(file, cmd, arg, usbvision_do_vbi_ioctl);
 }
 
 
diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c
index 758dfefaba8d..8361367806a9 100644
--- a/drivers/media/video/uvc/uvc_v4l2.c
+++ b/drivers/media/video/uvc/uvc_v4l2.c
@@ -464,8 +464,7 @@ static int uvc_v4l2_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int __uvc_v4l2_do_ioctl(struct file *file,
-		     unsigned int cmd, void *arg)
+static int uvc_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct video_device *vdev = video_devdata(file);
 	struct uvc_video_device *video = video_get_drvdata(vdev);
@@ -979,7 +978,7 @@ static int __uvc_v4l2_do_ioctl(struct file *file,
 
 	default:
 		if ((ret = v4l_compat_translate_ioctl(file, cmd, arg,
-			__uvc_v4l2_do_ioctl)) == -ENOIOCTLCMD)
+			uvc_v4l2_do_ioctl)) == -ENOIOCTLCMD)
 			uvc_trace(UVC_TRACE_IOCTL, "Unknown ioctl 0x%08x\n",
 				  cmd);
 		return ret;
@@ -988,17 +987,11 @@ static int __uvc_v4l2_do_ioctl(struct file *file,
 	return ret;
 }
 
-static int uvc_v4l2_do_ioctl(struct inode *inode, struct file *file,
-			      unsigned int cmd, void *arg)
-{
-	return __uvc_v4l2_do_ioctl(file, cmd, arg);
-}
-
 static int uvc_v4l2_ioctl(struct inode *inode, struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
 	uvc_trace(UVC_TRACE_CALLS, "uvc_v4l2_ioctl\n");
-	return video_usercopy(inode, file, cmd, arg, uvc_v4l2_do_ioctl);
+	return video_usercopy(file, cmd, arg, uvc_v4l2_do_ioctl);
 }
 
 static ssize_t uvc_v4l2_read(struct file *file, char __user *data,
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index eef5814aebe5..0e648cb3b399 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -393,10 +393,8 @@ video_fix_command(unsigned int cmd)
  * Obsolete usercopy function - Should be removed soon
  */
 int
-video_usercopy(struct inode *inode, struct file *file,
-	       unsigned int cmd, unsigned long arg,
-	       int (*func)(struct inode *inode, struct file *file,
-			   unsigned int cmd, void *arg))
+video_usercopy(struct file *file, unsigned int cmd, unsigned long arg,
+		v4l2_kioctl func)
 {
 	char	sbuf[128];
 	void    *mbuf = NULL;
@@ -458,7 +456,7 @@ video_usercopy(struct inode *inode, struct file *file,
 	}
 
 	/* call driver */
-	err = func(inode, file, cmd, parg);
+	err = func(file, cmd, parg);
 	if (err == -ENOIOCTLCMD)
 		err = -EINVAL;
 	if (is_ext_ctrl) {
diff --git a/drivers/media/video/vino.c b/drivers/media/video/vino.c
index 1efc5f3462c6..a72a361daade 100644
--- a/drivers/media/video/vino.c
+++ b/drivers/media/video/vino.c
@@ -4237,8 +4237,7 @@ error:
 	return ret;
 }
 
-static int vino_do_ioctl(struct inode *inode, struct file *file,
-		      unsigned int cmd, void *arg)
+static int vino_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct vino_channel_settings *vcs = video_drvdata(file);
 
@@ -4353,7 +4352,7 @@ static int vino_ioctl(struct inode *inode, struct file *file,
 	if (mutex_lock_interruptible(&vcs->mutex))
 		return -EINTR;
 
-	ret = video_usercopy(inode, file, cmd, arg, vino_do_ioctl);
+	ret = video_usercopy(file, cmd, arg, vino_do_ioctl);
 
 	mutex_unlock(&vcs->mutex);
 
diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c
index b2dbe48a92bb..56c570c267ea 100644
--- a/drivers/media/video/w9966.c
+++ b/drivers/media/video/w9966.c
@@ -727,8 +727,7 @@ static int w9966_wReg_i2c(struct w9966_dev* cam, int reg, int data)
  *	Video4linux interfacing
  */
 
-static int w9966_v4l_do_ioctl(struct inode *inode, struct file *file,
-			      unsigned int cmd, void *arg)
+static int w9966_v4l_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct w9966_dev *cam = video_drvdata(file);
 
@@ -881,7 +880,7 @@ static int w9966_v4l_do_ioctl(struct inode *inode, struct file *file,
 static int w9966_v4l_ioctl(struct inode *inode, struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, w9966_v4l_do_ioctl);
+	return video_usercopy(file, cmd, arg, w9966_v4l_do_ioctl);
 }
 
 // Capture data
diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c
index db11ab9e60da..00b97d97aeaa 100644
--- a/drivers/media/video/zoran/zoran_driver.c
+++ b/drivers/media/video/zoran/zoran_driver.c
@@ -1940,11 +1940,7 @@ zoran_set_input (struct zoran *zr,
  *   ioctl routine
  */
 
-static int
-zoran_do_ioctl (struct inode *inode,
-		struct file  *file,
-		unsigned int  cmd,
-		void         *arg)
+static int zoran_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 {
 	struct zoran_fh *fh = file->private_data;
 	struct zoran *zr = fh->zr;
@@ -4201,7 +4197,7 @@ zoran_ioctl (struct inode *inode,
 	     unsigned int  cmd,
 	     unsigned long arg)
 {
-	return video_usercopy(inode, file, cmd, arg, zoran_do_ioctl);
+	return video_usercopy(file, cmd, arg, zoran_do_ioctl);
 }
 
 static unsigned int
diff --git a/include/media/saa7146_vv.h b/include/media/saa7146_vv.h
index 1d104096619c..6bbb0d93bb5f 100644
--- a/include/media/saa7146_vv.h
+++ b/include/media/saa7146_vv.h
@@ -216,8 +216,7 @@ void saa7146_set_gpio(struct saa7146_dev *saa, u8 pin, u8 data);
 extern struct saa7146_use_ops saa7146_video_uops;
 int saa7146_start_preview(struct saa7146_fh *fh);
 int saa7146_stop_preview(struct saa7146_fh *fh);
-int saa7146_video_do_ioctl(struct inode *inode, struct file *file,
-			   unsigned int cmd, void *arg);
+int saa7146_video_do_ioctl(struct file *file, unsigned int cmd, void *arg);
 
 /* from saa7146_vbi.c */
 extern struct saa7146_use_ops saa7146_vbi_uops;
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index e6ba25b3d7c8..f5985724c456 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -285,10 +285,8 @@ extern long v4l_compat_ioctl32(struct file *file, unsigned int cmd,
 				unsigned long arg);
 
 /* Include support for obsoleted stuff */
-extern int video_usercopy(struct inode *inode, struct file *file,
-			  unsigned int cmd, unsigned long arg,
-			  int (*func)(struct inode *inode, struct file *file,
-				      unsigned int cmd, void *arg));
+extern int video_usercopy(struct file *file, unsigned int cmd,
+				unsigned long arg, v4l2_kioctl func);
 
 /* Standard handlers for V4L ioctl's */
 
-- 
cgit v1.2.3


From af128a102c4aee994b4ff6e422b3cfab17127578 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Oct 2008 00:51:46 -0300
Subject: V4L/DVB (9521): V4L: struct device - replace bus_id with dev_name(),
 dev_set_name()

This patch is part of a larger patch series which will remove
the "char bus_id[20]" name string from struct device. The device
name is managed in the kobject anyway, and without any size
limitation, and just needlessly copied into "struct device".

To set and read the device name dev_name(dev) and dev_set_name(dev)
must be used. If your code uses static kobjects, which it shouldn't
do, "const char *init_name" can be used to statically provide the
name the registered device should have. At registration time, the
init_name field is cleared, to enforce the use of dev_name(dev) to
access the device name at a later time.

We need to get rid of all occurrences of bus_id in the entire tree
to be able to enable the new interface. Please apply this patch,
and possibly convert any remaining remaining occurrences of bus_id.

We want to submit a patch to -next, which will remove bus_id from
"struct device", to find the remaining pieces to convert, and finally
switch over to the new api, which will remove the 20 bytes array
and does no longer have a size limitation.

Thanks,
Kay

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/bt8xx/bttv-gpio.c           | 7 +++----
 drivers/media/video/bt8xx/bttv.h                | 2 +-
 drivers/media/video/em28xx/em28xx-video.c       | 4 ++--
 drivers/media/video/et61x251/et61x251_core.c    | 2 +-
 drivers/media/video/ir-kbd-i2c.c                | 6 +++---
 drivers/media/video/pvrusb2/pvrusb2-hdw.c       | 2 +-
 drivers/media/video/pvrusb2/pvrusb2-sysfs.c     | 4 ++--
 drivers/media/video/sh_mobile_ceu_camera.c      | 6 +++---
 drivers/media/video/sn9c102/sn9c102_core.c      | 2 +-
 drivers/media/video/soc_camera.c                | 5 ++---
 drivers/media/video/usbvision/usbvision-video.c | 2 +-
 drivers/media/video/v4l2-dev.c                  | 2 +-
 drivers/media/video/zc0301/zc0301_core.c        | 2 +-
 include/media/soc_camera.h                      | 4 ++--
 14 files changed, 24 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/bt8xx/bttv-gpio.c b/drivers/media/video/bt8xx/bttv-gpio.c
index dce6dae5740e..74c325e594a2 100644
--- a/drivers/media/video/bt8xx/bttv-gpio.c
+++ b/drivers/media/video/bt8xx/bttv-gpio.c
@@ -42,7 +42,7 @@ static int bttv_sub_bus_match(struct device *dev, struct device_driver *drv)
 	struct bttv_sub_driver *sub = to_bttv_sub_drv(drv);
 	int len = strlen(sub->wanted);
 
-	if (0 == strncmp(dev->bus_id, sub->wanted, len))
+	if (0 == strncmp(dev_name(dev), sub->wanted, len))
 		return 1;
 	return 0;
 }
@@ -91,15 +91,14 @@ int bttv_sub_add_device(struct bttv_core *core, char *name)
 	sub->dev.parent  = &core->pci->dev;
 	sub->dev.bus     = &bttv_sub_bus_type;
 	sub->dev.release = release_sub_device;
-	snprintf(sub->dev.bus_id,sizeof(sub->dev.bus_id),"%s%d",
-		 name, core->nr);
+	dev_set_name(&sub->dev, "%s%d", name, core->nr);
 
 	err = device_register(&sub->dev);
 	if (0 != err) {
 		kfree(sub);
 		return err;
 	}
-	printk("bttv%d: add subdevice \"%s\"\n", core->nr, sub->dev.bus_id);
+	printk("bttv%d: add subdevice \"%s\"\n", core->nr, dev_name(&sub->dev));
 	list_add_tail(&sub->list,&core->subs);
 	return 0;
 }
diff --git a/drivers/media/video/bt8xx/bttv.h b/drivers/media/video/bt8xx/bttv.h
index 46cb90e0985b..bc2c88499ab9 100644
--- a/drivers/media/video/bt8xx/bttv.h
+++ b/drivers/media/video/bt8xx/bttv.h
@@ -308,7 +308,7 @@ struct bttv_sub_device {
 
 struct bttv_sub_driver {
 	struct device_driver   drv;
-	char                   wanted[BUS_ID_SIZE];
+	char                   wanted[20];
 	int                    (*probe)(struct bttv_sub_device *sub);
 	void                   (*remove)(struct bttv_sub_device *sub);
 };
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 4ea1f1e04897..415dff830f88 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1271,7 +1271,7 @@ static int vidioc_querycap(struct file *file, void  *priv,
 
 	strlcpy(cap->driver, "em28xx", sizeof(cap->driver));
 	strlcpy(cap->card, em28xx_boards[dev->model].name, sizeof(cap->card));
-	strlcpy(cap->bus_info, dev->udev->dev.bus_id, sizeof(cap->bus_info));
+	strlcpy(cap->bus_info, dev_name(&dev->udev->dev), sizeof(cap->bus_info));
 
 	cap->version = EM28XX_VERSION_CODE;
 
@@ -1424,7 +1424,7 @@ static int radio_querycap(struct file *file, void  *priv,
 
 	strlcpy(cap->driver, "em28xx", sizeof(cap->driver));
 	strlcpy(cap->card, em28xx_boards[dev->model].name, sizeof(cap->card));
-	strlcpy(cap->bus_info, dev->udev->dev.bus_id, sizeof(cap->bus_info));
+	strlcpy(cap->bus_info, dev_name(&dev->udev->dev), sizeof(cap->bus_info));
 
 	cap->version = EM28XX_VERSION_CODE;
 	cap->capabilities = V4L2_CAP_TUNER;
diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index 9d0ef96c23ff..83c07112c59d 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -1581,7 +1581,7 @@ et61x251_vidioc_querycap(struct et61x251_device* cam, void __user * arg)
 
 	strlcpy(cap.card, cam->v4ldev->name, sizeof(cap.card));
 	if (usb_make_path(cam->usbdev, cap.bus_info, sizeof(cap.bus_info)) < 0)
-		strlcpy(cap.bus_info, cam->usbdev->dev.bus_id,
+		strlcpy(cap.bus_info, dev_name(&cam->usbdev->dev),
 			sizeof(cap.bus_info));
 
 	if (copy_to_user(arg, &cap, sizeof(cap)))
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index efe849981ab7..d4658c56eddc 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -385,10 +385,10 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 		goto err_out_detach;
 	}
 
-	/* Phys addr can only be set after attaching (for ir->c.dev.bus_id) */
+	/* Phys addr can only be set after attaching (for ir->c.dev) */
 	snprintf(ir->phys, sizeof(ir->phys), "%s/%s/ir0",
-		 ir->c.adapter->dev.bus_id,
-		 ir->c.dev.bus_id);
+		 dev_name(&ir->c.adapter->dev),
+		 dev_name(&ir->c.dev));
 
 	/* init + register input device */
 	ir_input_init(input_dev, &ir->ir, ir_type, ir->ir_codes);
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
index 5b81ba469641..4358079f1966 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
@@ -2395,7 +2395,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
 
 	scnprintf(hdw->bus_info,sizeof(hdw->bus_info),
 		  "usb %s address %d",
-		  hdw->usb_dev->dev.bus_id,
+		  dev_name(&hdw->usb_dev->dev),
 		  hdw->usb_dev->devnum);
 
 	ifnum = hdw->usb_intf->cur_altsetting->desc.bInterfaceNumber;
diff --git a/drivers/media/video/pvrusb2/pvrusb2-sysfs.c b/drivers/media/video/pvrusb2/pvrusb2-sysfs.c
index 733680f21317..e641cd971453 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-sysfs.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-sysfs.c
@@ -628,10 +628,10 @@ static void class_dev_create(struct pvr2_sysfs *sfp,
 
 	class_dev->class = &class_ptr->class;
 	if (pvr2_hdw_get_sn(sfp->channel.hdw)) {
-		snprintf(class_dev->bus_id, BUS_ID_SIZE, "sn-%lu",
+		dev_set_name(class_dev, "sn-%lu",
 			 pvr2_hdw_get_sn(sfp->channel.hdw));
 	} else if (pvr2_hdw_get_unit_number(sfp->channel.hdw) >= 0) {
-		snprintf(class_dev->bus_id, BUS_ID_SIZE, "unit-%c",
+		dev_set_name(class_dev, "unit-%c",
 			 pvr2_hdw_get_unit_number(sfp->channel.hdw) + 'a');
 	} else {
 		kfree(class_dev);
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 536b1a9b310c..63bc0a6e68b0 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -616,7 +616,7 @@ static int sh_mobile_ceu_probe(struct platform_device *pdev)
 
 	/* request irq */
 	err = request_irq(pcdev->irq, sh_mobile_ceu_irq, IRQF_DISABLED,
-			  pdev->dev.bus_id, pcdev);
+			  dev_name(&pdev->dev), pcdev);
 	if (err) {
 		dev_err(&pdev->dev, "Unable to register CEU interrupt.\n");
 		goto exit_release_mem;
@@ -633,8 +633,8 @@ static int sh_mobile_ceu_probe(struct platform_device *pdev)
 	pcdev->ici.priv = pcdev;
 	pcdev->ici.dev.parent = &pdev->dev;
 	pcdev->ici.nr = pdev->id;
-	pcdev->ici.drv_name = pdev->dev.bus_id,
-	pcdev->ici.ops = &sh_mobile_ceu_host_ops,
+	pcdev->ici.drv_name = dev_name(&pdev->dev);
+	pcdev->ici.ops = &sh_mobile_ceu_host_ops;
 
 	err = soc_camera_host_register(&pcdev->ici);
 	if (err)
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index fcd2b62f92c4..01a8efb8deb1 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -2162,7 +2162,7 @@ sn9c102_vidioc_querycap(struct sn9c102_device* cam, void __user * arg)
 
 	strlcpy(cap.card, cam->v4ldev->name, sizeof(cap.card));
 	if (usb_make_path(cam->usbdev, cap.bus_info, sizeof(cap.bus_info)) < 0)
-		strlcpy(cap.bus_info, cam->usbdev->dev.bus_id,
+		strlcpy(cap.bus_info, dev_name(&cam->usbdev->dev),
 			sizeof(cap.bus_info));
 
 	if (copy_to_user(arg, &cap, sizeof(cap)))
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 66ebe5956a87..28cf5c94bd61 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -783,7 +783,7 @@ int soc_camera_host_register(struct soc_camera_host *ici)
 		return -EINVAL;
 
 	/* Number might be equal to the platform device ID */
-	sprintf(ici->dev.bus_id, "camera_host%d", ici->nr);
+	dev_set_name(&ici->dev, "camera_host%d", ici->nr);
 
 	mutex_lock(&list_lock);
 	list_for_each_entry(ix, &hosts, list) {
@@ -867,8 +867,7 @@ int soc_camera_device_register(struct soc_camera_device *icd)
 
 	icd->devnum = num;
 	icd->dev.bus = &soc_camera_bus_type;
-	snprintf(icd->dev.bus_id, sizeof(icd->dev.bus_id),
-		 "%u-%u", icd->iface, icd->devnum);
+	dev_set_name(&icd->dev, "%u-%u", icd->iface, icd->devnum);
 
 	icd->dev.release = dummy_release;
 
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 062819cabec4..48c8c4399eb8 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -523,7 +523,7 @@ static int vidioc_querycap (struct file *file, void  *priv,
 	strlcpy(vc->card,
 		usbvision_device_data[usbvision->DevModel].ModelString,
 		sizeof(vc->card));
-	strlcpy(vc->bus_info, usbvision->dev->dev.bus_id,
+	strlcpy(vc->bus_info, dev_name(&usbvision->dev->dev),
 		sizeof(vc->bus_info));
 	vc->version = USBVISION_DRIVER_VERSION;
 	vc->capabilities = V4L2_CAP_VIDEO_CAPTURE |
diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index ead6d18e51ad..682f48aadcac 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -339,7 +339,7 @@ int video_register_device_index(struct video_device *vfd, int type, int nr,
 	vfd->dev.devt = MKDEV(VIDEO_MAJOR, vfd->minor);
 	if (vfd->parent)
 		vfd->dev.parent = vfd->parent;
-	sprintf(vfd->dev.bus_id, "%s%d", name_base, nr);
+	dev_set_name(&vfd->dev, "%s%d", name_base, nr);
 	ret = device_register(&vfd->dev);
 	if (ret < 0) {
 		printk(KERN_ERR "%s: device_register failed\n", __func__);
diff --git a/drivers/media/video/zc0301/zc0301_core.c b/drivers/media/video/zc0301/zc0301_core.c
index 9fc581707638..9d00e6056491 100644
--- a/drivers/media/video/zc0301/zc0301_core.c
+++ b/drivers/media/video/zc0301/zc0301_core.c
@@ -1020,7 +1020,7 @@ zc0301_vidioc_querycap(struct zc0301_device* cam, void __user * arg)
 
 	strlcpy(cap.card, cam->v4ldev->name, sizeof(cap.card));
 	if (usb_make_path(cam->usbdev, cap.bus_info, sizeof(cap.bus_info)) < 0)
-		strlcpy(cap.bus_info, cam->usbdev->dev.bus_id,
+		strlcpy(cap.bus_info, dev_name(&cam->usbdev->dev),
 			sizeof(cap.bus_info));
 
 	if (copy_to_user(arg, &cap, sizeof(cap)))
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index c5de7bb19fda..9231e2d908f2 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -56,7 +56,7 @@ struct soc_camera_host {
 	struct device dev;
 	unsigned char nr;				/* Host number */
 	void *priv;
-	char *drv_name;
+	const char *drv_name;
 	struct soc_camera_host_ops *ops;
 };
 
@@ -107,7 +107,7 @@ extern int soc_camera_video_start(struct soc_camera_device *icd);
 extern void soc_camera_video_stop(struct soc_camera_device *icd);
 
 struct soc_camera_data_format {
-	char *name;
+	const char *name;
 	unsigned int depth;
 	__u32 fourcc;
 	enum v4l2_colorspace colorspace;
-- 
cgit v1.2.3


From 69acdf1e5a9146ec6667f6c4b439acd38c18f5ea Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Tue, 4 Nov 2008 21:59:37 -0300
Subject: V4L/DVB (9530): Add new pixel format VYUY 16 bits wide.

There were already 3 YUV formats defined :
 - YUYV
 - YVYU
 - UYVY
The only left combination is VYUY, which is added in this
patch.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4669d7e72e75..ec311d4616cd 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -293,6 +293,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_YVU420  v4l2_fourcc('Y', 'V', '1', '2') /* 12  YVU 4:2:0     */
 #define V4L2_PIX_FMT_YUYV    v4l2_fourcc('Y', 'U', 'Y', 'V') /* 16  YUV 4:2:2     */
 #define V4L2_PIX_FMT_UYVY    v4l2_fourcc('U', 'Y', 'V', 'Y') /* 16  YUV 4:2:2     */
+#define V4L2_PIX_FMT_VYUY    v4l2_fourcc('V', 'Y', 'U', 'Y') /* 16  YUV 4:2:2     */
 #define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4', '2', '2', 'P') /* 16  YVU422 planar */
 #define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4', '1', '1', 'P') /* 16  YVU411 planar */
 #define V4L2_PIX_FMT_Y41P    v4l2_fourcc('Y', '4', '1', 'P') /* 12  YUV 4:1:1     */
-- 
cgit v1.2.3


From 74d83fa0241f603a4067f071a88ef8b9a7c415a0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 11 Nov 2008 21:13:47 -0300
Subject: V4L/DVB (9578): v4l core: add support for enumerating frame sizes and
 intervals

video_ioctl2 lacks implementation of those two ioctls:
	- VIDIOC_ENUM_FRAMESIZES and VIDIOC_ENUM_FRAMEINTERVALS

Adds implementation for those.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-ioctl.c | 67 ++++++++++++++++++++++++++++++++++++++++
 include/media/v4l2-ioctl.h       |  6 ++++
 2 files changed, 73 insertions(+)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 0e648cb3b399..213c4dd6f72c 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -1747,6 +1747,73 @@ static int __video_do_ioctl(struct file *file,
 		ret = ops->vidioc_s_hw_freq_seek(file, fh, p);
 		break;
 	}
+	case VIDIOC_ENUM_FRAMESIZES:
+	{
+		struct v4l2_frmsizeenum *p = arg;
+
+		if (!ops->vidioc_enum_framesizes)
+			break;
+
+		memset(p, 0, sizeof(*p));
+
+		ret = ops->vidioc_enum_framesizes(file, fh, p);
+		dbgarg(cmd,
+			"index=%d, pixelformat=%d, type=%d ",
+			p->index, p->pixel_format, p->type);
+		switch (p->type) {
+		case V4L2_FRMSIZE_TYPE_DISCRETE:
+			dbgarg2("width = %d, height=%d\n",
+				p->discrete.width, p->discrete.height);
+			break;
+		case V4L2_FRMSIZE_TYPE_STEPWISE:
+			dbgarg2("min %dx%d, max %dx%d, step %dx%d\n",
+				p->stepwise.min_width,  p->stepwise.min_height,
+				p->stepwise.step_width, p->stepwise.step_height,
+				p->stepwise.max_width,  p->stepwise.max_height);
+			break;
+		case V4L2_FRMSIZE_TYPE_CONTINUOUS:
+			dbgarg2("continuous\n");
+			break;
+		default:
+			dbgarg2("- Unknown type!\n");
+		}
+
+		break;
+	}
+	case VIDIOC_ENUM_FRAMEINTERVALS:
+	{
+		struct v4l2_frmivalenum *p = arg;
+
+		if (!ops->vidioc_enum_frameintervals)
+			break;
+
+		memset(p, 0, sizeof(*p));
+
+		ret = ops->vidioc_enum_frameintervals(file, fh, p);
+		dbgarg(cmd,
+			"index=%d, pixelformat=%d, width=%d, height=%d, type=%d ",
+			p->index, p->pixel_format,
+			p->width, p->height, p->type);
+		switch (p->type) {
+		case V4L2_FRMIVAL_TYPE_DISCRETE:
+			dbgarg2("fps=%d/%d\n",
+				p->discrete.numerator,
+				p->discrete.denominator);
+			break;
+		case V4L2_FRMIVAL_TYPE_STEPWISE:
+			dbgarg2("min=%d, max=%d, step=%d\n",
+				p->stepwise.min, p->stepwise.max,
+				p->stepwise.step);
+			break;
+		case V4L2_FRMIVAL_TYPE_CONTINUOUS:
+			dbgarg2("continuous\n");
+			break;
+		default:
+			dbgarg2("- Unknown type!\n");
+		}
+		break;
+	}
+
 	default:
 	{
 		if (!ops->vidioc_default)
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index f5985724c456..c884432f9383 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -232,6 +232,12 @@ struct v4l2_ioctl_ops {
 	int (*vidioc_g_chip_ident)     (struct file *file, void *fh,
 					struct v4l2_chip_ident *chip);
 
+	int (*vidioc_enum_framesizes)   (struct file *file, void *fh,
+					 struct v4l2_frmsizeenum *fsize);
+
+	int (*vidioc_enum_frameintervals) (struct file *file, void *fh,
+					   struct v4l2_frmivalenum *fival);
+
 	/* For other private ioctls */
 	int (*vidioc_default)	       (struct file *file, void *fh,
 					int cmd, void *arg);
-- 
cgit v1.2.3


From 60245e858ddb2255d458868be16ba814f6f8d446 Mon Sep 17 00:00:00 2001
From: Devin Heitmueller <devin.heitmueller@gmail.com>
Date: Sun, 16 Nov 2008 08:03:53 -0300
Subject: V4L/DVB (9629): Add support for the ATI TV Wonder HD 600 USB Remote
 Control

Add support for the ATI TV Wonder HD 600 USB Remote Control
(required a new keymap)

[mchehab@redhat.com: Fix CodingStyle]
Signed-off-by: Devin Heitmueller <devin.heitmueller@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/ir-keymaps.c         | 32 +++++++++++++++++++++++++++++++
 drivers/media/video/em28xx/em28xx-cards.c |  1 +
 include/media/ir-common.h                 |  1 +
 3 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c
index 4952aeb5dd80..3534cdc1f953 100644
--- a/drivers/media/common/ir-keymaps.c
+++ b/drivers/media/common/ir-keymaps.c
@@ -2511,3 +2511,35 @@ IR_KEYTAB_TYPE ir_codes_real_audio_220_32_keys[IR_KEYTAB_SIZE] = {
 
 };
 EXPORT_SYMBOL_GPL(ir_codes_real_audio_220_32_keys);
+
+/* ATI TV Wonder HD 600 USB
+   Devin Heitmueller <devin.heitmueller@gmail.com>
+ */
+IR_KEYTAB_TYPE ir_codes_ati_tv_wonder_hd_600[IR_KEYTAB_SIZE] = {
+	[0x00] = KEY_RECORD,		/* Row 1 */
+	[0x01] = KEY_PLAYPAUSE,
+	[0x02] = KEY_STOP,
+	[0x03] = KEY_POWER,
+	[0x04] = KEY_PREVIOUS,	/* Row 2 */
+	[0x05] = KEY_REWIND,
+	[0x06] = KEY_FORWARD,
+	[0x07] = KEY_NEXT,
+	[0x08] = KEY_EPG,		/* Row 3 */
+	[0x09] = KEY_HOME,
+	[0x0a] = KEY_MENU,
+	[0x0b] = KEY_CHANNELUP,
+	[0x0c] = KEY_BACK,		/* Row 4 */
+	[0x0d] = KEY_UP,
+	[0x0e] = KEY_INFO,
+	[0x0f] = KEY_CHANNELDOWN,
+	[0x10] = KEY_LEFT,		/* Row 5 */
+	[0x11] = KEY_SELECT,
+	[0x12] = KEY_RIGHT,
+	[0x13] = KEY_VOLUMEUP,
+	[0x14] = KEY_LAST,		/* Row 6 */
+	[0x15] = KEY_DOWN,
+	[0x16] = KEY_MUTE,
+	[0x17] = KEY_VOLUMEDOWN,
+};
+
+EXPORT_SYMBOL_GPL(ir_codes_ati_tv_wonder_hd_600);
diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index 3b585486f7fe..75b2af2bf4cf 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c
@@ -640,6 +640,7 @@ struct em28xx_board em28xx_boards[] = {
 		.mts_firmware   = 1,
 		.has_12mhz_i2s  = 1,
 		.has_dvb        = 1,
+		.ir_codes       = ir_codes_ati_tv_wonder_hd_600,
 		.decoder        = EM28XX_TVP5150,
 		.input          = { {
 			.type     = EM28XX_VMUX_TELEVISION,
diff --git a/include/media/ir-common.h b/include/media/ir-common.h
index 38f2d93c3957..3a88e13a20e9 100644
--- a/include/media/ir-common.h
+++ b/include/media/ir-common.h
@@ -157,6 +157,7 @@ extern IR_KEYTAB_TYPE ir_codes_avermedia_a16d[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_encore_enltv_fm53[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_real_audio_220_32_keys[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_msi_tvanywhere_plus[IR_KEYTAB_SIZE];
+extern IR_KEYTAB_TYPE ir_codes_ati_tv_wonder_hd_600[IR_KEYTAB_SIZE];
 #endif
 
 /*
-- 
cgit v1.2.3


From 480daab42c4dd74b3c07031ddf9031251c530c77 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:56 -0600
Subject: virtio: Don't use PAGE_SIZE in virtio_pci.c

The virtio PCI devices don't depend on the guest page size.  This matters
now PowerPC virtio is gaining ground (they like 64k pages).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 2 +-
 include/linux/virtio_pci.h  | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index e37d686edfe0..c2cd69ba0a8c 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -245,7 +245,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	}
 
 	/* activate the queue */
-	iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT,
+	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index cdef35742932..e13d7ebcf576 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -53,4 +53,8 @@
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
+
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size. */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
 #endif
-- 
cgit v1.2.3


From 5f0d1d7f2286c8a02dab69f5f0bd51681fab161e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:57 -0600
Subject: virtio: rename 'pagesize' arg to vring_init/vring_size

It's really the alignment desired for consumer/producer separation;
historically this x86 pagesize, but with PowerPC it'll still be x86
pagesize.  And in theory lguest could choose a different value.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_ring.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index c4a598fb3826..01bf3124e312 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -83,7 +83,7 @@ struct vring {
  *	__u16 avail_idx;
  *	__u16 available[num];
  *
- *	// Padding to the next page boundary.
+ *	// Padding to the next align boundary.
  *	char pad[];
  *
  *	// A ring of used descriptor heads with free-running index.
@@ -93,19 +93,19 @@ struct vring {
  * };
  */
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
-			      unsigned long pagesize)
+			      unsigned long align)
 {
 	vr->num = num;
 	vr->desc = p;
 	vr->avail = p + num*sizeof(struct vring_desc);
-	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1)
-			    & ~(pagesize - 1));
+	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1)
+			    & ~(align - 1));
 }
 
-static inline unsigned vring_size(unsigned int num, unsigned long pagesize)
+static inline unsigned vring_size(unsigned int num, unsigned long align)
 {
 	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
-		 + pagesize - 1) & ~(pagesize - 1))
+		 + align - 1) & ~(align - 1))
 		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
 }
 
-- 
cgit v1.2.3


From 498af14783935af487d17dbee4ac451783cbc2b7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:57 -0600
Subject: virtio: Don't use PAGE_SIZE for vring alignment in virtio_pci.

That doesn't work for non-4k guests which are now appearing.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 4 ++--
 include/linux/virtio_pci.h  | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index c2cd69ba0a8c..f28643f3a4e8 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -237,7 +237,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	info->queue_index = index;
 	info->num = num;
 
-	size = PAGE_ALIGN(vring_size(num, PAGE_SIZE));
+	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
 	if (info->queue == NULL) {
 		err = -ENOMEM;
@@ -290,7 +290,7 @@ static void vp_del_vq(struct virtqueue *vq)
 	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
-	size = PAGE_ALIGN(vring_size(info->num, PAGE_SIZE));
+	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
 	free_pages_exact(info->queue, size);
 	kfree(info);
 }
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index e13d7ebcf576..cd0fd5d181a6 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -57,4 +57,8 @@
 /* How many bits to shift physical queue address written to QUEUE_PFN.
  * 12 is historical, and due to x86 page size. */
 #define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_VRING_ALIGN		4096
 #endif
-- 
cgit v1.2.3


From 2966af73e70dee461c256b5eb877b2ff757f8c82 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:58 -0600
Subject: virtio: use LGUEST_VRING_ALIGN instead of relying on pagesize

This doesn't really matter, since lguest is i386 only at the moment,
but we could actually choose a different value.  (lguest doesn't have
a guarenteed ABI).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c   | 6 +++---
 drivers/lguest/lguest_device.c  | 2 +-
 include/linux/lguest_launcher.h | 4 ++++
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 804520633fcf..aa2574ca94c7 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1030,7 +1030,7 @@ static void update_device_status(struct device *dev)
 		/* Zero out the virtqueues. */
 		for (vq = dev->vq; vq; vq = vq->next) {
 			memset(vq->vring.desc, 0,
-			       vring_size(vq->config.num, getpagesize()));
+			       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
 			lg_last_avail(vq) = 0;
 		}
 	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
@@ -1211,7 +1211,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	void *p;
 
 	/* First we need some memory for this virtqueue. */
-	pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
+	pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1)
 		/ getpagesize();
 	p = get_pages(pages);
 
@@ -1228,7 +1228,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	vq->config.pfn = to_guest_phys(p) / getpagesize();
 
 	/* Initialize the vring. */
-	vring_init(&vq->vring, num_descs, p, getpagesize());
+	vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN);
 
 	/* Append virtqueue to this device's descriptor.  We use
 	 * device_config() to get the end of the device's current virtqueues;
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index a661bbdae3d6..f062dc55c573 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -250,7 +250,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	/* Figure out how many pages the ring will take, and map that memory */
 	lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
 				DIV_ROUND_UP(vring_size(lvq->config.num,
-							PAGE_SIZE),
+							LGUEST_VRING_ALIGN),
 					     PAGE_SIZE));
 	if (!lvq->pages) {
 		err = -ENOMEM;
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index e7217dc58f39..bd0eba760522 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -59,4 +59,8 @@ enum lguest_req
 	LHREQ_IRQ, /* + irq */
 	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
 };
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize for historical reasons. */
+#define LGUEST_VRING_ALIGN	4096
 #endif /* _LINUX_LGUEST_LAUNCHER */
-- 
cgit v1.2.3


From 87c7d57c17ade5024d95b6ca0da249da49b0672a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:26:03 -0600
Subject: virtio: hand virtio ring alignment as argument to vring_new_virtqueue

This allows each virtio user to hand in the alignment appropriate to
their virtio_ring structures.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 drivers/lguest/lguest_device.c | 4 ++--
 drivers/s390/kvm/kvm_virtio.c  | 3 ++-
 drivers/virtio/virtio_pci.c    | 4 ++--
 drivers/virtio/virtio_ring.c   | 3 ++-
 include/linux/virtio_ring.h    | 1 +
 5 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index f062dc55c573..b02f6bcb64c4 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -259,8 +259,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 
 	/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 	 * and we've got a pointer to its pages. */
-	vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages,
-				 lg_notify, callback);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
+				 vdev, lvq->pages, lg_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index f5a2dbe75575..4e8354aa8576 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -193,7 +193,8 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 	if (err)
 		goto out;
 
-	vq = vring_new_virtqueue(config->num, vdev, (void *) config->address,
+	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
+				 vdev, (void *) config->address,
 				 kvm_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index f28643f3a4e8..7462a51e820b 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -249,8 +249,8 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, vdev, info->queue,
-				 vp_notify, callback);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
+				 vdev, info->queue, vp_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6eb5303fed11..5777196bf6c9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -274,6 +274,7 @@ static struct virtqueue_ops vring_vq_ops = {
 };
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
+				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
@@ -292,7 +293,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	if (!vq)
 		return NULL;
 
-	vring_init(&vq->vring, num, pages, PAGE_SIZE);
+	vring_init(&vq->vring, num, pages, vring_align);
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 01bf3124e312..71e03722fb59 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -115,6 +115,7 @@ struct virtio_device;
 struct virtqueue;
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
+				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
-- 
cgit v1.2.3


From 1b4aa2faeca1b9922033daf2475b6fc13b0ffea6 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Thu, 13 Nov 2008 15:48:33 -0600
Subject: virtio: avoid implicit use of Linux page size in balloon interface

Make the balloon interface always use 4K pages, and convert Linux pfns if
necessary. This patch assumes that Linux's PAGE_SHIFT will never be less than
12.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (modified)
---
 drivers/virtio/virtio_balloon.c | 13 +++++++++++--
 include/linux/virtio_balloon.h  |  3 +++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 62eab43152d2..59268266b79a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -56,6 +56,15 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static u32 page_to_balloon_pfn(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+
+	BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
+	/* Convert pfn from Linux page size to balloon page size. */
+	return pfn >> (PAGE_SHIFT - VIRTIO_BALLOON_PFN_SHIFT);
+}
+
 static void balloon_ack(struct virtqueue *vq)
 {
 	struct virtio_balloon *vb;
@@ -99,7 +108,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
 			msleep(200);
 			break;
 		}
-		vb->pfns[vb->num_pfns] = page_to_pfn(page);
+		vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page);
 		totalram_pages--;
 		vb->num_pages++;
 		list_add(&page->lru, &vb->pages);
@@ -132,7 +141,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 	for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) {
 		page = list_first_entry(&vb->pages, struct page, lru);
 		list_del(&page->lru);
-		vb->pfns[vb->num_pfns] = page_to_pfn(page);
+		vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page);
 		vb->num_pages--;
 	}
 
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index c30c7bfbf39b..8726ff77763e 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -10,6 +10,9 @@
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 
+/* Size of a PFN in the balloon interface. */
+#define VIRTIO_BALLOON_PFN_SHIFT 12
+
 struct virtio_balloon_config
 {
 	/* Number of pages host wants Guest to give up. */
-- 
cgit v1.2.3


From c29834584ea4eafccf2f62a0b8a32e64f792044c Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 25 Nov 2008 13:36:26 +0100
Subject: virtio_console: support console resizing

this patch uses the new hvc callback hvc_resize to set the window size
which allows to change the tty size of hvc_console via a hvc_resize
function.

I have added a new feature bit VIRTIO_CONSOLE_F_SIZE. The driver will
change the window size on tty open and via the config_changed callback
of the transport. Currently lguest and kvm_s390 have not implemented this
callback, but the callback can be implemented at a later point in time.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hvc_console.c     |  1 +
 drivers/char/virtio_console.c  | 30 +++++++++++++++++++++++++++++-
 include/linux/virtio_console.h | 11 +++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index fb57f67bb427..0587b66d6fc7 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -695,6 +695,7 @@ void hvc_resize(struct hvc_struct *hp, struct winsize ws)
 	hp->ws = ws;
 	schedule_work(&hp->tty_resize);
 }
+EXPORT_SYMBOL_GPL(hvc_resize);
 
 /*
  * This kthread is either polling or interrupt driven.  This is determined by
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 3fb0d2c88ba5..ff6f5a4b58fb 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -137,13 +137,34 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
 	return hvc_instantiate(0, 0, &virtio_cons);
 }
 
+/*
+ * virtio console configuration. This supports:
+ * - console resize
+ */
+static void virtcons_apply_config(struct virtio_device *dev)
+{
+	struct winsize ws;
+
+	if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) {
+		dev->config->get(dev,
+				 offsetof(struct virtio_console_config, cols),
+				 &ws.ws_col, sizeof(u16));
+		dev->config->get(dev,
+				 offsetof(struct virtio_console_config, rows),
+				 &ws.ws_row, sizeof(u16));
+		hvc_resize(hvc, ws);
+	}
+}
+
 /*
  * we support only one console, the hvc struct is a global var
- * There is no need to do anything
+ * We set the configuration at this point, since we now have a tty
  */
 static int notifier_add_vio(struct hvc_struct *hp, int data)
 {
 	hp->irq_requested = 1;
+	virtcons_apply_config(vdev);
+
 	return 0;
 }
 
@@ -234,11 +255,18 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_CONSOLE_F_SIZE,
+};
+
 static struct virtio_driver virtio_console = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
 	.probe =	virtcons_probe,
+	.config_changed = virtcons_apply_config,
 };
 
 static int __init init(void)
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index 19a0da0dba41..7615ffcdd555 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -7,6 +7,17 @@
 /* The ID for virtio console */
 #define VIRTIO_ID_CONSOLE	3
 
+/* Feature bits */
+#define VIRTIO_CONSOLE_F_SIZE	0	/* Does host provide console size? */
+
+struct virtio_console_config {
+	/* colums of the screens */
+	__u16 cols;
+	/* rows of the screens */
+	__u16 rows;
+} __attribute__((packed));
+
+
 #ifdef __KERNEL__
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 58a24566449892dda409b9ad92c2e56c76c5670c Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Mon, 29 Sep 2008 01:40:07 -0300
Subject: lguest: move the initial guest page table creation code to the host

This patch moves the initial guest page table creation code to the host,
so the launcher keeps working with PAE enabled configs.

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c   | 60 ++++------------------------------
 arch/x86/lguest/i386_head.S     | 15 ---------
 drivers/lguest/lg.h             |  2 +-
 drivers/lguest/lguest_user.c    | 13 +++-----
 drivers/lguest/page_tables.c    | 72 +++++++++++++++++++++++++++++++++++++++--
 include/linux/lguest_launcher.h |  2 +-
 6 files changed, 83 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index aa2574ca94c7..f2dbbf3bdeab 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -481,51 +481,6 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
 	/* We return the initrd size. */
 	return len;
 }
-
-/* Once we know how much memory we have we can construct simple linear page
- * tables which set virtual == physical which will get the Guest far enough
- * into the boot to create its own.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here). */
-static unsigned long setup_pagetables(unsigned long mem,
-				      unsigned long initrd_size)
-{
-	unsigned long *pgdir, *linear;
-	unsigned int mapped_pages, i, linear_pages;
-	unsigned int ptes_per_page = getpagesize()/sizeof(void *);
-
-	mapped_pages = mem/getpagesize();
-
-	/* Each PTE page can map ptes_per_page pages: how many do we need? */
-	linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
-
-	/* We put the toplevel page directory page at the top of memory. */
-	pgdir = from_guest_phys(mem) - initrd_size - getpagesize();
-
-	/* Now we use the next linear_pages pages as pte pages */
-	linear = (void *)pgdir - linear_pages*getpagesize();
-
-	/* Linear mapping is easy: put every page's address into the mapping in
-	 * order.  PAGE_PRESENT contains the flags Present, Writable and
-	 * Executable. */
-	for (i = 0; i < mapped_pages; i++)
-		linear[i] = ((i * getpagesize()) | PAGE_PRESENT);
-
-	/* The top level points to the linear page table pages above. */
-	for (i = 0; i < mapped_pages; i += ptes_per_page) {
-		pgdir[i/ptes_per_page]
-			= ((to_guest_phys(linear) + i*sizeof(void *))
-			   | PAGE_PRESENT);
-	}
-
-	verbose("Linear mapping of %u pages in %u pte pages at %#lx\n",
-		mapped_pages, linear_pages, to_guest_phys(linear));
-
-	/* We return the top level (guest-physical) address: the kernel needs
-	 * to know where it is. */
-	return to_guest_phys(pgdir);
-}
 /*:*/
 
 /* Simple routine to roll all the commandline arguments together with spaces
@@ -548,13 +503,13 @@ static void concat(char *dst, char *args[])
 
 /*L:185 This is where we actually tell the kernel to initialize the Guest.  We
  * saw the arguments it expects when we looked at initialize() in lguest_user.c:
- * the base of Guest "physical" memory, the top physical page to allow, the
- * top level pagetable and the entry point for the Guest. */
-static int tell_kernel(unsigned long pgdir, unsigned long start)
+ * the base of Guest "physical" memory, the top physical page to allow and the
+ * entry point for the Guest. */
+static int tell_kernel(unsigned long start)
 {
 	unsigned long args[] = { LHREQ_INITIALIZE,
 				 (unsigned long)guest_base,
-				 guest_limit / getpagesize(), pgdir, start };
+				 guest_limit / getpagesize(), start };
 	int fd;
 
 	verbose("Guest: %p - %p (%#lx)\n",
@@ -1941,7 +1896,7 @@ int main(int argc, char *argv[])
 {
 	/* Memory, top-level pagetable, code startpoint and size of the
 	 * (optional) initrd. */
-	unsigned long mem = 0, pgdir, start, initrd_size = 0;
+	unsigned long mem = 0, start, initrd_size = 0;
 	/* Two temporaries and the /dev/lguest file descriptor. */
 	int i, c, lguest_fd;
 	/* The boot information for the Guest. */
@@ -2040,9 +1995,6 @@ int main(int argc, char *argv[])
 		boot->hdr.type_of_loader = 0xFF;
 	}
 
-	/* Set up the initial linear pagetables, starting below the initrd. */
-	pgdir = setup_pagetables(mem, initrd_size);
-
 	/* The Linux boot header contains an "E820" memory map: ours is a
 	 * simple, single region. */
 	boot->e820_entries = 1;
@@ -2064,7 +2016,7 @@ int main(int argc, char *argv[])
 
 	/* We tell the kernel to initialize the Guest: this returns the open
 	 * /dev/lguest file descriptor. */
-	lguest_fd = tell_kernel(pgdir, start);
+	lguest_fd = tell_kernel(start);
 
 	/* We clone off a thread, which wakes the Launcher whenever one of the
 	 * input file descriptors needs attention.  We call this the Waker, and
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 5c7cef34c9e7..10b9bd35a8ff 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -30,21 +30,6 @@ ENTRY(lguest_entry)
 	movl $lguest_data - __PAGE_OFFSET, %edx
 	int $LGUEST_TRAP_ENTRY
 
-	/* The Host put the toplevel pagetable in lguest_data.pgdir.  The movsl
-	 * instruction uses %esi implicitly as the source for the copy we're
-	 * about to do. */
-	movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
-
-	/* Copy first 32 entries of page directory to __PAGE_OFFSET entries.
-	 * This means the first 128M of kernel memory will be mapped at
-	 * PAGE_OFFSET where the kernel expects to run.  This will get it far
-	 * enough through boot to switch to its own pagetables. */
-	movl $32, %ecx
-	movl %esi, %edi
-	addl $((__PAGE_OFFSET >> 22) * 4), %edi
-	rep
-	movsl
-
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 5faefeaf6790..f2c641e0bdde 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -164,7 +164,7 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt);
 void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
 
 /* page_tables.c: */
-int init_guest_pagetable(struct lguest *lg, unsigned long pgtable);
+int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index e73a000473cc..34bc017b8b3c 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -146,7 +146,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 	return 0;
 }
 
-/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit)
+/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit)
  * values (in addition to the LHREQ_INITIALIZE value).  These are:
  *
  * base: The start of the Guest-physical memory inside the Launcher memory.
@@ -155,9 +155,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
  * allowed to access.  The Guest memory lives inside the Launcher, so it sets
  * this to ensure the Guest can only reach its own memory.
  *
- * pgdir: The (Guest-physical) address of the top of the initial Guest
- * pagetables (which are set up by the Launcher).
- *
  * start: The first instruction to execute ("eip" in x86-speak).
  */
 static int initialize(struct file *file, const unsigned long __user *input)
@@ -166,7 +163,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	 * Guest. */
 	struct lguest *lg;
 	int err;
-	unsigned long args[4];
+	unsigned long args[3];
 
 	/* We grab the Big Lguest lock, which protects against multiple
 	 * simultaneous initializations. */
@@ -192,14 +189,14 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	lg->mem_base = (void __user *)args[0];
 	lg->pfn_limit = args[1];
 
-	/* This is the first cpu (cpu 0) and it will start booting at args[3] */
-	err = lg_cpu_start(&lg->cpus[0], 0, args[3]);
+	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
+	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
 	if (err)
 		goto release_guest;
 
 	/* Initialize the Guest's shadow page tables, using the toplevel
 	 * address the Launcher gave us.  This allocates memory, so can fail. */
-	err = init_guest_pagetable(lg, args[2]);
+	err = init_guest_pagetable(lg);
 	if (err)
 		goto free_regs;
 
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 81d0c6053447..576a8318221c 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
+#include <asm/bootparam.h>
 #include "lg.h"
 
 /*M:008 We hold reference to pages, which prevents them from being swapped.
@@ -581,15 +582,82 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
 }
 
+/* Once we know how much memory we have we can construct simple identity
+ * (which set virtual == physical) and linear mappings
+ * which will get the Guest far enough into the boot to create its own.
+ *
+ * We lay them out of the way, just below the initrd (which is why we need to
+ * know its size here). */
+static unsigned long setup_pagetables(struct lguest *lg,
+				      unsigned long mem,
+				      unsigned long initrd_size)
+{
+	pgd_t __user *pgdir;
+	pte_t __user *linear;
+	unsigned int mapped_pages, i, linear_pages, phys_linear;
+	unsigned long mem_base = (unsigned long)lg->mem_base;
+
+	/* We have mapped_pages frames to map, so we need
+	 * linear_pages page tables to map them. */
+	mapped_pages = mem / PAGE_SIZE;
+	linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
+
+	/* We put the toplevel page directory page at the top of memory. */
+	pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
+
+	/* Now we use the next linear_pages pages as pte pages */
+	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
+
+	/* Linear mapping is easy: put every page's address into the
+	 * mapping in order. */
+	for (i = 0; i < mapped_pages; i++) {
+		pte_t pte;
+		pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
+		if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
+			return -EFAULT;
+	}
+
+	/* The top level points to the linear page table pages above.
+	 * We setup the identity and linear mappings here. */
+	phys_linear = (unsigned long)linear - mem_base;
+	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
+		pgd_t pgd;
+		pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
+			    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
+
+		if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
+		    || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
+					   + i / PTRS_PER_PTE],
+				    &pgd, sizeof(pgd)))
+			return -EFAULT;
+	}
+
+	/* We return the top level (guest-physical) address: remember where
+	 * this is. */
+	return (unsigned long)pgdir - mem_base;
+}
+
 /*H:500 (vii) Setting up the page tables initially.
  *
  * When a Guest is first created, the Launcher tells us where the toplevel of
  * its first page table is.  We set some things up here: */
-int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
+int init_guest_pagetable(struct lguest *lg)
 {
+	u64 mem;
+	u32 initrd_size;
+	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
+
+	/* Get the Guest memory size and the ramdisk size from the boot header
+	 * located at lg->mem_base (Guest address 0). */
+	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
+	    || get_user(initrd_size, &boot->hdr.ramdisk_size))
+		return -EFAULT;
+
 	/* We start on the first shadow page table, and give it a blank PGD
 	 * page. */
-	lg->pgdirs[0].gpgdir = pgtable;
+	lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
+	if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
+		return lg->pgdirs[0].gpgdir;
 	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	if (!lg->pgdirs[0].pgdir)
 		return -ENOMEM;
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index bd0eba760522..a53407a4165c 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -54,7 +54,7 @@ struct lguest_vqconfig {
 /* Write command first word is a request. */
 enum lguest_req
 {
-	LHREQ_INITIALIZE, /* + base, pfnlimit, pgdir, start */
+	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
 	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
-- 
cgit v1.2.3


From 26d5f3a3fe917232cb77e2e3450f7d7f8698259c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@infradead.org>
Date: Sun, 7 Dec 2008 13:19:29 -0300
Subject: V4L/DVB (9772): saa7134: Add support for Kworld Plus TV Analog Lite
 PCI

Thanks to Sistema Fenix (http://www.sistemafenix.com.br/) for sponsoring
this
development.

Signed-off-by: Gilberto <gilberto@sistemafenix.com.br>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/video4linux/CARDLIST.saa7134  |  1 +
 drivers/media/common/ir-keymaps.c           | 61 +++++++++++++++++++++++++++++
 drivers/media/video/saa7134/saa7134-cards.c | 44 +++++++++++++++++++++
 drivers/media/video/saa7134/saa7134-input.c | 14 +++++++
 drivers/media/video/saa7134/saa7134.h       |  1 +
 include/media/ir-common.h                   |  1 +
 6 files changed, 122 insertions(+)

(limited to 'include')

diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index dc67eef38ff9..dd979dca8af6 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -151,3 +151,4 @@
 150 -> Zogis Real Angel 220
 151 -> ADS Tech Instant HDTV                    [1421:0380]
 152 -> Asus Tiger Rev:1.00                      [1043:4857]
+153 -> Kworld Plus TV Analog Lite PCI           [17de:7128]
diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c
index 3534cdc1f953..d8229a0e9a9c 100644
--- a/drivers/media/common/ir-keymaps.c
+++ b/drivers/media/common/ir-keymaps.c
@@ -2391,6 +2391,67 @@ IR_KEYTAB_TYPE ir_codes_powercolor_real_angel[IR_KEYTAB_SIZE] = {
 };
 EXPORT_SYMBOL_GPL(ir_codes_powercolor_real_angel);
 
+/* Kworld Plus TV Analog Lite PCI IR
+   Mauro Carvalho Chehab <mchehab@infradead.org>
+ */
+IR_KEYTAB_TYPE ir_codes_kworld_plus_tv_analog[IR_KEYTAB_SIZE] = {
+	[0x0c] = KEY_PROG1,		/* Kworld key */
+	[0x16] = KEY_CLOSECD,		/* -> ) */
+	[0x1d] = KEY_POWER2,
+
+	[0x00] = KEY_1,
+	[0x01] = KEY_2,
+	[0x02] = KEY_3,			/* Two keys have the same code: 3 and left */
+	[0x03] = KEY_4,			/* Two keys have the same code: 3 and right */
+	[0x04] = KEY_5,
+	[0x05] = KEY_6,
+	[0x06] = KEY_7,
+	[0x07] = KEY_8,
+	[0x08] = KEY_9,
+	[0x0a] = KEY_0,
+
+	[0x09] = KEY_AGAIN,
+	[0x14] = KEY_MUTE,
+
+	[0x20] = KEY_UP,
+	[0x21] = KEY_DOWN,
+	[0x0b] = KEY_ENTER,
+
+	[0x10] = KEY_CHANNELUP,
+	[0x11] = KEY_CHANNELDOWN,
+
+	/* Couldn't map key left/key right since those
+	   conflict with '3' and '4' scancodes
+	   I dunno what the original driver does
+	 */
+
+	[0x13] = KEY_VOLUMEUP,
+	[0x12] = KEY_VOLUMEDOWN,
+
+	/* The lower part of the IR
+	   There are several duplicated keycodes there.
+	   Most of them conflict with digits.
+	   Add mappings just to the unused scancodes.
+	   Somehow, the original driver has a way to know,
+	   but this doesn't seem to be on some GPIO.
+	   Also, it is not related to the time between keyup
+	   and keydown.
+	 */
+	[0x19] = KEY_PAUSE,		/* Timeshift */
+	[0x1a] = KEY_STOP,
+	[0x1b] = KEY_RECORD,
+
+	[0x22] = KEY_TEXT,
+
+	[0x15] = KEY_AUDIO,		/* ((*)) */
+	[0x0f] = KEY_ZOOM,
+	[0x1c] = KEY_SHUFFLE,		/* snapshot */
+
+	[0x18] = KEY_RED,		/* B */
+	[0x23] = KEY_GREEN,		/* C */
+};
+EXPORT_SYMBOL_GPL(ir_codes_kworld_plus_tv_analog);
+
 IR_KEYTAB_TYPE ir_codes_avermedia_a16d[IR_KEYTAB_SIZE] = {
 	[0x20] = KEY_LIST,
 	[0x00] = KEY_POWER,
diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c
index 863522899e85..f0b95804e414 100644
--- a/drivers/media/video/saa7134/saa7134-cards.c
+++ b/drivers/media/video/saa7134/saa7134-cards.c
@@ -4606,6 +4606,43 @@ struct saa7134_board saa7134_boards[] = {
 			.gpio   = 0x0200000,
 		},
 	},
+	[SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG] = {
+		.name           = "Kworld Plus TV Analog Lite PCI",
+		.audio_clock    = 0x00187de7,
+		.tuner_type     = TUNER_YMEC_TVF_5533MF,
+		.radio_type     = TUNER_TEA5767,
+		.tuner_addr     = ADDR_UNSET,
+		.radio_addr     = ADDR_UNSET,
+		.gpiomask       = 0x80000700,
+		.inputs = { {
+			.name   = name_tv,
+			.vmux   = 1,
+			.amux   = LINE2,
+			.tv     = 1,
+			.gpio   = 0x100,
+		}, {
+			.name   = name_comp1,
+			.vmux   = 3,
+			.amux   = LINE1,
+			.gpio   = 0x200,
+		}, {
+			.name   = name_svideo,
+			.vmux   = 8,
+			.amux   = LINE1,
+			.gpio   = 0x200,
+		} },
+		.radio = {
+			.name   = name_radio,
+			.vmux   = 1,
+			.amux   = LINE1,
+			.gpio   = 0x100,
+		},
+		.mute = {
+			.name = name_mute,
+			.vmux = 8,
+			.amux = 2,
+		},
+	},
 };
 
 const unsigned int saa7134_bcount = ARRAY_SIZE(saa7134_boards);
@@ -5652,6 +5689,12 @@ struct pci_device_id saa7134_pci_tbl[] = {
 		.subvendor    = 0x1043,
 		.subdevice    = 0x4878, /* REV:1.02G */
 		.driver_data  = SAA7134_BOARD_ASUSTeK_TIGER_3IN1,
+	}, {
+		.vendor       = PCI_VENDOR_ID_PHILIPS,
+		.device       = PCI_DEVICE_ID_PHILIPS_SAA7134,
+		.subvendor    = 0x17de,
+		.subdevice    = 0x7128,
+		.driver_data  = SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG,
 	}, {
 		/* --- boards without eeprom + subsystem ID --- */
 		.vendor       = PCI_VENDOR_ID_PHILIPS,
@@ -5880,6 +5923,7 @@ int saa7134_board_init1(struct saa7134_dev *dev)
 	case SAA7134_BOARD_BEHOLD_507_9FM:
 	case SAA7134_BOARD_GENIUS_TVGO_A11MCE:
 	case SAA7134_BOARD_REAL_ANGEL_220:
+	case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG:
 		dev->has_remote = SAA7134_REMOTE_GPIO;
 		break;
 	case SAA7134_BOARD_FLYDVBS_LR300:
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index c53fd5f9f6b5..d2124f64e4e2 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -97,6 +97,15 @@ static int build_key(struct saa7134_dev *dev)
 	dprintk("build_key gpio=0x%x mask=0x%x data=%d\n",
 		gpio, ir->mask_keycode, data);
 
+	switch (dev->board) {
+	case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG:
+		if (data == ir->mask_keycode)
+			ir_input_nokey(ir->dev, &ir->ir);
+		else
+			ir_input_keydown(ir->dev, &ir->ir, data, data);
+		return 0;
+	}
+
 	if (ir->polling) {
 		if ((ir->mask_keydown  &&  (0 != (gpio & ir->mask_keydown))) ||
 		    (ir->mask_keyup    &&  (0 == (gpio & ir->mask_keyup)))) {
@@ -586,6 +595,11 @@ int saa7134_input_init1(struct saa7134_dev *dev)
 		mask_keyup   = 0x4000;
 		polling = 50; /* ms */
 		break;
+	case SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG:
+		ir_codes     = ir_codes_kworld_plus_tv_analog;
+		mask_keycode = 0x7f;
+		polling = 40; /* ms */
+		break;
 	}
 	if (NULL == ir_codes) {
 		printk("%s: Oops: IR config error [card=%d]\n",
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h
index 9070efe4a4d0..f6c1fcc72070 100644
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -275,6 +275,7 @@ struct saa7134_format {
 #define SAA7134_BOARD_REAL_ANGEL_220     150
 #define SAA7134_BOARD_ADS_INSTANT_HDTV_PCI  151
 #define SAA7134_BOARD_ASUSTeK_TIGER         152
+#define SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG 153
 
 #define SAA7134_MAXBOARDS 32
 #define SAA7134_INPUT_MAX 8
diff --git a/include/media/ir-common.h b/include/media/ir-common.h
index 3a88e13a20e9..5bf2ea00678c 100644
--- a/include/media/ir-common.h
+++ b/include/media/ir-common.h
@@ -158,6 +158,7 @@ extern IR_KEYTAB_TYPE ir_codes_encore_enltv_fm53[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_real_audio_220_32_keys[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_msi_tvanywhere_plus[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_ati_tv_wonder_hd_600[IR_KEYTAB_SIZE];
+extern IR_KEYTAB_TYPE ir_codes_kworld_plus_tv_analog[IR_KEYTAB_SIZE];
 #endif
 
 /*
-- 
cgit v1.2.3


From a2c8c68cca3dbb0c87f5034ab8ea29350174ec4a Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lyakh@axis700.grange>
Date: Mon, 1 Dec 2008 09:44:53 -0300
Subject: V4L/DVB (9785): soc-camera: merge .try_bus_param() into
 .try_fmt_cap()

.try_bus_param() method from struct soc_camera_host_ops is only called at one
location immediately before .try_fmt_cap(), there is no value in keeping these
two methods separate, merge them.

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/pxa_camera.c           | 6 +++++-
 drivers/media/video/sh_mobile_ceu_camera.c | 6 +++++-
 drivers/media/video/soc_camera.c           | 5 -----
 include/media/soc_camera.h                 | 1 -
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index eb6be5802928..2a811f8584b5 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -913,6 +913,11 @@ static int pxa_camera_set_fmt_cap(struct soc_camera_device *icd,
 static int pxa_camera_try_fmt_cap(struct soc_camera_device *icd,
 				  struct v4l2_format *f)
 {
+	int ret = pxa_camera_try_bus_param(icd, f->fmt.pix.pixelformat);
+
+	if (ret < 0)
+		return ret;
+
 	/* limit to pxa hardware capabilities */
 	if (f->fmt.pix.height < 32)
 		f->fmt.pix.height = 32;
@@ -1039,7 +1044,6 @@ static struct soc_camera_host_ops pxa_soc_camera_host_ops = {
 	.reqbufs	= pxa_camera_reqbufs,
 	.poll		= pxa_camera_poll,
 	.querycap	= pxa_camera_querycap,
-	.try_bus_param	= pxa_camera_try_bus_param,
 	.set_bus_param	= pxa_camera_set_bus_param,
 };
 
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 63bc0a6e68b0..87d0f3075811 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -459,6 +459,11 @@ static int sh_mobile_ceu_set_fmt_cap(struct soc_camera_device *icd,
 static int sh_mobile_ceu_try_fmt_cap(struct soc_camera_device *icd,
 				     struct v4l2_format *f)
 {
+	int ret = sh_mobile_ceu_try_bus_param(icd, f->fmt.pix.pixelformat);
+
+	if (ret < 0)
+		return ret;
+
 	/* FIXME: calculate using depth and bus width */
 
 	if (f->fmt.pix.height < 4)
@@ -546,7 +551,6 @@ static struct soc_camera_host_ops sh_mobile_ceu_host_ops = {
 	.reqbufs	= sh_mobile_ceu_reqbufs,
 	.poll		= sh_mobile_ceu_poll,
 	.querycap	= sh_mobile_ceu_querycap,
-	.try_bus_param	= sh_mobile_ceu_try_bus_param,
 	.set_bus_param	= sh_mobile_ceu_set_bus_param,
 	.init_videobuf	= sh_mobile_ceu_init_videobuf,
 };
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 28cf5c94bd61..2ebdf4d667e3 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -77,11 +77,6 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 		return -EINVAL;
 	}
 
-	/* test physical bus parameters */
-	ret = ici->ops->try_bus_param(icd, f->fmt.pix.pixelformat);
-	if (ret)
-		return ret;
-
 	/* limit format to hardware capabilities */
 	ret = ici->ops->try_fmt_cap(icd, f);
 
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 9231e2d908f2..ee0e6b4bed33 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -73,7 +73,6 @@ struct soc_camera_host_ops {
 			      struct soc_camera_device *);
 	int (*reqbufs)(struct soc_camera_file *, struct v4l2_requestbuffers *);
 	int (*querycap)(struct soc_camera_host *, struct v4l2_capability *);
-	int (*try_bus_param)(struct soc_camera_device *, __u32);
 	int (*set_bus_param)(struct soc_camera_device *, __u32);
 	unsigned int (*poll)(struct file *, poll_table *);
 };
-- 
cgit v1.2.3


From 25c4d74ea6f07f2aaa3df537619680ba967043f5 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lyakh@axis700.grange>
Date: Mon, 1 Dec 2008 09:44:59 -0300
Subject: V4L/DVB (9787): soc-camera: let camera host drivers decide upon pixel
 format

Pixel format requested by the user is not necessarily the same, as what
a sensor driver provides. There are situations, when a camera host driver
provides the required format, but requires a different format from the
sensor. Further, the list of formats, supported by sensors is pretty static
and can be pretty good described with a constant list of structures. Whereas
decisions, made by camera host drivers to support requested formats can be
quite complex, therefore it is better to let the host driver do the work.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/pxa_camera.c           | 32 ++++++++++++++++-
 drivers/media/video/sh_mobile_ceu_camera.c | 32 ++++++++++++++++-
 drivers/media/video/soc_camera.c           | 58 ++++++++++++------------------
 include/media/soc_camera.h                 |  3 ++
 4 files changed, 87 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index 2a811f8584b5..a375872b1342 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -907,17 +907,43 @@ static int pxa_camera_try_bus_param(struct soc_camera_device *icd, __u32 pixfmt)
 static int pxa_camera_set_fmt_cap(struct soc_camera_device *icd,
 				  __u32 pixfmt, struct v4l2_rect *rect)
 {
-	return icd->ops->set_fmt_cap(icd, pixfmt, rect);
+	const struct soc_camera_data_format *cam_fmt;
+	int ret;
+
+	/*
+	 * TODO: find a suitable supported by the SoC output format, check
+	 * whether the sensor supports one of acceptable input formats.
+	 */
+	if (pixfmt) {
+		cam_fmt = soc_camera_format_by_fourcc(icd, pixfmt);
+		if (!cam_fmt)
+			return -EINVAL;
+	}
+
+	ret = icd->ops->set_fmt_cap(icd, pixfmt, rect);
+	if (pixfmt && !ret)
+		icd->current_fmt = cam_fmt;
+
+	return ret;
 }
 
 static int pxa_camera_try_fmt_cap(struct soc_camera_device *icd,
 				  struct v4l2_format *f)
 {
+	const struct soc_camera_data_format *cam_fmt;
 	int ret = pxa_camera_try_bus_param(icd, f->fmt.pix.pixelformat);
 
 	if (ret < 0)
 		return ret;
 
+	/*
+	 * TODO: find a suitable supported by the SoC output format, check
+	 * whether the sensor supports one of acceptable input formats.
+	 */
+	cam_fmt = soc_camera_format_by_fourcc(icd, f->fmt.pix.pixelformat);
+	if (!cam_fmt)
+		return -EINVAL;
+
 	/* limit to pxa hardware capabilities */
 	if (f->fmt.pix.height < 32)
 		f->fmt.pix.height = 32;
@@ -929,6 +955,10 @@ static int pxa_camera_try_fmt_cap(struct soc_camera_device *icd,
 		f->fmt.pix.width = 2048;
 	f->fmt.pix.width &= ~0x01;
 
+	f->fmt.pix.bytesperline = f->fmt.pix.width *
+		DIV_ROUND_UP(cam_fmt->depth, 8);
+	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
+
 	/* limit to sensor capabilities */
 	return icd->ops->try_fmt_cap(icd, f);
 }
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 87d0f3075811..02f846d1908b 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -453,17 +453,43 @@ static int sh_mobile_ceu_try_bus_param(struct soc_camera_device *icd,
 static int sh_mobile_ceu_set_fmt_cap(struct soc_camera_device *icd,
 				     __u32 pixfmt, struct v4l2_rect *rect)
 {
-	return icd->ops->set_fmt_cap(icd, pixfmt, rect);
+	const struct soc_camera_data_format *cam_fmt;
+	int ret;
+
+	/*
+	 * TODO: find a suitable supported by the SoC output format, check
+	 * whether the sensor supports one of acceptable input formats.
+	 */
+	if (pixfmt) {
+		cam_fmt = soc_camera_format_by_fourcc(icd, pixfmt);
+		if (!cam_fmt)
+			return -EINVAL;
+	}
+
+	ret = icd->ops->set_fmt_cap(icd, pixfmt, rect);
+	if (pixfmt && !ret)
+		icd->current_fmt = cam_fmt;
+
+	return ret;
 }
 
 static int sh_mobile_ceu_try_fmt_cap(struct soc_camera_device *icd,
 				     struct v4l2_format *f)
 {
+	const struct soc_camera_data_format *cam_fmt;
 	int ret = sh_mobile_ceu_try_bus_param(icd, f->fmt.pix.pixelformat);
 
 	if (ret < 0)
 		return ret;
 
+	/*
+	 * TODO: find a suitable supported by the SoC output format, check
+	 * whether the sensor supports one of acceptable input formats.
+	 */
+	cam_fmt = soc_camera_format_by_fourcc(icd, f->fmt.pix.pixelformat);
+	if (!cam_fmt)
+		return -EINVAL;
+
 	/* FIXME: calculate using depth and bus width */
 
 	if (f->fmt.pix.height < 4)
@@ -477,6 +503,10 @@ static int sh_mobile_ceu_try_fmt_cap(struct soc_camera_device *icd,
 	f->fmt.pix.width &= ~0x01;
 	f->fmt.pix.height &= ~0x03;
 
+	f->fmt.pix.bytesperline = f->fmt.pix.width *
+		DIV_ROUND_UP(cam_fmt->depth, 8);
+	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
+
 	/* limit to sensor capabilities */
 	return icd->ops->try_fmt_cap(icd, f);
 }
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 7217de21e76b..01c33841d1c6 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -35,7 +35,7 @@ static LIST_HEAD(devices);
 static DEFINE_MUTEX(list_lock);
 static DEFINE_MUTEX(video_lock);
 
-const static struct soc_camera_data_format *format_by_fourcc(
+const struct soc_camera_data_format *soc_camera_format_by_fourcc(
 	struct soc_camera_device *icd, unsigned int fourcc)
 {
 	unsigned int i;
@@ -45,6 +45,7 @@ const static struct soc_camera_data_format *format_by_fourcc(
 			return icd->formats + i;
 	return NULL;
 }
+EXPORT_SYMBOL(soc_camera_format_by_fourcc);
 
 static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 				      struct v4l2_format *f)
@@ -54,25 +55,19 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 	struct soc_camera_host *ici =
 		to_soc_camera_host(icd->dev.parent);
 	enum v4l2_field field;
-	const struct soc_camera_data_format *fmt;
 	int ret;
 
 	WARN_ON(priv != file->private_data);
 
-	fmt = format_by_fourcc(icd, f->fmt.pix.pixelformat);
-	if (!fmt) {
-		dev_dbg(&icd->dev, "invalid format 0x%08x\n",
-			f->fmt.pix.pixelformat);
-		return -EINVAL;
-	}
-
-	dev_dbg(&icd->dev, "fmt: 0x%08x\n", fmt->fourcc);
-
+	/*
+	 * TODO: this might also have to migrate to host-drivers, if anyone
+	 * wishes to support other fields
+	 */
 	field = f->fmt.pix.field;
 
 	if (field == V4L2_FIELD_ANY) {
-		field = V4L2_FIELD_NONE;
-	} else if (V4L2_FIELD_NONE != field) {
+		f->fmt.pix.field = V4L2_FIELD_NONE;
+	} else if (field != V4L2_FIELD_NONE) {
 		dev_err(&icd->dev, "Field type invalid.\n");
 		return -EINVAL;
 	}
@@ -80,13 +75,6 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 	/* limit format to hardware capabilities */
 	ret = ici->ops->try_fmt_cap(icd, f);
 
-	/* calculate missing fields */
-	f->fmt.pix.field = field;
-	f->fmt.pix.bytesperline =
-		(f->fmt.pix.width * fmt->depth) >> 3;
-	f->fmt.pix.sizeimage =
-		f->fmt.pix.height * f->fmt.pix.bytesperline;
-
 	return ret;
 }
 
@@ -325,18 +313,10 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 		to_soc_camera_host(icd->dev.parent);
 	int ret;
 	struct v4l2_rect rect;
-	const static struct soc_camera_data_format *data_fmt;
 
 	WARN_ON(priv != file->private_data);
 
-	data_fmt = format_by_fourcc(icd, f->fmt.pix.pixelformat);
-	if (!data_fmt)
-		return -EINVAL;
-
-	/* buswidth may be further adjusted by the ici */
-	icd->buswidth = data_fmt->depth;
-
-	ret = soc_camera_try_fmt_vid_cap(file, icf, f);
+	ret = soc_camera_try_fmt_vid_cap(file, priv, f);
 	if (ret < 0)
 		return ret;
 
@@ -345,14 +325,21 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 	rect.width	= f->fmt.pix.width;
 	rect.height	= f->fmt.pix.height;
 	ret = ici->ops->set_fmt_cap(icd, f->fmt.pix.pixelformat, &rect);
-	if (ret < 0)
+	if (ret < 0) {
 		return ret;
+	} else if (!icd->current_fmt ||
+		   icd->current_fmt->fourcc != f->fmt.pix.pixelformat) {
+		dev_err(&ici->dev, "Host driver hasn't set up current "
+			"format correctly!\n");
+		return -EINVAL;
+	}
 
-	icd->current_fmt	= data_fmt;
+	/* buswidth may be further adjusted by the ici */
+	icd->buswidth		= icd->current_fmt->depth;
 	icd->width		= rect.width;
 	icd->height		= rect.height;
 	icf->vb_vidq.field	= f->fmt.pix.field;
-	if (V4L2_BUF_TYPE_VIDEO_CAPTURE != f->type)
+	if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		dev_warn(&icd->dev, "Attention! Wrong buf-type %d\n",
 			 f->type);
 
@@ -394,10 +381,9 @@ static int soc_camera_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.height	= icd->height;
 	f->fmt.pix.field	= icf->vb_vidq.field;
 	f->fmt.pix.pixelformat	= icd->current_fmt->fourcc;
-	f->fmt.pix.bytesperline	=
-		(f->fmt.pix.width * icd->current_fmt->depth) >> 3;
-	f->fmt.pix.sizeimage	=
-		f->fmt.pix.height * f->fmt.pix.bytesperline;
+	f->fmt.pix.bytesperline	= f->fmt.pix.width *
+		DIV_ROUND_UP(icd->current_fmt->depth, 8);
+	f->fmt.pix.sizeimage	= f->fmt.pix.height * f->fmt.pix.bytesperline;
 	dev_dbg(&icd->dev, "current_fmt->fourcc: 0x%08x\n",
 		icd->current_fmt->fourcc);
 	return 0;
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index ee0e6b4bed33..8e8fcb75dacb 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -105,6 +105,9 @@ extern void soc_camera_device_unregister(struct soc_camera_device *icd);
 extern int soc_camera_video_start(struct soc_camera_device *icd);
 extern void soc_camera_video_stop(struct soc_camera_device *icd);
 
+extern const struct soc_camera_data_format *soc_camera_format_by_fourcc(
+	struct soc_camera_device *icd, unsigned int fourcc);
+
 struct soc_camera_data_format {
 	const char *name;
 	unsigned int depth;
-- 
cgit v1.2.3


From d8fac217c58f0101a351b9c8c80f1665bd9efef9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lyakh@axis700.grange>
Date: Mon, 1 Dec 2008 09:45:21 -0300
Subject: V4L/DVB (9788): soc-camera: simplify naming

We anyway don't follow the s_fmt_vid_cap / g_fmt_vid_cap / try_fmt_vid_cap
naming, and soc-camera is so far only about video capture, let's simplify
operation names a bit further. set_fmt_cap / try_fmt_cap wasn't a very good
choice too.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/mt9m001.c              | 14 +++++++-------
 drivers/media/video/mt9m111.c              | 12 ++++++------
 drivers/media/video/mt9v022.c              | 14 +++++++-------
 drivers/media/video/ov772x.c               | 14 +++++++-------
 drivers/media/video/pxa_camera.c           | 16 ++++++++--------
 drivers/media/video/sh_mobile_ceu_camera.c | 16 ++++++++--------
 drivers/media/video/soc_camera.c           |  6 +++---
 drivers/media/video/soc_camera_platform.c  | 12 ++++++------
 include/media/soc_camera.h                 | 10 ++++------
 9 files changed, 56 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c
index 0c524376b67e..0bcfef7b1c17 100644
--- a/drivers/media/video/mt9m001.c
+++ b/drivers/media/video/mt9m001.c
@@ -285,8 +285,8 @@ static unsigned long mt9m001_query_bus_param(struct soc_camera_device *icd)
 		width_flag;
 }
 
-static int mt9m001_set_fmt_cap(struct soc_camera_device *icd,
-		__u32 pixfmt, struct v4l2_rect *rect)
+static int mt9m001_set_fmt(struct soc_camera_device *icd,
+			   __u32 pixfmt, struct v4l2_rect *rect)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 	int ret;
@@ -298,7 +298,7 @@ static int mt9m001_set_fmt_cap(struct soc_camera_device *icd,
 		ret = reg_write(icd, MT9M001_VERTICAL_BLANKING, vblank);
 
 	/* The caller provides a supported format, as verified per
-	 * call to icd->try_fmt_cap() */
+	 * call to icd->try_fmt() */
 	if (!ret)
 		ret = reg_write(icd, MT9M001_COLUMN_START, rect->left);
 	if (!ret)
@@ -325,8 +325,8 @@ static int mt9m001_set_fmt_cap(struct soc_camera_device *icd,
 	return ret;
 }
 
-static int mt9m001_try_fmt_cap(struct soc_camera_device *icd,
-			       struct v4l2_format *f)
+static int mt9m001_try_fmt(struct soc_camera_device *icd,
+			   struct v4l2_format *f)
 {
 	if (f->fmt.pix.height < 32 + icd->y_skip_top)
 		f->fmt.pix.height = 32 + icd->y_skip_top;
@@ -447,8 +447,8 @@ static struct soc_camera_ops mt9m001_ops = {
 	.release		= mt9m001_release,
 	.start_capture		= mt9m001_start_capture,
 	.stop_capture		= mt9m001_stop_capture,
-	.set_fmt_cap		= mt9m001_set_fmt_cap,
-	.try_fmt_cap		= mt9m001_try_fmt_cap,
+	.set_fmt		= mt9m001_set_fmt,
+	.try_fmt		= mt9m001_try_fmt,
 	.set_bus_param		= mt9m001_set_bus_param,
 	.query_bus_param	= mt9m001_query_bus_param,
 	.controls		= mt9m001_controls,
diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c
index da0b2d553fd0..63c52c0b3e72 100644
--- a/drivers/media/video/mt9m111.c
+++ b/drivers/media/video/mt9m111.c
@@ -452,8 +452,8 @@ static int mt9m111_set_pixfmt(struct soc_camera_device *icd, u32 pixfmt)
 	return ret;
 }
 
-static int mt9m111_set_fmt_cap(struct soc_camera_device *icd,
-			       __u32 pixfmt, struct v4l2_rect *rect)
+static int mt9m111_set_fmt(struct soc_camera_device *icd,
+			   __u32 pixfmt, struct v4l2_rect *rect)
 {
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 	int ret;
@@ -473,8 +473,8 @@ static int mt9m111_set_fmt_cap(struct soc_camera_device *icd,
 	return ret;
 }
 
-static int mt9m111_try_fmt_cap(struct soc_camera_device *icd,
-			       struct v4l2_format *f)
+static int mt9m111_try_fmt(struct soc_camera_device *icd,
+			   struct v4l2_format *f)
 {
 	if (f->fmt.pix.height > MT9M111_MAX_HEIGHT)
 		f->fmt.pix.height = MT9M111_MAX_HEIGHT;
@@ -597,8 +597,8 @@ static struct soc_camera_ops mt9m111_ops = {
 	.release		= mt9m111_release,
 	.start_capture		= mt9m111_start_capture,
 	.stop_capture		= mt9m111_stop_capture,
-	.set_fmt_cap		= mt9m111_set_fmt_cap,
-	.try_fmt_cap		= mt9m111_try_fmt_cap,
+	.set_fmt		= mt9m111_set_fmt,
+	.try_fmt		= mt9m111_try_fmt,
 	.query_bus_param	= mt9m111_query_bus_param,
 	.set_bus_param		= mt9m111_set_bus_param,
 	.controls		= mt9m111_controls,
diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c
index 2584201059d8..3a39f0288599 100644
--- a/drivers/media/video/mt9v022.c
+++ b/drivers/media/video/mt9v022.c
@@ -337,14 +337,14 @@ static unsigned long mt9v022_query_bus_param(struct soc_camera_device *icd)
 		width_flag;
 }
 
-static int mt9v022_set_fmt_cap(struct soc_camera_device *icd,
-		__u32 pixfmt, struct v4l2_rect *rect)
+static int mt9v022_set_fmt(struct soc_camera_device *icd,
+			   __u32 pixfmt, struct v4l2_rect *rect)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 	int ret;
 
 	/* The caller provides a supported format, as verified per call to
-	 * icd->try_fmt_cap(), datawidth is from our supported format list */
+	 * icd->try_fmt(), datawidth is from our supported format list */
 	switch (pixfmt) {
 	case V4L2_PIX_FMT_GREY:
 	case V4L2_PIX_FMT_Y16:
@@ -400,8 +400,8 @@ static int mt9v022_set_fmt_cap(struct soc_camera_device *icd,
 	return 0;
 }
 
-static int mt9v022_try_fmt_cap(struct soc_camera_device *icd,
-			       struct v4l2_format *f)
+static int mt9v022_try_fmt(struct soc_camera_device *icd,
+			   struct v4l2_format *f)
 {
 	if (f->fmt.pix.height < 32 + icd->y_skip_top)
 		f->fmt.pix.height = 32 + icd->y_skip_top;
@@ -538,8 +538,8 @@ static struct soc_camera_ops mt9v022_ops = {
 	.release		= mt9v022_release,
 	.start_capture		= mt9v022_start_capture,
 	.stop_capture		= mt9v022_stop_capture,
-	.set_fmt_cap		= mt9v022_set_fmt_cap,
-	.try_fmt_cap		= mt9v022_try_fmt_cap,
+	.set_fmt		= mt9v022_set_fmt,
+	.try_fmt		= mt9v022_try_fmt,
 	.set_bus_param		= mt9v022_set_bus_param,
 	.query_bus_param	= mt9v022_query_bus_param,
 	.controls		= mt9v022_controls,
diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index 96964b79afac..76f296642db6 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -755,9 +755,9 @@ static int ov772x_set_register(struct soc_camera_device *icd,
 }
 #endif
 
-static int ov772x_set_fmt_cap(struct soc_camera_device *icd,
-			      __u32                     pixfmt,
-			      struct v4l2_rect         *rect)
+static int ov772x_set_fmt(struct soc_camera_device *icd,
+			  __u32                     pixfmt,
+			  struct v4l2_rect         *rect)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 	int ret = -EINVAL;
@@ -778,8 +778,8 @@ static int ov772x_set_fmt_cap(struct soc_camera_device *icd,
 	return ret;
 }
 
-static int ov772x_try_fmt_cap(struct soc_camera_device *icd,
-			      struct v4l2_format       *f)
+static int ov772x_try_fmt(struct soc_camera_device *icd,
+			  struct v4l2_format       *f)
 {
 	struct v4l2_pix_format *pix  = &f->fmt.pix;
 	struct ov772x_priv     *priv;
@@ -868,8 +868,8 @@ static struct soc_camera_ops ov772x_ops = {
 	.release		= ov772x_release,
 	.start_capture		= ov772x_start_capture,
 	.stop_capture		= ov772x_stop_capture,
-	.set_fmt_cap		= ov772x_set_fmt_cap,
-	.try_fmt_cap		= ov772x_try_fmt_cap,
+	.set_fmt		= ov772x_set_fmt,
+	.try_fmt		= ov772x_try_fmt,
 	.set_bus_param		= ov772x_set_bus_param,
 	.query_bus_param	= ov772x_query_bus_param,
 	.get_chip_id		= ov772x_get_chip_id,
diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index a375872b1342..665eef236f5b 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -904,8 +904,8 @@ static int pxa_camera_try_bus_param(struct soc_camera_device *icd, __u32 pixfmt)
 	return soc_camera_bus_param_compatible(camera_flags, bus_flags) ? 0 : -EINVAL;
 }
 
-static int pxa_camera_set_fmt_cap(struct soc_camera_device *icd,
-				  __u32 pixfmt, struct v4l2_rect *rect)
+static int pxa_camera_set_fmt(struct soc_camera_device *icd,
+			      __u32 pixfmt, struct v4l2_rect *rect)
 {
 	const struct soc_camera_data_format *cam_fmt;
 	int ret;
@@ -920,15 +920,15 @@ static int pxa_camera_set_fmt_cap(struct soc_camera_device *icd,
 			return -EINVAL;
 	}
 
-	ret = icd->ops->set_fmt_cap(icd, pixfmt, rect);
+	ret = icd->ops->set_fmt(icd, pixfmt, rect);
 	if (pixfmt && !ret)
 		icd->current_fmt = cam_fmt;
 
 	return ret;
 }
 
-static int pxa_camera_try_fmt_cap(struct soc_camera_device *icd,
-				  struct v4l2_format *f)
+static int pxa_camera_try_fmt(struct soc_camera_device *icd,
+			      struct v4l2_format *f)
 {
 	const struct soc_camera_data_format *cam_fmt;
 	int ret = pxa_camera_try_bus_param(icd, f->fmt.pix.pixelformat);
@@ -960,7 +960,7 @@ static int pxa_camera_try_fmt_cap(struct soc_camera_device *icd,
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 
 	/* limit to sensor capabilities */
-	return icd->ops->try_fmt_cap(icd, f);
+	return icd->ops->try_fmt(icd, f);
 }
 
 static int pxa_camera_reqbufs(struct soc_camera_file *icf,
@@ -1068,8 +1068,8 @@ static struct soc_camera_host_ops pxa_soc_camera_host_ops = {
 	.remove		= pxa_camera_remove_device,
 	.suspend	= pxa_camera_suspend,
 	.resume		= pxa_camera_resume,
-	.set_fmt_cap	= pxa_camera_set_fmt_cap,
-	.try_fmt_cap	= pxa_camera_try_fmt_cap,
+	.set_fmt	= pxa_camera_set_fmt,
+	.try_fmt	= pxa_camera_try_fmt,
 	.init_videobuf	= pxa_camera_init_videobuf,
 	.reqbufs	= pxa_camera_reqbufs,
 	.poll		= pxa_camera_poll,
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 02f846d1908b..d284d5d1dd01 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -450,8 +450,8 @@ static int sh_mobile_ceu_try_bus_param(struct soc_camera_device *icd,
 	return 0;
 }
 
-static int sh_mobile_ceu_set_fmt_cap(struct soc_camera_device *icd,
-				     __u32 pixfmt, struct v4l2_rect *rect)
+static int sh_mobile_ceu_set_fmt(struct soc_camera_device *icd,
+				 __u32 pixfmt, struct v4l2_rect *rect)
 {
 	const struct soc_camera_data_format *cam_fmt;
 	int ret;
@@ -466,15 +466,15 @@ static int sh_mobile_ceu_set_fmt_cap(struct soc_camera_device *icd,
 			return -EINVAL;
 	}
 
-	ret = icd->ops->set_fmt_cap(icd, pixfmt, rect);
+	ret = icd->ops->set_fmt(icd, pixfmt, rect);
 	if (pixfmt && !ret)
 		icd->current_fmt = cam_fmt;
 
 	return ret;
 }
 
-static int sh_mobile_ceu_try_fmt_cap(struct soc_camera_device *icd,
-				     struct v4l2_format *f)
+static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
+				 struct v4l2_format *f)
 {
 	const struct soc_camera_data_format *cam_fmt;
 	int ret = sh_mobile_ceu_try_bus_param(icd, f->fmt.pix.pixelformat);
@@ -508,7 +508,7 @@ static int sh_mobile_ceu_try_fmt_cap(struct soc_camera_device *icd,
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 
 	/* limit to sensor capabilities */
-	return icd->ops->try_fmt_cap(icd, f);
+	return icd->ops->try_fmt(icd, f);
 }
 
 static int sh_mobile_ceu_reqbufs(struct soc_camera_file *icf,
@@ -576,8 +576,8 @@ static struct soc_camera_host_ops sh_mobile_ceu_host_ops = {
 	.owner		= THIS_MODULE,
 	.add		= sh_mobile_ceu_add_device,
 	.remove		= sh_mobile_ceu_remove_device,
-	.set_fmt_cap	= sh_mobile_ceu_set_fmt_cap,
-	.try_fmt_cap	= sh_mobile_ceu_try_fmt_cap,
+	.set_fmt	= sh_mobile_ceu_set_fmt,
+	.try_fmt	= sh_mobile_ceu_try_fmt,
 	.reqbufs	= sh_mobile_ceu_reqbufs,
 	.poll		= sh_mobile_ceu_poll,
 	.querycap	= sh_mobile_ceu_querycap,
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 01c33841d1c6..9db66a4fd1a3 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -73,7 +73,7 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 	}
 
 	/* limit format to hardware capabilities */
-	ret = ici->ops->try_fmt_cap(icd, f);
+	ret = ici->ops->try_fmt(icd, f);
 
 	return ret;
 }
@@ -324,7 +324,7 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 	rect.top	= icd->y_current;
 	rect.width	= f->fmt.pix.width;
 	rect.height	= f->fmt.pix.height;
-	ret = ici->ops->set_fmt_cap(icd, f->fmt.pix.pixelformat, &rect);
+	ret = ici->ops->set_fmt(icd, f->fmt.pix.pixelformat, &rect);
 	if (ret < 0) {
 		return ret;
 	} else if (!icd->current_fmt ||
@@ -553,7 +553,7 @@ static int soc_camera_s_crop(struct file *file, void *fh,
 	if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		return -EINVAL;
 
-	ret = ici->ops->set_fmt_cap(icd, 0, &a->c);
+	ret = ici->ops->set_fmt(icd, 0, &a->c);
 	if (!ret) {
 		icd->width	= a->c.width;
 		icd->height	= a->c.height;
diff --git a/drivers/media/video/soc_camera_platform.c b/drivers/media/video/soc_camera_platform.c
index bb7a9d480e8f..c23871e4c1e6 100644
--- a/drivers/media/video/soc_camera_platform.c
+++ b/drivers/media/video/soc_camera_platform.c
@@ -79,14 +79,14 @@ soc_camera_platform_query_bus_param(struct soc_camera_device *icd)
 	return p->bus_param;
 }
 
-static int soc_camera_platform_set_fmt_cap(struct soc_camera_device *icd,
-					   __u32 pixfmt, struct v4l2_rect *rect)
+static int soc_camera_platform_set_fmt(struct soc_camera_device *icd,
+				       __u32 pixfmt, struct v4l2_rect *rect)
 {
 	return 0;
 }
 
-static int soc_camera_platform_try_fmt_cap(struct soc_camera_device *icd,
-					   struct v4l2_format *f)
+static int soc_camera_platform_try_fmt(struct soc_camera_device *icd,
+				       struct v4l2_format *f)
 {
 	struct soc_camera_platform_info *p = soc_camera_platform_get_info(icd);
 
@@ -124,8 +124,8 @@ static struct soc_camera_ops soc_camera_platform_ops = {
 	.release		= soc_camera_platform_release,
 	.start_capture		= soc_camera_platform_start_capture,
 	.stop_capture		= soc_camera_platform_stop_capture,
-	.set_fmt_cap		= soc_camera_platform_set_fmt_cap,
-	.try_fmt_cap		= soc_camera_platform_try_fmt_cap,
+	.set_fmt		= soc_camera_platform_set_fmt,
+	.try_fmt		= soc_camera_platform_try_fmt,
 	.set_bus_param		= soc_camera_platform_set_bus_param,
 	.query_bus_param	= soc_camera_platform_query_bus_param,
 };
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 8e8fcb75dacb..b14f6ddc9e18 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -66,9 +66,8 @@ struct soc_camera_host_ops {
 	void (*remove)(struct soc_camera_device *);
 	int (*suspend)(struct soc_camera_device *, pm_message_t state);
 	int (*resume)(struct soc_camera_device *);
-	int (*set_fmt_cap)(struct soc_camera_device *, __u32,
-			   struct v4l2_rect *);
-	int (*try_fmt_cap)(struct soc_camera_device *, struct v4l2_format *);
+	int (*set_fmt)(struct soc_camera_device *, __u32, struct v4l2_rect *);
+	int (*try_fmt)(struct soc_camera_device *, struct v4l2_format *);
 	void (*init_videobuf)(struct videobuf_queue *,
 			      struct soc_camera_device *);
 	int (*reqbufs)(struct soc_camera_file *, struct v4l2_requestbuffers *);
@@ -125,9 +124,8 @@ struct soc_camera_ops {
 	int (*release)(struct soc_camera_device *);
 	int (*start_capture)(struct soc_camera_device *);
 	int (*stop_capture)(struct soc_camera_device *);
-	int (*set_fmt_cap)(struct soc_camera_device *, __u32,
-			   struct v4l2_rect *);
-	int (*try_fmt_cap)(struct soc_camera_device *, struct v4l2_format *);
+	int (*set_fmt)(struct soc_camera_device *, __u32, struct v4l2_rect *);
+	int (*try_fmt)(struct soc_camera_device *, struct v4l2_format *);
 	unsigned long (*query_bus_param)(struct soc_camera_device *);
 	int (*set_bus_param)(struct soc_camera_device *, unsigned long);
 	int (*get_chip_id)(struct soc_camera_device *,
-- 
cgit v1.2.3


From d2e3dce083bc96b3bfb71603eaa1726181f7184b Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lyakh@axis700.grange>
Date: Mon, 1 Dec 2008 09:45:24 -0300
Subject: V4L/DVB (9789): soc-camera: add a per-camera device host private data
 pointer

This pointer will be used by pxa_camera.c to point to its pixel format
data.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/soc_camera.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index b14f6ddc9e18..dddaf45c9583 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -42,6 +42,7 @@ struct soc_camera_device {
 	const struct soc_camera_data_format *formats;
 	int num_formats;
 	struct module *owner;
+	void *host_priv;		/* per-device host private data */
 	/* soc_camera.c private count. Only accessed with video_lock held */
 	int use_count;
 };
-- 
cgit v1.2.3


From c2786ad27104c558b92343e8816e18654aae1759 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 1 Dec 2008 09:45:27 -0300
Subject: V4L/DVB (9790): soc-camera: pixel format negotiation - core support

Allocate and fill a list of formats, supported by this specific
camera-host combination. Use it for format enumeration. Take care to stay
backwards-compatible.

Camera hosts rely on sensor formats available, as well as
host specific translations. We add a structure so that hosts
can define a translation table and use it for format check
and setup.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/soc_camera.c | 93 ++++++++++++++++++++++++++++++++++------
 include/media/soc_camera.h       | 25 ++++++++++-
 2 files changed, 105 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 9db66a4fd1a3..5e48c2cc1a44 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -47,6 +47,18 @@ const struct soc_camera_data_format *soc_camera_format_by_fourcc(
 }
 EXPORT_SYMBOL(soc_camera_format_by_fourcc);
 
+const struct soc_camera_format_xlate *soc_camera_xlate_by_fourcc(
+	struct soc_camera_device *icd, unsigned int fourcc)
+{
+	unsigned int i;
+
+	for (i = 0; i < icd->num_user_formats; i++)
+		if (icd->user_formats[i].host_fmt->fourcc == fourcc)
+			return icd->user_formats + i;
+	return NULL;
+}
+EXPORT_SYMBOL(soc_camera_xlate_by_fourcc);
+
 static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 				      struct v4l2_format *f)
 {
@@ -161,6 +173,59 @@ static int soc_camera_dqbuf(struct file *file, void *priv,
 	return videobuf_dqbuf(&icf->vb_vidq, p, file->f_flags & O_NONBLOCK);
 }
 
+static int soc_camera_init_user_formats(struct soc_camera_device *icd)
+{
+	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
+	int i, fmts = 0;
+
+	if (!ici->ops->get_formats)
+		/*
+		 * Fallback mode - the host will have to serve all
+		 * sensor-provided formats one-to-one to the user
+		 */
+		fmts = icd->num_formats;
+	else
+		/*
+		 * First pass - only count formats this host-sensor
+		 * configuration can provide
+		 */
+		for (i = 0; i < icd->num_formats; i++)
+			fmts += ici->ops->get_formats(icd, i, NULL);
+
+	if (!fmts)
+		return -ENXIO;
+
+	icd->user_formats =
+		vmalloc(fmts * sizeof(struct soc_camera_format_xlate));
+	if (!icd->user_formats)
+		return -ENOMEM;
+
+	icd->num_user_formats = fmts;
+	fmts = 0;
+
+	dev_dbg(&icd->dev, "Found %d supported formats.\n", fmts);
+
+	/* Second pass - actually fill data formats */
+	for (i = 0; i < icd->num_formats; i++)
+		if (!ici->ops->get_formats) {
+			icd->user_formats[i].host_fmt = icd->formats + i;
+			icd->user_formats[i].cam_fmt = icd->formats + i;
+			icd->user_formats[i].buswidth = icd->formats[i].depth;
+		} else {
+			fmts += ici->ops->get_formats(icd, i,
+						      &icd->user_formats[fmts]);
+		}
+
+	icd->current_fmt = icd->user_formats[0].host_fmt;
+
+	return 0;
+}
+
+static void soc_camera_free_user_formats(struct soc_camera_device *icd)
+{
+	vfree(icd->user_formats);
+}
+
 static int soc_camera_open(struct inode *inode, struct file *file)
 {
 	struct video_device *vdev;
@@ -197,10 +262,12 @@ static int soc_camera_open(struct inode *inode, struct file *file)
 
 	/* Now we really have to activate the camera */
 	if (icd->use_count == 1) {
+		ret = soc_camera_init_user_formats(icd);
+		if (ret < 0)
+			goto eiufmt;
 		ret = ici->ops->add(icd);
 		if (ret < 0) {
 			dev_err(&icd->dev, "Couldn't activate the camera: %d\n", ret);
-			icd->use_count--;
 			goto eiciadd;
 		}
 	}
@@ -216,6 +283,9 @@ static int soc_camera_open(struct inode *inode, struct file *file)
 
 	/* All errors are entered with the video_lock held */
 eiciadd:
+	soc_camera_free_user_formats(icd);
+eiufmt:
+	icd->use_count--;
 	module_put(ici->ops->owner);
 emgi:
 	module_put(icd->ops->owner);
@@ -234,8 +304,10 @@ static int soc_camera_close(struct inode *inode, struct file *file)
 
 	mutex_lock(&video_lock);
 	icd->use_count--;
-	if (!icd->use_count)
+	if (!icd->use_count) {
 		ici->ops->remove(icd);
+		soc_camera_free_user_formats(icd);
+	}
 	module_put(icd->ops->owner);
 	module_put(ici->ops->owner);
 	mutex_unlock(&video_lock);
@@ -311,6 +383,7 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 	struct soc_camera_device *icd = icf->icd;
 	struct soc_camera_host *ici =
 		to_soc_camera_host(icd->dev.parent);
+	__u32 pixfmt = f->fmt.pix.pixelformat;
 	int ret;
 	struct v4l2_rect rect;
 
@@ -328,14 +401,12 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 	if (ret < 0) {
 		return ret;
 	} else if (!icd->current_fmt ||
-		   icd->current_fmt->fourcc != f->fmt.pix.pixelformat) {
-		dev_err(&ici->dev, "Host driver hasn't set up current "
-			"format correctly!\n");
+		   icd->current_fmt->fourcc != pixfmt) {
+		dev_err(&ici->dev,
+			"Host driver hasn't set up current format correctly!\n");
 		return -EINVAL;
 	}
 
-	/* buswidth may be further adjusted by the ici */
-	icd->buswidth		= icd->current_fmt->depth;
 	icd->width		= rect.width;
 	icd->height		= rect.height;
 	icf->vb_vidq.field	= f->fmt.pix.field;
@@ -347,7 +418,7 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 		icd->width, icd->height);
 
 	/* set physical bus parameters */
-	return ici->ops->set_bus_param(icd, f->fmt.pix.pixelformat);
+	return ici->ops->set_bus_param(icd, pixfmt);
 }
 
 static int soc_camera_enum_fmt_vid_cap(struct file *file, void  *priv,
@@ -359,10 +430,10 @@ static int soc_camera_enum_fmt_vid_cap(struct file *file, void  *priv,
 
 	WARN_ON(priv != file->private_data);
 
-	if (f->index >= icd->num_formats)
+	if (f->index >= icd->num_user_formats)
 		return -EINVAL;
 
-	format = &icd->formats[f->index];
+	format = icd->user_formats[f->index].host_fmt;
 
 	strlcpy(f->description, format->name, sizeof(f->description));
 	f->pixelformat = format->fourcc;
@@ -919,8 +990,6 @@ int soc_camera_video_start(struct soc_camera_device *icd)
 	vdev->minor		= -1;
 	vdev->tvnorms		= V4L2_STD_UNKNOWN,
 
-	icd->current_fmt = &icd->formats[0];
-
 	err = video_register_device(vdev, VFL_TYPE_GRABBER, vdev->minor);
 	if (err < 0) {
 		dev_err(vdev->parent, "video_register_device failed\n");
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index dddaf45c9583..da57ffdaec4d 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -41,6 +41,8 @@ struct soc_camera_device {
 	const struct soc_camera_data_format *current_fmt;
 	const struct soc_camera_data_format *formats;
 	int num_formats;
+	struct soc_camera_format_xlate *user_formats;
+	int num_user_formats;
 	struct module *owner;
 	void *host_priv;		/* per-device host private data */
 	/* soc_camera.c private count. Only accessed with video_lock held */
@@ -65,8 +67,10 @@ struct soc_camera_host_ops {
 	struct module *owner;
 	int (*add)(struct soc_camera_device *);
 	void (*remove)(struct soc_camera_device *);
-	int (*suspend)(struct soc_camera_device *, pm_message_t state);
+	int (*suspend)(struct soc_camera_device *, pm_message_t);
 	int (*resume)(struct soc_camera_device *);
+	int (*get_formats)(struct soc_camera_device *, int,
+			   struct soc_camera_format_xlate *);
 	int (*set_fmt)(struct soc_camera_device *, __u32, struct v4l2_rect *);
 	int (*try_fmt)(struct soc_camera_device *, struct v4l2_format *);
 	void (*init_videobuf)(struct videobuf_queue *,
@@ -107,6 +111,8 @@ extern void soc_camera_video_stop(struct soc_camera_device *icd);
 
 extern const struct soc_camera_data_format *soc_camera_format_by_fourcc(
 	struct soc_camera_device *icd, unsigned int fourcc);
+extern const struct soc_camera_format_xlate *soc_camera_xlate_by_fourcc(
+	struct soc_camera_device *icd, unsigned int fourcc);
 
 struct soc_camera_data_format {
 	const char *name;
@@ -115,6 +121,23 @@ struct soc_camera_data_format {
 	enum v4l2_colorspace colorspace;
 };
 
+/**
+ * struct soc_camera_format_xlate - match between host and sensor formats
+ * @cam_fmt: sensor format provided by the sensor
+ * @host_fmt: host format after host translation from cam_fmt
+ * @buswidth: bus width for this format
+ *
+ * Host and sensor translation structure. Used in table of host and sensor
+ * formats matchings in soc_camera_device. A host can override the generic list
+ * generation by implementing get_formats(), and use it for format checks and
+ * format setup.
+ */
+struct soc_camera_format_xlate {
+	const struct soc_camera_data_format *cam_fmt;
+	const struct soc_camera_data_format *host_fmt;
+	unsigned char buswidth;
+};
+
 struct soc_camera_ops {
 	struct module *owner;
 	int (*probe)(struct soc_camera_device *);
-- 
cgit v1.2.3


From 67bc04dd04bfe6a6337a9c6773e4c36645360332 Mon Sep 17 00:00:00 2001
From: Vaibhav Hiremath <hvaibhav@ti.com>
Date: Fri, 5 Dec 2008 10:13:40 -0300
Subject: V4L/DVB (9816): v4l2-int-if: add three new ioctls for std handling
 and routing

These changes are needed for the new TVP514x driver.

Signed-off-by: Brijesh Jadav <brijesh.j@ti.com>
Signed-off-by: Hardik Shah <hardik.shah@ti.com>
Signed-off-by: Manjunath Hadli <mrh@ti.com>
Signed-off-by: R Sivaraj <sivaraj@ti.com>
Signed-off-by: Vaibhav Hiremath <hvaibhav@ti.com>
Signed-off-by: Karicheri Muralidharan <m-karicheri2@ti.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-int-device.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-int-device.h b/include/media/v4l2-int-device.h
index 9c2df41dbf92..ecda3c725837 100644
--- a/include/media/v4l2-int-device.h
+++ b/include/media/v4l2-int-device.h
@@ -183,6 +183,9 @@ enum v4l2_int_ioctl_num {
 	vidioc_int_s_crop_num,
 	vidioc_int_g_parm_num,
 	vidioc_int_s_parm_num,
+	vidioc_int_querystd_num,
+	vidioc_int_s_std_num,
+	vidioc_int_s_video_routing_num,
 
 	/*
 	 *
@@ -284,6 +287,9 @@ V4L2_INT_WRAPPER_1(g_crop, struct v4l2_crop, *);
 V4L2_INT_WRAPPER_1(s_crop, struct v4l2_crop, *);
 V4L2_INT_WRAPPER_1(g_parm, struct v4l2_streamparm, *);
 V4L2_INT_WRAPPER_1(s_parm, struct v4l2_streamparm, *);
+V4L2_INT_WRAPPER_1(querystd, v4l2_std_id, *);
+V4L2_INT_WRAPPER_1(s_std, v4l2_std_id, *);
+V4L2_INT_WRAPPER_1(s_video_routing, struct v4l2_routing, *);
 
 V4L2_INT_WRAPPER_0(dev_init);
 V4L2_INT_WRAPPER_0(dev_exit);
-- 
cgit v1.2.3


From 07b1747c8d0bb463311f9dd05d4c013765abe2eb Mon Sep 17 00:00:00 2001
From: Vaibhav Hiremath <hvaibhav@ti.com>
Date: Fri, 5 Dec 2008 10:19:36 -0300
Subject: V4L/DVB (9817): v4l: add new tvp514x I2C video decoder driver

Signed-off-by: Brijesh Jadav <brijesh.j@ti.com>
Signed-off-by: Hardik Shah <hardik.shah@ti.com>
Signed-off-by: Manjunath Hadli <mrh@ti.com>
Signed-off-by: R Sivaraj <sivaraj@ti.com>
Signed-off-by: Vaibhav Hiremath <hvaibhav@ti.com>
Signed-off-by: Karicheri Muralidharan <m-karicheri2@ti.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Reviewed-by: Hans Verkuil <hverkuil@xs4all.nl>
Reviewed-by: David Brownell <david-b@pacbell.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig        |   11 +
 drivers/media/video/Makefile       |    1 +
 drivers/media/video/tvp514x.c      | 1569 ++++++++++++++++++++++++++++++++++++
 drivers/media/video/tvp514x_regs.h |  297 +++++++
 include/media/tvp514x.h            |  118 +++
 5 files changed, 1996 insertions(+)
 create mode 100644 drivers/media/video/tvp514x.c
 create mode 100644 drivers/media/video/tvp514x_regs.h
 create mode 100644 include/media/tvp514x.h

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index c487d2a1285f..eb9ff7e107d6 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -361,6 +361,17 @@ config VIDEO_SAA7191
 	  To compile this driver as a module, choose M here: the
 	  module will be called saa7191.
 
+config VIDEO_TVP514X
+	tristate "Texas Instruments TVP514x video decoder"
+	depends on VIDEO_V4L2 && I2C
+	---help---
+	  This is a Video4Linux2 sensor-level driver for the TI TVP5146/47
+	  decoder. It is currently working with the TI OMAP3 camera
+	  controller.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called tvp514x.
+
 config VIDEO_TVP5150
 	tristate "Texas Instruments TVP5150 video decoder"
 	depends on VIDEO_V4L2 && I2C
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index ac147b1aea36..492ab3dce71b 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_VIDEO_CX88) += cx88/
 obj-$(CONFIG_VIDEO_EM28XX) += em28xx/
 obj-$(CONFIG_VIDEO_USBVISION) += usbvision/
 obj-$(CONFIG_VIDEO_TVP5150) += tvp5150.o
+obj-$(CONFIG_VIDEO_TVP514X) += tvp514x.o
 obj-$(CONFIG_VIDEO_PVRUSB2) += pvrusb2/
 obj-$(CONFIG_VIDEO_MSP3400) += msp3400.o
 obj-$(CONFIG_VIDEO_CS5345) += cs5345.o
diff --git a/drivers/media/video/tvp514x.c b/drivers/media/video/tvp514x.c
new file mode 100644
index 000000000000..ac9aa40d09f6
--- /dev/null
+++ b/drivers/media/video/tvp514x.c
@@ -0,0 +1,1569 @@
+/*
+ * drivers/media/video/tvp514x.c
+ *
+ * TI TVP5146/47 decoder driver
+ *
+ * Copyright (C) 2008 Texas Instruments Inc
+ * Author: Vaibhav Hiremath <hvaibhav@ti.com>
+ *
+ * Contributors:
+ *     Sivaraj R <sivaraj@ti.com>
+ *     Brijesh R Jadav <brijesh.j@ti.com>
+ *     Hardik Shah <hardik.shah@ti.com>
+ *     Manjunath Hadli <mrh@ti.com>
+ *     Karicheri Muralidharan <m-karicheri2@ti.com>
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-int-device.h>
+#include <media/tvp514x.h>
+
+#include "tvp514x_regs.h"
+
+/* Module Name */
+#define TVP514X_MODULE_NAME		"tvp514x"
+
+/* Private macros for TVP */
+#define I2C_RETRY_COUNT                 (5)
+#define LOCK_RETRY_COUNT                (5)
+#define LOCK_RETRY_DELAY                (200)
+
+/* Debug functions */
+static int debug;
+module_param(debug, bool, 0644);
+MODULE_PARM_DESC(debug, "Debug level (0-1)");
+
+#define dump_reg(client, reg, val)				\
+	do {							\
+		val = tvp514x_read_reg(client, reg);		\
+		v4l_info(client, "Reg(0x%.2X): 0x%.2X\n", reg, val); \
+	} while (0)
+
+/**
+ * enum tvp514x_std - enum for supported standards
+ */
+enum tvp514x_std {
+	STD_NTSC_MJ = 0,
+	STD_PAL_BDGHIN,
+	STD_INVALID
+};
+
+/**
+ * enum tvp514x_state - enum for different decoder states
+ */
+enum tvp514x_state {
+	STATE_NOT_DETECTED,
+	STATE_DETECTED
+};
+
+/**
+ * struct tvp514x_std_info - Structure to store standard informations
+ * @width: Line width in pixels
+ * @height:Number of active lines
+ * @video_std: Value to write in REG_VIDEO_STD register
+ * @standard: v4l2 standard structure information
+ */
+struct tvp514x_std_info {
+	unsigned long width;
+	unsigned long height;
+	u8 video_std;
+	struct v4l2_standard standard;
+};
+
+/**
+ * struct tvp514x_decoded - TVP5146/47 decoder object
+ * @v4l2_int_device: Slave handle
+ * @pdata: Board specific
+ * @client: I2C client data
+ * @id: Entry from I2C table
+ * @ver: Chip version
+ * @state: TVP5146/47 decoder state - detected or not-detected
+ * @pix: Current pixel format
+ * @num_fmts: Number of formats
+ * @fmt_list: Format list
+ * @current_std: Current standard
+ * @num_stds: Number of standards
+ * @std_list: Standards list
+ * @route: input and output routing at chip level
+ */
+struct tvp514x_decoder {
+	struct v4l2_int_device *v4l2_int_device;
+	const struct tvp514x_platform_data *pdata;
+	struct i2c_client *client;
+
+	struct i2c_device_id *id;
+
+	int ver;
+	enum tvp514x_state state;
+
+	struct v4l2_pix_format pix;
+	int num_fmts;
+	const struct v4l2_fmtdesc *fmt_list;
+
+	enum tvp514x_std current_std;
+	int num_stds;
+	struct tvp514x_std_info *std_list;
+
+	struct v4l2_routing route;
+};
+
+/* TVP514x default register values */
+static struct tvp514x_reg tvp514x_reg_list[] = {
+	{TOK_WRITE, REG_INPUT_SEL, 0x05},	/* Composite selected */
+	{TOK_WRITE, REG_AFE_GAIN_CTRL, 0x0F},
+	{TOK_WRITE, REG_VIDEO_STD, 0x00},	/* Auto mode */
+	{TOK_WRITE, REG_OPERATION_MODE, 0x00},
+	{TOK_SKIP, REG_AUTOSWITCH_MASK, 0x3F},
+	{TOK_WRITE, REG_COLOR_KILLER, 0x10},
+	{TOK_WRITE, REG_LUMA_CONTROL1, 0x00},
+	{TOK_WRITE, REG_LUMA_CONTROL2, 0x00},
+	{TOK_WRITE, REG_LUMA_CONTROL3, 0x02},
+	{TOK_WRITE, REG_BRIGHTNESS, 0x80},
+	{TOK_WRITE, REG_CONTRAST, 0x80},
+	{TOK_WRITE, REG_SATURATION, 0x80},
+	{TOK_WRITE, REG_HUE, 0x00},
+	{TOK_WRITE, REG_CHROMA_CONTROL1, 0x00},
+	{TOK_WRITE, REG_CHROMA_CONTROL2, 0x0E},
+	{TOK_SKIP, 0x0F, 0x00},	/* Reserved */
+	{TOK_WRITE, REG_COMP_PR_SATURATION, 0x80},
+	{TOK_WRITE, REG_COMP_Y_CONTRAST, 0x80},
+	{TOK_WRITE, REG_COMP_PB_SATURATION, 0x80},
+	{TOK_SKIP, 0x13, 0x00},	/* Reserved */
+	{TOK_WRITE, REG_COMP_Y_BRIGHTNESS, 0x80},
+	{TOK_SKIP, 0x15, 0x00},	/* Reserved */
+	{TOK_SKIP, REG_AVID_START_PIXEL_LSB, 0x55},	/* NTSC timing */
+	{TOK_SKIP, REG_AVID_START_PIXEL_MSB, 0x00},
+	{TOK_SKIP, REG_AVID_STOP_PIXEL_LSB, 0x25},
+	{TOK_SKIP, REG_AVID_STOP_PIXEL_MSB, 0x03},
+	{TOK_SKIP, REG_HSYNC_START_PIXEL_LSB, 0x00},	/* NTSC timing */
+	{TOK_SKIP, REG_HSYNC_START_PIXEL_MSB, 0x00},
+	{TOK_SKIP, REG_HSYNC_STOP_PIXEL_LSB, 0x40},
+	{TOK_SKIP, REG_HSYNC_STOP_PIXEL_MSB, 0x00},
+	{TOK_SKIP, REG_VSYNC_START_LINE_LSB, 0x04},	/* NTSC timing */
+	{TOK_SKIP, REG_VSYNC_START_LINE_MSB, 0x00},
+	{TOK_SKIP, REG_VSYNC_STOP_LINE_LSB, 0x07},
+	{TOK_SKIP, REG_VSYNC_STOP_LINE_MSB, 0x00},
+	{TOK_SKIP, REG_VBLK_START_LINE_LSB, 0x01},	/* NTSC timing */
+	{TOK_SKIP, REG_VBLK_START_LINE_MSB, 0x00},
+	{TOK_SKIP, REG_VBLK_STOP_LINE_LSB, 0x15},
+	{TOK_SKIP, REG_VBLK_STOP_LINE_MSB, 0x00},
+	{TOK_SKIP, 0x26, 0x00},	/* Reserved */
+	{TOK_SKIP, 0x27, 0x00},	/* Reserved */
+	{TOK_SKIP, REG_FAST_SWTICH_CONTROL, 0xCC},
+	{TOK_SKIP, 0x29, 0x00},	/* Reserved */
+	{TOK_SKIP, REG_FAST_SWTICH_SCART_DELAY, 0x00},
+	{TOK_SKIP, 0x2B, 0x00},	/* Reserved */
+	{TOK_SKIP, REG_SCART_DELAY, 0x00},
+	{TOK_SKIP, REG_CTI_DELAY, 0x00},
+	{TOK_SKIP, REG_CTI_CONTROL, 0x00},
+	{TOK_SKIP, 0x2F, 0x00},	/* Reserved */
+	{TOK_SKIP, 0x30, 0x00},	/* Reserved */
+	{TOK_SKIP, 0x31, 0x00},	/* Reserved */
+	{TOK_WRITE, REG_SYNC_CONTROL, 0x00},	/* HS, VS active high */
+	{TOK_WRITE, REG_OUTPUT_FORMATTER1, 0x00},	/* 10-bit BT.656 */
+	{TOK_WRITE, REG_OUTPUT_FORMATTER2, 0x11},	/* Enable clk & data */
+	{TOK_WRITE, REG_OUTPUT_FORMATTER3, 0xEE},	/* Enable AVID & FLD */
+	{TOK_WRITE, REG_OUTPUT_FORMATTER4, 0xAF},	/* Enable VS & HS */
+	{TOK_WRITE, REG_OUTPUT_FORMATTER5, 0xFF},
+	{TOK_WRITE, REG_OUTPUT_FORMATTER6, 0xFF},
+	{TOK_WRITE, REG_CLEAR_LOST_LOCK, 0x01},	/* Clear status */
+	{TOK_TERM, 0, 0},
+};
+
+/* List of image formats supported by TVP5146/47 decoder
+ * Currently we are using 8 bit mode only, but can be
+ * extended to 10/20 bit mode.
+ */
+static const struct v4l2_fmtdesc tvp514x_fmt_list[] = {
+	{
+	 .index = 0,
+	 .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
+	 .flags = 0,
+	 .description = "8-bit UYVY 4:2:2 Format",
+	 .pixelformat = V4L2_PIX_FMT_UYVY,
+	},
+};
+
+/*
+ * Supported standards -
+ *
+ * Currently supports two standards only, need to add support for rest of the
+ * modes, like SECAM, etc...
+ */
+static struct tvp514x_std_info tvp514x_std_list[] = {
+	/* Standard: STD_NTSC_MJ */
+	[STD_NTSC_MJ] = {
+	 .width = NTSC_NUM_ACTIVE_PIXELS,
+	 .height = NTSC_NUM_ACTIVE_LINES,
+	 .video_std = VIDEO_STD_NTSC_MJ_BIT,
+	 .standard = {
+		      .index = 0,
+		      .id = V4L2_STD_NTSC,
+		      .name = "NTSC",
+		      .frameperiod = {1001, 30000},
+		      .framelines = 525
+		     },
+	/* Standard: STD_PAL_BDGHIN */
+	},
+	[STD_PAL_BDGHIN] = {
+	 .width = PAL_NUM_ACTIVE_PIXELS,
+	 .height = PAL_NUM_ACTIVE_LINES,
+	 .video_std = VIDEO_STD_PAL_BDGHIN_BIT,
+	 .standard = {
+		      .index = 1,
+		      .id = V4L2_STD_PAL,
+		      .name = "PAL",
+		      .frameperiod = {1, 25},
+		      .framelines = 625
+		     },
+	},
+	/* Standard: need to add for additional standard */
+};
+/*
+ * Control structure for Auto Gain
+ *     This is temporary data, will get replaced once
+ *     v4l2_ctrl_query_fill supports it.
+ */
+static const struct v4l2_queryctrl tvp514x_autogain_ctrl = {
+	.id = V4L2_CID_AUTOGAIN,
+	.name = "Gain, Automatic",
+	.type = V4L2_CTRL_TYPE_BOOLEAN,
+	.minimum = 0,
+	.maximum = 1,
+	.step = 1,
+	.default_value = 1,
+};
+
+/*
+ * Read a value from a register in an TVP5146/47 decoder device.
+ * Returns value read if successful, or non-zero (-1) otherwise.
+ */
+static int tvp514x_read_reg(struct i2c_client *client, u8 reg)
+{
+	int err;
+	int retry = 0;
+read_again:
+
+	err = i2c_smbus_read_byte_data(client, reg);
+	if (err == -1) {
+		if (retry <= I2C_RETRY_COUNT) {
+			v4l_warn(client, "Read: retry ... %d\n", retry);
+			retry++;
+			msleep_interruptible(10);
+			goto read_again;
+		}
+	}
+
+	return err;
+}
+
+/*
+ * Write a value to a register in an TVP5146/47 decoder device.
+ * Returns zero if successful, or non-zero otherwise.
+ */
+static int tvp514x_write_reg(struct i2c_client *client, u8 reg, u8 val)
+{
+	int err;
+	int retry = 0;
+write_again:
+
+	err = i2c_smbus_write_byte_data(client, reg, val);
+	if (err) {
+		if (retry <= I2C_RETRY_COUNT) {
+			v4l_warn(client, "Write: retry ... %d\n", retry);
+			retry++;
+			msleep_interruptible(10);
+			goto write_again;
+		}
+	}
+
+	return err;
+}
+
+/*
+ * tvp514x_write_regs : Initializes a list of TVP5146/47 registers
+ *		if token is TOK_TERM, then entire write operation terminates
+ *		if token is TOK_DELAY, then a delay of 'val' msec is introduced
+ *		if token is TOK_SKIP, then the register write is skipped
+ *		if token is TOK_WRITE, then the register write is performed
+ *
+ * reglist - list of registers to be written
+ * Returns zero if successful, or non-zero otherwise.
+ */
+static int tvp514x_write_regs(struct i2c_client *client,
+			      const struct tvp514x_reg reglist[])
+{
+	int err;
+	const struct tvp514x_reg *next = reglist;
+
+	for (; next->token != TOK_TERM; next++) {
+		if (next->token == TOK_DELAY) {
+			msleep(next->val);
+			continue;
+		}
+
+		if (next->token == TOK_SKIP)
+			continue;
+
+		err = tvp514x_write_reg(client, next->reg, (u8) next->val);
+		if (err) {
+			v4l_err(client, "Write failed. Err[%d]\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * tvp514x_get_current_std:
+ * Returns the current standard detected by TVP5146/47
+ */
+static enum tvp514x_std tvp514x_get_current_std(struct tvp514x_decoder
+						*decoder)
+{
+	u8 std, std_status;
+
+	std = tvp514x_read_reg(decoder->client, REG_VIDEO_STD);
+	if ((std & VIDEO_STD_MASK) == VIDEO_STD_AUTO_SWITCH_BIT) {
+		/* use the standard status register */
+		std_status = tvp514x_read_reg(decoder->client,
+				REG_VIDEO_STD_STATUS);
+	} else
+		std_status = std;	/* use the standard register itself */
+
+	switch (std_status & VIDEO_STD_MASK) {
+	case VIDEO_STD_NTSC_MJ_BIT:
+		return STD_NTSC_MJ;
+
+	case VIDEO_STD_PAL_BDGHIN_BIT:
+		return STD_PAL_BDGHIN;
+
+	default:
+		return STD_INVALID;
+	}
+
+	return STD_INVALID;
+}
+
+/*
+ * TVP5146/47 register dump function
+ */
+static void tvp514x_reg_dump(struct tvp514x_decoder *decoder)
+{
+	u8 value;
+
+	dump_reg(decoder->client, REG_INPUT_SEL, value);
+	dump_reg(decoder->client, REG_AFE_GAIN_CTRL, value);
+	dump_reg(decoder->client, REG_VIDEO_STD, value);
+	dump_reg(decoder->client, REG_OPERATION_MODE, value);
+	dump_reg(decoder->client, REG_COLOR_KILLER, value);
+	dump_reg(decoder->client, REG_LUMA_CONTROL1, value);
+	dump_reg(decoder->client, REG_LUMA_CONTROL2, value);
+	dump_reg(decoder->client, REG_LUMA_CONTROL3, value);
+	dump_reg(decoder->client, REG_BRIGHTNESS, value);
+	dump_reg(decoder->client, REG_CONTRAST, value);
+	dump_reg(decoder->client, REG_SATURATION, value);
+	dump_reg(decoder->client, REG_HUE, value);
+	dump_reg(decoder->client, REG_CHROMA_CONTROL1, value);
+	dump_reg(decoder->client, REG_CHROMA_CONTROL2, value);
+	dump_reg(decoder->client, REG_COMP_PR_SATURATION, value);
+	dump_reg(decoder->client, REG_COMP_Y_CONTRAST, value);
+	dump_reg(decoder->client, REG_COMP_PB_SATURATION, value);
+	dump_reg(decoder->client, REG_COMP_Y_BRIGHTNESS, value);
+	dump_reg(decoder->client, REG_AVID_START_PIXEL_LSB, value);
+	dump_reg(decoder->client, REG_AVID_START_PIXEL_MSB, value);
+	dump_reg(decoder->client, REG_AVID_STOP_PIXEL_LSB, value);
+	dump_reg(decoder->client, REG_AVID_STOP_PIXEL_MSB, value);
+	dump_reg(decoder->client, REG_HSYNC_START_PIXEL_LSB, value);
+	dump_reg(decoder->client, REG_HSYNC_START_PIXEL_MSB, value);
+	dump_reg(decoder->client, REG_HSYNC_STOP_PIXEL_LSB, value);
+	dump_reg(decoder->client, REG_HSYNC_STOP_PIXEL_MSB, value);
+	dump_reg(decoder->client, REG_VSYNC_START_LINE_LSB, value);
+	dump_reg(decoder->client, REG_VSYNC_START_LINE_MSB, value);
+	dump_reg(decoder->client, REG_VSYNC_STOP_LINE_LSB, value);
+	dump_reg(decoder->client, REG_VSYNC_STOP_LINE_MSB, value);
+	dump_reg(decoder->client, REG_VBLK_START_LINE_LSB, value);
+	dump_reg(decoder->client, REG_VBLK_START_LINE_MSB, value);
+	dump_reg(decoder->client, REG_VBLK_STOP_LINE_LSB, value);
+	dump_reg(decoder->client, REG_VBLK_STOP_LINE_MSB, value);
+	dump_reg(decoder->client, REG_SYNC_CONTROL, value);
+	dump_reg(decoder->client, REG_OUTPUT_FORMATTER1, value);
+	dump_reg(decoder->client, REG_OUTPUT_FORMATTER2, value);
+	dump_reg(decoder->client, REG_OUTPUT_FORMATTER3, value);
+	dump_reg(decoder->client, REG_OUTPUT_FORMATTER4, value);
+	dump_reg(decoder->client, REG_OUTPUT_FORMATTER5, value);
+	dump_reg(decoder->client, REG_OUTPUT_FORMATTER6, value);
+	dump_reg(decoder->client, REG_CLEAR_LOST_LOCK, value);
+}
+
+/*
+ * Configure the TVP5146/47 with the current register settings
+ * Returns zero if successful, or non-zero otherwise.
+ */
+static int tvp514x_configure(struct tvp514x_decoder *decoder)
+{
+	int err;
+
+	/* common register initialization */
+	err =
+	    tvp514x_write_regs(decoder->client, tvp514x_reg_list);
+	if (err)
+		return err;
+
+	if (debug)
+		tvp514x_reg_dump(decoder);
+
+	return 0;
+}
+
+/*
+ * Detect if an tvp514x is present, and if so which revision.
+ * A device is considered to be detected if the chip ID (LSB and MSB)
+ * registers match the expected values.
+ * Any value of the rom version register is accepted.
+ * Returns ENODEV error number if no device is detected, or zero
+ * if a device is detected.
+ */
+static int tvp514x_detect(struct tvp514x_decoder *decoder)
+{
+	u8 chip_id_msb, chip_id_lsb, rom_ver;
+
+	chip_id_msb = tvp514x_read_reg(decoder->client, REG_CHIP_ID_MSB);
+	chip_id_lsb = tvp514x_read_reg(decoder->client, REG_CHIP_ID_LSB);
+	rom_ver = tvp514x_read_reg(decoder->client, REG_ROM_VERSION);
+
+	v4l_dbg(1, debug, decoder->client,
+		 "chip id detected msb:0x%x lsb:0x%x rom version:0x%x\n",
+		 chip_id_msb, chip_id_lsb, rom_ver);
+	if ((chip_id_msb != TVP514X_CHIP_ID_MSB)
+		|| ((chip_id_lsb != TVP5146_CHIP_ID_LSB)
+		&& (chip_id_lsb != TVP5147_CHIP_ID_LSB))) {
+		/* We didn't read the values we expected, so this must not be
+		 * an TVP5146/47.
+		 */
+		v4l_err(decoder->client,
+			"chip id mismatch msb:0x%x lsb:0x%x\n",
+			chip_id_msb, chip_id_lsb);
+		return -ENODEV;
+	}
+
+	decoder->ver = rom_ver;
+	decoder->state = STATE_DETECTED;
+
+	v4l_info(decoder->client,
+			"%s found at 0x%x (%s)\n", decoder->client->name,
+			decoder->client->addr << 1,
+			decoder->client->adapter->name);
+	return 0;
+}
+
+/*
+ * Following are decoder interface functions implemented by
+ * TVP5146/47 decoder driver.
+ */
+
+/**
+ * ioctl_querystd - V4L2 decoder interface handler for VIDIOC_QUERYSTD ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @std_id: standard V4L2 std_id ioctl enum
+ *
+ * Returns the current standard detected by TVP5146/47. If no active input is
+ * detected, returns -EINVAL
+ */
+static int ioctl_querystd(struct v4l2_int_device *s, v4l2_std_id *std_id)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	enum tvp514x_std current_std;
+	enum tvp514x_input input_sel;
+	u8 sync_lock_status, lock_mask;
+
+	if (std_id == NULL)
+		return -EINVAL;
+
+	/* get the current standard */
+	current_std = tvp514x_get_current_std(decoder);
+	if (current_std == STD_INVALID)
+		return -EINVAL;
+
+	input_sel = decoder->route.input;
+
+	switch (input_sel) {
+	case INPUT_CVBS_VI1A:
+	case INPUT_CVBS_VI1B:
+	case INPUT_CVBS_VI1C:
+	case INPUT_CVBS_VI2A:
+	case INPUT_CVBS_VI2B:
+	case INPUT_CVBS_VI2C:
+	case INPUT_CVBS_VI3A:
+	case INPUT_CVBS_VI3B:
+	case INPUT_CVBS_VI3C:
+	case INPUT_CVBS_VI4A:
+		lock_mask = STATUS_CLR_SUBCAR_LOCK_BIT |
+			STATUS_HORZ_SYNC_LOCK_BIT |
+			STATUS_VIRT_SYNC_LOCK_BIT;
+		break;
+
+	case INPUT_SVIDEO_VI2A_VI1A:
+	case INPUT_SVIDEO_VI2B_VI1B:
+	case INPUT_SVIDEO_VI2C_VI1C:
+	case INPUT_SVIDEO_VI2A_VI3A:
+	case INPUT_SVIDEO_VI2B_VI3B:
+	case INPUT_SVIDEO_VI2C_VI3C:
+	case INPUT_SVIDEO_VI4A_VI1A:
+	case INPUT_SVIDEO_VI4A_VI1B:
+	case INPUT_SVIDEO_VI4A_VI1C:
+	case INPUT_SVIDEO_VI4A_VI3A:
+	case INPUT_SVIDEO_VI4A_VI3B:
+	case INPUT_SVIDEO_VI4A_VI3C:
+		lock_mask = STATUS_HORZ_SYNC_LOCK_BIT |
+			STATUS_VIRT_SYNC_LOCK_BIT;
+		break;
+		/*Need to add other interfaces*/
+	default:
+		return -EINVAL;
+	}
+	/* check whether signal is locked */
+	sync_lock_status = tvp514x_read_reg(decoder->client, REG_STATUS1);
+	if (lock_mask != (sync_lock_status & lock_mask))
+		return -EINVAL;	/* No input detected */
+
+	decoder->current_std = current_std;
+	*std_id = decoder->std_list[current_std].standard.id;
+
+	v4l_dbg(1, debug, decoder->client, "Current STD: %s",
+			decoder->std_list[current_std].standard.name);
+	return 0;
+}
+
+/**
+ * ioctl_s_std - V4L2 decoder interface handler for VIDIOC_S_STD ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @std_id: standard V4L2 v4l2_std_id ioctl enum
+ *
+ * If std_id is supported, sets the requested standard. Otherwise, returns
+ * -EINVAL
+ */
+static int ioctl_s_std(struct v4l2_int_device *s, v4l2_std_id *std_id)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int err, i;
+
+	if (std_id == NULL)
+		return -EINVAL;
+
+	for (i = 0; i < decoder->num_stds; i++)
+		if (*std_id & decoder->std_list[i].standard.id)
+			break;
+
+	if ((i == decoder->num_stds) || (i == STD_INVALID))
+		return -EINVAL;
+
+	err = tvp514x_write_reg(decoder->client, REG_VIDEO_STD,
+				decoder->std_list[i].video_std);
+	if (err)
+		return err;
+
+	decoder->current_std = i;
+	tvp514x_reg_list[REG_VIDEO_STD].val = decoder->std_list[i].video_std;
+
+	v4l_dbg(1, debug, decoder->client, "Standard set to: %s",
+			decoder->std_list[i].standard.name);
+	return 0;
+}
+
+/**
+ * ioctl_s_routing - V4L2 decoder interface handler for VIDIOC_S_INPUT ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @index: number of the input
+ *
+ * If index is valid, selects the requested input. Otherwise, returns -EINVAL if
+ * the input is not supported or there is no active signal present in the
+ * selected input.
+ */
+static int ioctl_s_routing(struct v4l2_int_device *s,
+				struct v4l2_routing *route)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int err;
+	enum tvp514x_input input_sel;
+	enum tvp514x_output output_sel;
+	enum tvp514x_std current_std = STD_INVALID;
+	u8 sync_lock_status, lock_mask;
+	int try_count = LOCK_RETRY_COUNT;
+
+	if ((!route) || (route->input >= INPUT_INVALID) ||
+			(route->output >= OUTPUT_INVALID))
+		return -EINVAL;	/* Index out of bound */
+
+	input_sel = route->input;
+	output_sel = route->output;
+
+	err = tvp514x_write_reg(decoder->client, REG_INPUT_SEL, input_sel);
+	if (err)
+		return err;
+
+	output_sel |= tvp514x_read_reg(decoder->client,
+			REG_OUTPUT_FORMATTER1) & 0x7;
+	err = tvp514x_write_reg(decoder->client, REG_OUTPUT_FORMATTER1,
+			output_sel);
+	if (err)
+		return err;
+
+	tvp514x_reg_list[REG_INPUT_SEL].val = input_sel;
+	tvp514x_reg_list[REG_OUTPUT_FORMATTER1].val = output_sel;
+
+	/* Clear status */
+	msleep(LOCK_RETRY_DELAY);
+	err =
+	    tvp514x_write_reg(decoder->client, REG_CLEAR_LOST_LOCK, 0x01);
+	if (err)
+		return err;
+
+	switch (input_sel) {
+	case INPUT_CVBS_VI1A:
+	case INPUT_CVBS_VI1B:
+	case INPUT_CVBS_VI1C:
+	case INPUT_CVBS_VI2A:
+	case INPUT_CVBS_VI2B:
+	case INPUT_CVBS_VI2C:
+	case INPUT_CVBS_VI3A:
+	case INPUT_CVBS_VI3B:
+	case INPUT_CVBS_VI3C:
+	case INPUT_CVBS_VI4A:
+		lock_mask = STATUS_CLR_SUBCAR_LOCK_BIT |
+			STATUS_HORZ_SYNC_LOCK_BIT |
+			STATUS_VIRT_SYNC_LOCK_BIT;
+		break;
+
+	case INPUT_SVIDEO_VI2A_VI1A:
+	case INPUT_SVIDEO_VI2B_VI1B:
+	case INPUT_SVIDEO_VI2C_VI1C:
+	case INPUT_SVIDEO_VI2A_VI3A:
+	case INPUT_SVIDEO_VI2B_VI3B:
+	case INPUT_SVIDEO_VI2C_VI3C:
+	case INPUT_SVIDEO_VI4A_VI1A:
+	case INPUT_SVIDEO_VI4A_VI1B:
+	case INPUT_SVIDEO_VI4A_VI1C:
+	case INPUT_SVIDEO_VI4A_VI3A:
+	case INPUT_SVIDEO_VI4A_VI3B:
+	case INPUT_SVIDEO_VI4A_VI3C:
+		lock_mask = STATUS_HORZ_SYNC_LOCK_BIT |
+			STATUS_VIRT_SYNC_LOCK_BIT;
+		break;
+	/*Need to add other interfaces*/
+	default:
+		return -EINVAL;
+	}
+
+	while (try_count-- > 0) {
+		/* Allow decoder to sync up with new input */
+		msleep(LOCK_RETRY_DELAY);
+
+		/* get the current standard for future reference */
+		current_std = tvp514x_get_current_std(decoder);
+		if (current_std == STD_INVALID)
+			continue;
+
+		sync_lock_status = tvp514x_read_reg(decoder->client,
+				REG_STATUS1);
+		if (lock_mask == (sync_lock_status & lock_mask))
+			break;	/* Input detected */
+	}
+
+	if ((current_std == STD_INVALID) || (try_count < 0))
+		return -EINVAL;
+
+	decoder->current_std = current_std;
+	decoder->route.input = route->input;
+	decoder->route.output = route->output;
+
+	v4l_dbg(1, debug, decoder->client,
+			"Input set to: %d, std : %d",
+			input_sel, current_std);
+
+	return 0;
+}
+
+/**
+ * ioctl_queryctrl - V4L2 decoder interface handler for VIDIOC_QUERYCTRL ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @qctrl: standard V4L2 v4l2_queryctrl structure
+ *
+ * If the requested control is supported, returns the control information.
+ * Otherwise, returns -EINVAL if the control is not supported.
+ */
+static int
+ioctl_queryctrl(struct v4l2_int_device *s, struct v4l2_queryctrl *qctrl)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int err = -EINVAL;
+
+	if (qctrl == NULL)
+		return err;
+
+	switch (qctrl->id) {
+	case V4L2_CID_BRIGHTNESS:
+		/* Brightness supported is same as standard one (0-255),
+		 * so make use of standard API provided.
+		 */
+		err = v4l2_ctrl_query_fill_std(qctrl);
+		break;
+	case V4L2_CID_CONTRAST:
+	case V4L2_CID_SATURATION:
+		/* Saturation and Contrast supported is -
+		 *	Contrast: 0 - 255 (Default - 128)
+		 *	Saturation: 0 - 255 (Default - 128)
+		 */
+		err = v4l2_ctrl_query_fill(qctrl, 0, 255, 1, 128);
+		break;
+	case V4L2_CID_HUE:
+		/* Hue Supported is -
+		 *	Hue - -180 - +180 (Default - 0, Step - +180)
+		 */
+		err = v4l2_ctrl_query_fill(qctrl, -180, 180, 180, 0);
+		break;
+	case V4L2_CID_AUTOGAIN:
+		/* Autogain is either 0 or 1*/
+		memcpy(qctrl, &tvp514x_autogain_ctrl,
+				sizeof(struct v4l2_queryctrl));
+		err = 0;
+		break;
+	default:
+		v4l_err(decoder->client,
+			"invalid control id %d\n", qctrl->id);
+		return err;
+	}
+
+	v4l_dbg(1, debug, decoder->client,
+			"Query Control: %s : Min - %d, Max - %d, Def - %d",
+			qctrl->name,
+			qctrl->minimum,
+			qctrl->maximum,
+			qctrl->default_value);
+
+	return err;
+}
+
+/**
+ * ioctl_g_ctrl - V4L2 decoder interface handler for VIDIOC_G_CTRL ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @ctrl: pointer to v4l2_control structure
+ *
+ * If the requested control is supported, returns the control's current
+ * value from the decoder. Otherwise, returns -EINVAL if the control is not
+ * supported.
+ */
+static int
+ioctl_g_ctrl(struct v4l2_int_device *s, struct v4l2_control *ctrl)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+
+	if (ctrl == NULL)
+		return -EINVAL;
+
+	switch (ctrl->id) {
+	case V4L2_CID_BRIGHTNESS:
+		ctrl->value = tvp514x_reg_list[REG_BRIGHTNESS].val;
+		break;
+	case V4L2_CID_CONTRAST:
+		ctrl->value = tvp514x_reg_list[REG_CONTRAST].val;
+		break;
+	case V4L2_CID_SATURATION:
+		ctrl->value = tvp514x_reg_list[REG_SATURATION].val;
+		break;
+	case V4L2_CID_HUE:
+		ctrl->value = tvp514x_reg_list[REG_HUE].val;
+		if (ctrl->value == 0x7F)
+			ctrl->value = 180;
+		else if (ctrl->value == 0x80)
+			ctrl->value = -180;
+		else
+			ctrl->value = 0;
+
+		break;
+	case V4L2_CID_AUTOGAIN:
+		ctrl->value = tvp514x_reg_list[REG_AFE_GAIN_CTRL].val;
+		if ((ctrl->value & 0x3) == 3)
+			ctrl->value = 1;
+		else
+			ctrl->value = 0;
+
+		break;
+	default:
+		v4l_err(decoder->client,
+			"invalid control id %d\n", ctrl->id);
+		return -EINVAL;
+	}
+
+	v4l_dbg(1, debug, decoder->client,
+			"Get Control: ID - %d - %d",
+			ctrl->id, ctrl->value);
+	return 0;
+}
+
+/**
+ * ioctl_s_ctrl - V4L2 decoder interface handler for VIDIOC_S_CTRL ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @ctrl: pointer to v4l2_control structure
+ *
+ * If the requested control is supported, sets the control's current
+ * value in HW. Otherwise, returns -EINVAL if the control is not supported.
+ */
+static int
+ioctl_s_ctrl(struct v4l2_int_device *s, struct v4l2_control *ctrl)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int err = -EINVAL, value;
+
+	if (ctrl == NULL)
+		return err;
+
+	value = (__s32) ctrl->value;
+
+	switch (ctrl->id) {
+	case V4L2_CID_BRIGHTNESS:
+		if (ctrl->value < 0 || ctrl->value > 255) {
+			v4l_err(decoder->client,
+					"invalid brightness setting %d\n",
+					ctrl->value);
+			return -ERANGE;
+		}
+		err = tvp514x_write_reg(decoder->client, REG_BRIGHTNESS,
+				value);
+		if (err)
+			return err;
+		tvp514x_reg_list[REG_BRIGHTNESS].val = value;
+		break;
+	case V4L2_CID_CONTRAST:
+		if (ctrl->value < 0 || ctrl->value > 255) {
+			v4l_err(decoder->client,
+					"invalid contrast setting %d\n",
+					ctrl->value);
+			return -ERANGE;
+		}
+		err = tvp514x_write_reg(decoder->client, REG_CONTRAST,
+				value);
+		if (err)
+			return err;
+		tvp514x_reg_list[REG_CONTRAST].val = value;
+		break;
+	case V4L2_CID_SATURATION:
+		if (ctrl->value < 0 || ctrl->value > 255) {
+			v4l_err(decoder->client,
+					"invalid saturation setting %d\n",
+					ctrl->value);
+			return -ERANGE;
+		}
+		err = tvp514x_write_reg(decoder->client, REG_SATURATION,
+				value);
+		if (err)
+			return err;
+		tvp514x_reg_list[REG_SATURATION].val = value;
+		break;
+	case V4L2_CID_HUE:
+		if (value == 180)
+			value = 0x7F;
+		else if (value == -180)
+			value = 0x80;
+		else if (value == 0)
+			value = 0;
+		else {
+			v4l_err(decoder->client,
+					"invalid hue setting %d\n",
+					ctrl->value);
+			return -ERANGE;
+		}
+		err = tvp514x_write_reg(decoder->client, REG_HUE,
+				value);
+		if (err)
+			return err;
+		tvp514x_reg_list[REG_HUE].val = value;
+		break;
+	case V4L2_CID_AUTOGAIN:
+		if (value == 1)
+			value = 0x0F;
+		else if (value == 0)
+			value = 0x0C;
+		else {
+			v4l_err(decoder->client,
+					"invalid auto gain setting %d\n",
+					ctrl->value);
+			return -ERANGE;
+		}
+		err = tvp514x_write_reg(decoder->client, REG_AFE_GAIN_CTRL,
+				value);
+		if (err)
+			return err;
+		tvp514x_reg_list[REG_AFE_GAIN_CTRL].val = value;
+		break;
+	default:
+		v4l_err(decoder->client,
+			"invalid control id %d\n", ctrl->id);
+		return err;
+	}
+
+	v4l_dbg(1, debug, decoder->client,
+			"Set Control: ID - %d - %d",
+			ctrl->id, ctrl->value);
+
+	return err;
+}
+
+/**
+ * ioctl_enum_fmt_cap - Implement the CAPTURE buffer VIDIOC_ENUM_FMT ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @fmt: standard V4L2 VIDIOC_ENUM_FMT ioctl structure
+ *
+ * Implement the VIDIOC_ENUM_FMT ioctl to enumerate supported formats
+ */
+static int
+ioctl_enum_fmt_cap(struct v4l2_int_device *s, struct v4l2_fmtdesc *fmt)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int index;
+
+	if (fmt == NULL)
+		return -EINVAL;
+
+	index = fmt->index;
+	if ((index >= decoder->num_fmts) || (index < 0))
+		return -EINVAL;	/* Index out of bound */
+
+	if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;	/* only capture is supported */
+
+	memcpy(fmt, &decoder->fmt_list[index],
+		sizeof(struct v4l2_fmtdesc));
+
+	v4l_dbg(1, debug, decoder->client,
+			"Current FMT: index - %d (%s)",
+			decoder->fmt_list[index].index,
+			decoder->fmt_list[index].description);
+	return 0;
+}
+
+/**
+ * ioctl_try_fmt_cap - Implement the CAPTURE buffer VIDIOC_TRY_FMT ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @f: pointer to standard V4L2 VIDIOC_TRY_FMT ioctl structure
+ *
+ * Implement the VIDIOC_TRY_FMT ioctl for the CAPTURE buffer type. This
+ * ioctl is used to negotiate the image capture size and pixel format
+ * without actually making it take effect.
+ */
+static int
+ioctl_try_fmt_cap(struct v4l2_int_device *s, struct v4l2_format *f)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int ifmt;
+	struct v4l2_pix_format *pix;
+	enum tvp514x_std current_std;
+
+	if (f == NULL)
+		return -EINVAL;
+
+	if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+	pix = &f->fmt.pix;
+
+	/* Calculate height and width based on current standard */
+	current_std = tvp514x_get_current_std(decoder);
+	if (current_std == STD_INVALID)
+		return -EINVAL;
+
+	decoder->current_std = current_std;
+	pix->width = decoder->std_list[current_std].width;
+	pix->height = decoder->std_list[current_std].height;
+
+	for (ifmt = 0; ifmt < decoder->num_fmts; ifmt++) {
+		if (pix->pixelformat ==
+			decoder->fmt_list[ifmt].pixelformat)
+			break;
+	}
+	if (ifmt == decoder->num_fmts)
+		ifmt = 0;	/* None of the format matched, select default */
+	pix->pixelformat = decoder->fmt_list[ifmt].pixelformat;
+
+	pix->field = V4L2_FIELD_INTERLACED;
+	pix->bytesperline = pix->width * 2;
+	pix->sizeimage = pix->bytesperline * pix->height;
+	pix->colorspace = V4L2_COLORSPACE_SMPTE170M;
+	pix->priv = 0;
+
+	v4l_dbg(1, debug, decoder->client,
+			"Try FMT: pixelformat - %s, bytesperline - %d"
+			"Width - %d, Height - %d",
+			decoder->fmt_list[ifmt].description, pix->bytesperline,
+			pix->width, pix->height);
+	return 0;
+}
+
+/**
+ * ioctl_s_fmt_cap - V4L2 decoder interface handler for VIDIOC_S_FMT ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @f: pointer to standard V4L2 VIDIOC_S_FMT ioctl structure
+ *
+ * If the requested format is supported, configures the HW to use that
+ * format, returns error code if format not supported or HW can't be
+ * correctly configured.
+ */
+static int
+ioctl_s_fmt_cap(struct v4l2_int_device *s, struct v4l2_format *f)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	struct v4l2_pix_format *pix;
+	int rval;
+
+	if (f == NULL)
+		return -EINVAL;
+
+	if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;	/* only capture is supported */
+
+	pix = &f->fmt.pix;
+	rval = ioctl_try_fmt_cap(s, f);
+	if (rval)
+		return rval;
+
+		decoder->pix = *pix;
+
+	return rval;
+}
+
+/**
+ * ioctl_g_fmt_cap - V4L2 decoder interface handler for ioctl_g_fmt_cap
+ * @s: pointer to standard V4L2 device structure
+ * @f: pointer to standard V4L2 v4l2_format structure
+ *
+ * Returns the decoder's current pixel format in the v4l2_format
+ * parameter.
+ */
+static int
+ioctl_g_fmt_cap(struct v4l2_int_device *s, struct v4l2_format *f)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+
+	if (f == NULL)
+		return -EINVAL;
+
+	if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;	/* only capture is supported */
+
+	f->fmt.pix = decoder->pix;
+
+	v4l_dbg(1, debug, decoder->client,
+			"Current FMT: bytesperline - %d"
+			"Width - %d, Height - %d",
+			decoder->pix.bytesperline,
+			decoder->pix.width, decoder->pix.height);
+	return 0;
+}
+
+/**
+ * ioctl_g_parm - V4L2 decoder interface handler for VIDIOC_G_PARM ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @a: pointer to standard V4L2 VIDIOC_G_PARM ioctl structure
+ *
+ * Returns the decoder's video CAPTURE parameters.
+ */
+static int
+ioctl_g_parm(struct v4l2_int_device *s, struct v4l2_streamparm *a)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	struct v4l2_captureparm *cparm;
+	enum tvp514x_std current_std;
+
+	if (a == NULL)
+		return -EINVAL;
+
+	if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;	/* only capture is supported */
+
+	memset(a, 0, sizeof(*a));
+	a->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+	/* get the current standard */
+	current_std = tvp514x_get_current_std(decoder);
+	if (current_std == STD_INVALID)
+		return -EINVAL;
+
+	decoder->current_std = current_std;
+
+	cparm = &a->parm.capture;
+	cparm->capability = V4L2_CAP_TIMEPERFRAME;
+	cparm->timeperframe =
+		decoder->std_list[current_std].standard.frameperiod;
+
+	return 0;
+}
+
+/**
+ * ioctl_s_parm - V4L2 decoder interface handler for VIDIOC_S_PARM ioctl
+ * @s: pointer to standard V4L2 device structure
+ * @a: pointer to standard V4L2 VIDIOC_S_PARM ioctl structure
+ *
+ * Configures the decoder to use the input parameters, if possible. If
+ * not possible, returns the appropriate error code.
+ */
+static int
+ioctl_s_parm(struct v4l2_int_device *s, struct v4l2_streamparm *a)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	struct v4l2_fract *timeperframe;
+	enum tvp514x_std current_std;
+
+	if (a == NULL)
+		return -EINVAL;
+
+	if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;	/* only capture is supported */
+
+	timeperframe = &a->parm.capture.timeperframe;
+
+	/* get the current standard */
+	current_std = tvp514x_get_current_std(decoder);
+	if (current_std == STD_INVALID)
+		return -EINVAL;
+
+	decoder->current_std = current_std;
+
+	*timeperframe =
+	    decoder->std_list[current_std].standard.frameperiod;
+
+	return 0;
+}
+
+/**
+ * ioctl_g_ifparm - V4L2 decoder interface handler for vidioc_int_g_ifparm_num
+ * @s: pointer to standard V4L2 device structure
+ * @p: pointer to standard V4L2 vidioc_int_g_ifparm_num ioctl structure
+ *
+ * Gets slave interface parameters.
+ * Calculates the required xclk value to support the requested
+ * clock parameters in p. This value is returned in the p
+ * parameter.
+ */
+static int ioctl_g_ifparm(struct v4l2_int_device *s, struct v4l2_ifparm *p)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int rval;
+
+	if (p == NULL)
+		return -EINVAL;
+
+	if (NULL == decoder->pdata->ifparm)
+		return -EINVAL;
+
+	rval = decoder->pdata->ifparm(p);
+	if (rval) {
+		v4l_err(decoder->client, "g_ifparm.Err[%d]\n", rval);
+		return rval;
+	}
+
+	p->u.bt656.clock_curr = TVP514X_XCLK_BT656;
+
+	return 0;
+}
+
+/**
+ * ioctl_g_priv - V4L2 decoder interface handler for vidioc_int_g_priv_num
+ * @s: pointer to standard V4L2 device structure
+ * @p: void pointer to hold decoder's private data address
+ *
+ * Returns device's (decoder's) private data area address in p parameter
+ */
+static int ioctl_g_priv(struct v4l2_int_device *s, void *p)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+
+	if (NULL == decoder->pdata->priv_data_set)
+		return -EINVAL;
+
+	return decoder->pdata->priv_data_set(p);
+}
+
+/**
+ * ioctl_s_power - V4L2 decoder interface handler for vidioc_int_s_power_num
+ * @s: pointer to standard V4L2 device structure
+ * @on: power state to which device is to be set
+ *
+ * Sets devices power state to requrested state, if possible.
+ */
+static int ioctl_s_power(struct v4l2_int_device *s, enum v4l2_power on)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int err = 0;
+
+	switch (on) {
+	case V4L2_POWER_OFF:
+		/* Power Down Sequence */
+		err =
+		    tvp514x_write_reg(decoder->client, REG_OPERATION_MODE,
+					0x01);
+		/* Disable mux for TVP5146/47 decoder data path */
+		if (decoder->pdata->power_set)
+			err |= decoder->pdata->power_set(on);
+		decoder->state = STATE_NOT_DETECTED;
+		break;
+
+	case V4L2_POWER_STANDBY:
+		if (decoder->pdata->power_set)
+			err = decoder->pdata->power_set(on);
+		break;
+
+	case V4L2_POWER_ON:
+		/* Enable mux for TVP5146/47 decoder data path */
+		if ((decoder->pdata->power_set) &&
+				(decoder->state == STATE_NOT_DETECTED)) {
+			int i;
+			struct tvp514x_init_seq *int_seq =
+				(struct tvp514x_init_seq *)
+				decoder->id->driver_data;
+
+			err = decoder->pdata->power_set(on);
+
+			/* Power Up Sequence */
+			for (i = 0; i < int_seq->no_regs; i++) {
+				err |= tvp514x_write_reg(decoder->client,
+						int_seq->init_reg_seq[i].reg,
+						int_seq->init_reg_seq[i].val);
+			}
+			/* Detect the sensor is not already detected */
+			err |= tvp514x_detect(decoder);
+			if (err) {
+				v4l_err(decoder->client,
+						"Unable to detect decoder\n");
+				return err;
+			}
+		}
+		err |= tvp514x_configure(decoder);
+		break;
+
+	default:
+		err = -ENODEV;
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * ioctl_init - V4L2 decoder interface handler for VIDIOC_INT_INIT
+ * @s: pointer to standard V4L2 device structure
+ *
+ * Initialize the decoder device (calls tvp514x_configure())
+ */
+static int ioctl_init(struct v4l2_int_device *s)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+
+	/* Set default standard to auto */
+	tvp514x_reg_list[REG_VIDEO_STD].val =
+	    VIDEO_STD_AUTO_SWITCH_BIT;
+
+	return tvp514x_configure(decoder);
+}
+
+/**
+ * ioctl_dev_exit - V4L2 decoder interface handler for vidioc_int_dev_exit_num
+ * @s: pointer to standard V4L2 device structure
+ *
+ * Delinitialise the dev. at slave detach. The complement of ioctl_dev_init.
+ */
+static int ioctl_dev_exit(struct v4l2_int_device *s)
+{
+	return 0;
+}
+
+/**
+ * ioctl_dev_init - V4L2 decoder interface handler for vidioc_int_dev_init_num
+ * @s: pointer to standard V4L2 device structure
+ *
+ * Initialise the device when slave attaches to the master. Returns 0 if
+ * TVP5146/47 device could be found, otherwise returns appropriate error.
+ */
+static int ioctl_dev_init(struct v4l2_int_device *s)
+{
+	struct tvp514x_decoder *decoder = s->priv;
+	int err;
+
+	err = tvp514x_detect(decoder);
+	if (err < 0) {
+		v4l_err(decoder->client,
+			"Unable to detect decoder\n");
+		return err;
+	}
+
+	v4l_info(decoder->client,
+		 "chip version 0x%.2x detected\n", decoder->ver);
+
+	return 0;
+}
+
+static struct v4l2_int_ioctl_desc tvp514x_ioctl_desc[] = {
+	{vidioc_int_dev_init_num, (v4l2_int_ioctl_func*) ioctl_dev_init},
+	{vidioc_int_dev_exit_num, (v4l2_int_ioctl_func*) ioctl_dev_exit},
+	{vidioc_int_s_power_num, (v4l2_int_ioctl_func*) ioctl_s_power},
+	{vidioc_int_g_priv_num, (v4l2_int_ioctl_func*) ioctl_g_priv},
+	{vidioc_int_g_ifparm_num, (v4l2_int_ioctl_func*) ioctl_g_ifparm},
+	{vidioc_int_init_num, (v4l2_int_ioctl_func*) ioctl_init},
+	{vidioc_int_enum_fmt_cap_num,
+	 (v4l2_int_ioctl_func *) ioctl_enum_fmt_cap},
+	{vidioc_int_try_fmt_cap_num,
+	 (v4l2_int_ioctl_func *) ioctl_try_fmt_cap},
+	{vidioc_int_g_fmt_cap_num,
+	 (v4l2_int_ioctl_func *) ioctl_g_fmt_cap},
+	{vidioc_int_s_fmt_cap_num,
+	 (v4l2_int_ioctl_func *) ioctl_s_fmt_cap},
+	{vidioc_int_g_parm_num, (v4l2_int_ioctl_func *) ioctl_g_parm},
+	{vidioc_int_s_parm_num, (v4l2_int_ioctl_func *) ioctl_s_parm},
+	{vidioc_int_queryctrl_num,
+	 (v4l2_int_ioctl_func *) ioctl_queryctrl},
+	{vidioc_int_g_ctrl_num, (v4l2_int_ioctl_func *) ioctl_g_ctrl},
+	{vidioc_int_s_ctrl_num, (v4l2_int_ioctl_func *) ioctl_s_ctrl},
+	{vidioc_int_querystd_num, (v4l2_int_ioctl_func *) ioctl_querystd},
+	{vidioc_int_s_std_num, (v4l2_int_ioctl_func *) ioctl_s_std},
+	{vidioc_int_s_video_routing_num,
+		(v4l2_int_ioctl_func *) ioctl_s_routing},
+};
+
+static struct v4l2_int_slave tvp514x_slave = {
+	.ioctls = tvp514x_ioctl_desc,
+	.num_ioctls = ARRAY_SIZE(tvp514x_ioctl_desc),
+};
+
+static struct tvp514x_decoder tvp514x_dev = {
+	.state = STATE_NOT_DETECTED,
+
+	.fmt_list = tvp514x_fmt_list,
+	.num_fmts = ARRAY_SIZE(tvp514x_fmt_list),
+
+	.pix = {		/* Default to NTSC 8-bit YUV 422 */
+		.width = NTSC_NUM_ACTIVE_PIXELS,
+		.height = NTSC_NUM_ACTIVE_LINES,
+		.pixelformat = V4L2_PIX_FMT_UYVY,
+		.field = V4L2_FIELD_INTERLACED,
+		.bytesperline = NTSC_NUM_ACTIVE_PIXELS * 2,
+		.sizeimage =
+		NTSC_NUM_ACTIVE_PIXELS * 2 * NTSC_NUM_ACTIVE_LINES,
+		.colorspace = V4L2_COLORSPACE_SMPTE170M,
+		},
+
+	.current_std = STD_NTSC_MJ,
+	.std_list = tvp514x_std_list,
+	.num_stds = ARRAY_SIZE(tvp514x_std_list),
+
+};
+
+static struct v4l2_int_device tvp514x_int_device = {
+	.module = THIS_MODULE,
+	.name = TVP514X_MODULE_NAME,
+	.priv = &tvp514x_dev,
+	.type = v4l2_int_type_slave,
+	.u = {
+	      .slave = &tvp514x_slave,
+	      },
+};
+
+/**
+ * tvp514x_probe - decoder driver i2c probe handler
+ * @client: i2c driver client device structure
+ *
+ * Register decoder as an i2c client device and V4L2
+ * device.
+ */
+static int
+tvp514x_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	struct tvp514x_decoder *decoder = &tvp514x_dev;
+	int err;
+
+	/* Check if the adapter supports the needed features */
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	decoder->pdata = client->dev.platform_data;
+	if (!decoder->pdata) {
+		v4l_err(client, "No platform data\n!!");
+		return -ENODEV;
+	}
+	/*
+	 * Fetch platform specific data, and configure the
+	 * tvp514x_reg_list[] accordingly. Since this is one
+	 * time configuration, no need to preserve.
+	 */
+	tvp514x_reg_list[REG_OUTPUT_FORMATTER2].val |=
+			(decoder->pdata->clk_polarity << 1);
+	tvp514x_reg_list[REG_SYNC_CONTROL].val |=
+			((decoder->pdata->hs_polarity << 2) |
+			(decoder->pdata->vs_polarity << 3));
+	/*
+	 * Save the id data, required for power up sequence
+	 */
+	decoder->id = (struct i2c_device_id *)id;
+	/* Attach to Master */
+	strcpy(tvp514x_int_device.u.slave->attach_to, decoder->pdata->master);
+	decoder->v4l2_int_device = &tvp514x_int_device;
+	decoder->client = client;
+	i2c_set_clientdata(client, decoder);
+
+	/* Register with V4L2 layer as slave device */
+	err = v4l2_int_device_register(decoder->v4l2_int_device);
+	if (err) {
+		i2c_set_clientdata(client, NULL);
+		v4l_err(client,
+			"Unable to register to v4l2. Err[%d]\n", err);
+
+	} else
+		v4l_info(client, "Registered to v4l2 master %s!!\n",
+				decoder->pdata->master);
+
+	return 0;
+}
+
+/**
+ * tvp514x_remove - decoder driver i2c remove handler
+ * @client: i2c driver client device structure
+ *
+ * Unregister decoder as an i2c client device and V4L2
+ * device. Complement of tvp514x_probe().
+ */
+static int __exit tvp514x_remove(struct i2c_client *client)
+{
+	struct tvp514x_decoder *decoder = i2c_get_clientdata(client);
+
+	if (!client->adapter)
+		return -ENODEV;	/* our client isn't attached */
+
+	v4l2_int_device_unregister(decoder->v4l2_int_device);
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+/*
+ * TVP5146 Init/Power on Sequence
+ */
+static const struct tvp514x_reg tvp5146_init_reg_seq[] = {
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS1, 0x02},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS2, 0x00},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS3, 0x80},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x01},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS1, 0x60},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS2, 0x00},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS3, 0xB0},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x01},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x00},
+	{TOK_WRITE, REG_OPERATION_MODE, 0x01},
+	{TOK_WRITE, REG_OPERATION_MODE, 0x00},
+};
+static const struct tvp514x_init_seq tvp5146_init = {
+	.no_regs = ARRAY_SIZE(tvp5146_init_reg_seq),
+	.init_reg_seq = tvp5146_init_reg_seq,
+};
+/*
+ * TVP5147 Init/Power on Sequence
+ */
+static const struct tvp514x_reg tvp5147_init_reg_seq[] =	{
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS1, 0x02},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS2, 0x00},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS3, 0x80},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x01},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS1, 0x60},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS2, 0x00},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS3, 0xB0},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x01},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS1, 0x16},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS2, 0x00},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS3, 0xA0},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x16},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS1, 0x60},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS2, 0x00},
+	{TOK_WRITE, REG_VBUS_ADDRESS_ACCESS3, 0xB0},
+	{TOK_WRITE, REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR, 0x00},
+	{TOK_WRITE, REG_OPERATION_MODE, 0x01},
+	{TOK_WRITE, REG_OPERATION_MODE, 0x00},
+};
+static const struct tvp514x_init_seq tvp5147_init = {
+	.no_regs = ARRAY_SIZE(tvp5147_init_reg_seq),
+	.init_reg_seq = tvp5147_init_reg_seq,
+};
+/*
+ * TVP5146M2/TVP5147M1 Init/Power on Sequence
+ */
+static const struct tvp514x_reg tvp514xm_init_reg_seq[] = {
+	{TOK_WRITE, REG_OPERATION_MODE, 0x01},
+	{TOK_WRITE, REG_OPERATION_MODE, 0x00},
+};
+static const struct tvp514x_init_seq tvp514xm_init = {
+	.no_regs = ARRAY_SIZE(tvp514xm_init_reg_seq),
+	.init_reg_seq = tvp514xm_init_reg_seq,
+};
+/*
+ * I2C Device Table -
+ *
+ * name - Name of the actual device/chip.
+ * driver_data - Driver data
+ */
+static const struct i2c_device_id tvp514x_id[] = {
+	{"tvp5146", (unsigned long)&tvp5146_init},
+	{"tvp5146m2", (unsigned long)&tvp514xm_init},
+	{"tvp5147", (unsigned long)&tvp5147_init},
+	{"tvp5147m1", (unsigned long)&tvp514xm_init},
+	{},
+};
+
+MODULE_DEVICE_TABLE(i2c, tvp514x_id);
+
+static struct i2c_driver tvp514x_i2c_driver = {
+	.driver = {
+		   .name = TVP514X_MODULE_NAME,
+		   .owner = THIS_MODULE,
+		   },
+	.probe = tvp514x_probe,
+	.remove = __exit_p(tvp514x_remove),
+	.id_table = tvp514x_id,
+};
+
+/**
+ * tvp514x_init
+ *
+ * Module init function
+ */
+static int __init tvp514x_init(void)
+{
+	return i2c_add_driver(&tvp514x_i2c_driver);
+}
+
+/**
+ * tvp514x_cleanup
+ *
+ * Module exit function
+ */
+static void __exit tvp514x_cleanup(void)
+{
+	i2c_del_driver(&tvp514x_i2c_driver);
+}
+
+module_init(tvp514x_init);
+module_exit(tvp514x_cleanup);
+
+MODULE_AUTHOR("Texas Instruments");
+MODULE_DESCRIPTION("TVP514X linux decoder driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/media/video/tvp514x_regs.h b/drivers/media/video/tvp514x_regs.h
new file mode 100644
index 000000000000..351620aeecc2
--- /dev/null
+++ b/drivers/media/video/tvp514x_regs.h
@@ -0,0 +1,297 @@
+/*
+ * drivers/media/video/tvp514x_regs.h
+ *
+ * Copyright (C) 2008 Texas Instruments Inc
+ * Author: Vaibhav Hiremath <hvaibhav@ti.com>
+ *
+ * Contributors:
+ *     Sivaraj R <sivaraj@ti.com>
+ *     Brijesh R Jadav <brijesh.j@ti.com>
+ *     Hardik Shah <hardik.shah@ti.com>
+ *     Manjunath Hadli <mrh@ti.com>
+ *     Karicheri Muralidharan <m-karicheri2@ti.com>
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _TVP514X_REGS_H
+#define _TVP514X_REGS_H
+
+/*
+ * TVP5146/47 registers
+ */
+#define REG_INPUT_SEL			(0x00)
+#define REG_AFE_GAIN_CTRL		(0x01)
+#define REG_VIDEO_STD			(0x02)
+#define REG_OPERATION_MODE		(0x03)
+#define REG_AUTOSWITCH_MASK		(0x04)
+
+#define REG_COLOR_KILLER		(0x05)
+#define REG_LUMA_CONTROL1		(0x06)
+#define REG_LUMA_CONTROL2		(0x07)
+#define REG_LUMA_CONTROL3		(0x08)
+
+#define REG_BRIGHTNESS			(0x09)
+#define REG_CONTRAST			(0x0A)
+#define REG_SATURATION			(0x0B)
+#define REG_HUE				(0x0C)
+
+#define REG_CHROMA_CONTROL1		(0x0D)
+#define REG_CHROMA_CONTROL2		(0x0E)
+
+/* 0x0F Reserved */
+
+#define REG_COMP_PR_SATURATION		(0x10)
+#define REG_COMP_Y_CONTRAST		(0x11)
+#define REG_COMP_PB_SATURATION		(0x12)
+
+/* 0x13 Reserved */
+
+#define REG_COMP_Y_BRIGHTNESS		(0x14)
+
+/* 0x15 Reserved */
+
+#define REG_AVID_START_PIXEL_LSB	(0x16)
+#define REG_AVID_START_PIXEL_MSB	(0x17)
+#define REG_AVID_STOP_PIXEL_LSB		(0x18)
+#define REG_AVID_STOP_PIXEL_MSB		(0x19)
+
+#define REG_HSYNC_START_PIXEL_LSB	(0x1A)
+#define REG_HSYNC_START_PIXEL_MSB	(0x1B)
+#define REG_HSYNC_STOP_PIXEL_LSB	(0x1C)
+#define REG_HSYNC_STOP_PIXEL_MSB	(0x1D)
+
+#define REG_VSYNC_START_LINE_LSB	(0x1E)
+#define REG_VSYNC_START_LINE_MSB	(0x1F)
+#define REG_VSYNC_STOP_LINE_LSB		(0x20)
+#define REG_VSYNC_STOP_LINE_MSB		(0x21)
+
+#define REG_VBLK_START_LINE_LSB		(0x22)
+#define REG_VBLK_START_LINE_MSB		(0x23)
+#define REG_VBLK_STOP_LINE_LSB		(0x24)
+#define REG_VBLK_STOP_LINE_MSB		(0x25)
+
+/* 0x26 - 0x27 Reserved */
+
+#define REG_FAST_SWTICH_CONTROL		(0x28)
+
+/* 0x29 Reserved */
+
+#define REG_FAST_SWTICH_SCART_DELAY	(0x2A)
+
+/* 0x2B Reserved */
+
+#define REG_SCART_DELAY			(0x2C)
+#define REG_CTI_DELAY			(0x2D)
+#define REG_CTI_CONTROL			(0x2E)
+
+/* 0x2F - 0x31 Reserved */
+
+#define REG_SYNC_CONTROL		(0x32)
+#define REG_OUTPUT_FORMATTER1		(0x33)
+#define REG_OUTPUT_FORMATTER2		(0x34)
+#define REG_OUTPUT_FORMATTER3		(0x35)
+#define REG_OUTPUT_FORMATTER4		(0x36)
+#define REG_OUTPUT_FORMATTER5		(0x37)
+#define REG_OUTPUT_FORMATTER6		(0x38)
+#define REG_CLEAR_LOST_LOCK		(0x39)
+
+#define REG_STATUS1			(0x3A)
+#define REG_STATUS2			(0x3B)
+
+#define REG_AGC_GAIN_STATUS_LSB		(0x3C)
+#define REG_AGC_GAIN_STATUS_MSB		(0x3D)
+
+/* 0x3E Reserved */
+
+#define REG_VIDEO_STD_STATUS		(0x3F)
+#define REG_GPIO_INPUT1			(0x40)
+#define REG_GPIO_INPUT2			(0x41)
+
+/* 0x42 - 0x45 Reserved */
+
+#define REG_AFE_COARSE_GAIN_CH1		(0x46)
+#define REG_AFE_COARSE_GAIN_CH2		(0x47)
+#define REG_AFE_COARSE_GAIN_CH3		(0x48)
+#define REG_AFE_COARSE_GAIN_CH4		(0x49)
+
+#define REG_AFE_FINE_GAIN_PB_B_LSB	(0x4A)
+#define REG_AFE_FINE_GAIN_PB_B_MSB	(0x4B)
+#define REG_AFE_FINE_GAIN_Y_G_CHROMA_LSB	(0x4C)
+#define REG_AFE_FINE_GAIN_Y_G_CHROMA_MSB	(0x4D)
+#define REG_AFE_FINE_GAIN_PR_R_LSB	(0x4E)
+#define REG_AFE_FINE_GAIN_PR_R_MSB	(0x4F)
+#define REG_AFE_FINE_GAIN_CVBS_LUMA_LSB	(0x50)
+#define REG_AFE_FINE_GAIN_CVBS_LUMA_MSB	(0x51)
+
+/* 0x52 - 0x68 Reserved */
+
+#define REG_FBIT_VBIT_CONTROL1		(0x69)
+
+/* 0x6A - 0x6B Reserved */
+
+#define REG_BACKEND_AGC_CONTROL		(0x6C)
+
+/* 0x6D - 0x6E Reserved */
+
+#define REG_AGC_DECREMENT_SPEED_CONTROL	(0x6F)
+#define REG_ROM_VERSION			(0x70)
+
+/* 0x71 - 0x73 Reserved */
+
+#define REG_AGC_WHITE_PEAK_PROCESSING	(0x74)
+#define REG_FBIT_VBIT_CONTROL2		(0x75)
+#define REG_VCR_TRICK_MODE_CONTROL	(0x76)
+#define REG_HORIZONTAL_SHAKE_INCREMENT	(0x77)
+#define REG_AGC_INCREMENT_SPEED		(0x78)
+#define REG_AGC_INCREMENT_DELAY		(0x79)
+
+/* 0x7A - 0x7F Reserved */
+
+#define REG_CHIP_ID_MSB			(0x80)
+#define REG_CHIP_ID_LSB			(0x81)
+
+/* 0x82 Reserved */
+
+#define REG_CPLL_SPEED_CONTROL		(0x83)
+
+/* 0x84 - 0x96 Reserved */
+
+#define REG_STATUS_REQUEST		(0x97)
+
+/* 0x98 - 0x99 Reserved */
+
+#define REG_VERTICAL_LINE_COUNT_LSB	(0x9A)
+#define REG_VERTICAL_LINE_COUNT_MSB	(0x9B)
+
+/* 0x9C - 0x9D Reserved */
+
+#define REG_AGC_DECREMENT_DELAY		(0x9E)
+
+/* 0x9F - 0xB0 Reserved */
+
+#define REG_VDP_TTX_FILTER_1_MASK1	(0xB1)
+#define REG_VDP_TTX_FILTER_1_MASK2	(0xB2)
+#define REG_VDP_TTX_FILTER_1_MASK3	(0xB3)
+#define REG_VDP_TTX_FILTER_1_MASK4	(0xB4)
+#define REG_VDP_TTX_FILTER_1_MASK5	(0xB5)
+#define REG_VDP_TTX_FILTER_2_MASK1	(0xB6)
+#define REG_VDP_TTX_FILTER_2_MASK2	(0xB7)
+#define REG_VDP_TTX_FILTER_2_MASK3	(0xB8)
+#define REG_VDP_TTX_FILTER_2_MASK4	(0xB9)
+#define REG_VDP_TTX_FILTER_2_MASK5	(0xBA)
+#define REG_VDP_TTX_FILTER_CONTROL	(0xBB)
+#define REG_VDP_FIFO_WORD_COUNT		(0xBC)
+#define REG_VDP_FIFO_INTERRUPT_THRLD	(0xBD)
+
+/* 0xBE Reserved */
+
+#define REG_VDP_FIFO_RESET		(0xBF)
+#define REG_VDP_FIFO_OUTPUT_CONTROL	(0xC0)
+#define REG_VDP_LINE_NUMBER_INTERRUPT	(0xC1)
+#define REG_VDP_PIXEL_ALIGNMENT_LSB	(0xC2)
+#define REG_VDP_PIXEL_ALIGNMENT_MSB	(0xC3)
+
+/* 0xC4 - 0xD5 Reserved */
+
+#define REG_VDP_LINE_START		(0xD6)
+#define REG_VDP_LINE_STOP		(0xD7)
+#define REG_VDP_GLOBAL_LINE_MODE	(0xD8)
+#define REG_VDP_FULL_FIELD_ENABLE	(0xD9)
+#define REG_VDP_FULL_FIELD_MODE		(0xDA)
+
+/* 0xDB - 0xDF Reserved */
+
+#define REG_VBUS_DATA_ACCESS_NO_VBUS_ADDR_INCR	(0xE0)
+#define REG_VBUS_DATA_ACCESS_VBUS_ADDR_INCR	(0xE1)
+#define REG_FIFO_READ_DATA			(0xE2)
+
+/* 0xE3 - 0xE7 Reserved */
+
+#define REG_VBUS_ADDRESS_ACCESS1	(0xE8)
+#define REG_VBUS_ADDRESS_ACCESS2	(0xE9)
+#define REG_VBUS_ADDRESS_ACCESS3	(0xEA)
+
+/* 0xEB - 0xEF Reserved */
+
+#define REG_INTERRUPT_RAW_STATUS0	(0xF0)
+#define REG_INTERRUPT_RAW_STATUS1	(0xF1)
+#define REG_INTERRUPT_STATUS0		(0xF2)
+#define REG_INTERRUPT_STATUS1		(0xF3)
+#define REG_INTERRUPT_MASK0		(0xF4)
+#define REG_INTERRUPT_MASK1		(0xF5)
+#define REG_INTERRUPT_CLEAR0		(0xF6)
+#define REG_INTERRUPT_CLEAR1		(0xF7)
+
+/* 0xF8 - 0xFF Reserved */
+
+/*
+ * Mask and bit definitions of TVP5146/47 registers
+ */
+/* The ID values we are looking for */
+#define TVP514X_CHIP_ID_MSB		(0x51)
+#define TVP5146_CHIP_ID_LSB		(0x46)
+#define TVP5147_CHIP_ID_LSB		(0x47)
+
+#define VIDEO_STD_MASK			(0x07)
+#define VIDEO_STD_AUTO_SWITCH_BIT	(0x00)
+#define VIDEO_STD_NTSC_MJ_BIT		(0x01)
+#define VIDEO_STD_PAL_BDGHIN_BIT	(0x02)
+#define VIDEO_STD_PAL_M_BIT		(0x03)
+#define VIDEO_STD_PAL_COMBINATION_N_BIT	(0x04)
+#define VIDEO_STD_NTSC_4_43_BIT		(0x05)
+#define VIDEO_STD_SECAM_BIT		(0x06)
+#define VIDEO_STD_PAL_60_BIT		(0x07)
+
+/*
+ * Status bit
+ */
+#define STATUS_TV_VCR_BIT		(1<<0)
+#define STATUS_HORZ_SYNC_LOCK_BIT	(1<<1)
+#define STATUS_VIRT_SYNC_LOCK_BIT	(1<<2)
+#define STATUS_CLR_SUBCAR_LOCK_BIT	(1<<3)
+#define STATUS_LOST_LOCK_DETECT_BIT	(1<<4)
+#define STATUS_FEILD_RATE_BIT		(1<<5)
+#define STATUS_LINE_ALTERNATING_BIT	(1<<6)
+#define STATUS_PEAK_WHITE_DETECT_BIT	(1<<7)
+
+/* Tokens for register write */
+#define TOK_WRITE                       (0)     /* token for write operation */
+#define TOK_TERM                        (1)     /* terminating token */
+#define TOK_DELAY                       (2)     /* delay token for reg list */
+#define TOK_SKIP                        (3)     /* token to skip a register */
+/**
+ * struct tvp514x_reg - Structure for TVP5146/47 register initialization values
+ * @token - Token: TOK_WRITE, TOK_TERM etc..
+ * @reg - Register offset
+ * @val - Register Value for TOK_WRITE or delay in ms for TOK_DELAY
+ */
+struct tvp514x_reg {
+	u8 token;
+	u8 reg;
+	u32 val;
+};
+
+/**
+ * struct tvp514x_init_seq - Structure for TVP5146/47/46M2/47M1 power up
+ *		Sequence.
+ * @ no_regs - Number of registers to write for power up sequence.
+ * @ init_reg_seq - Array of registers and respective value to write.
+ */
+struct tvp514x_init_seq {
+	unsigned int no_regs;
+	const struct tvp514x_reg *init_reg_seq;
+};
+#endif				/* ifndef _TVP514X_REGS_H */
diff --git a/include/media/tvp514x.h b/include/media/tvp514x.h
new file mode 100644
index 000000000000..5e7ee968c6dc
--- /dev/null
+++ b/include/media/tvp514x.h
@@ -0,0 +1,118 @@
+/*
+ * drivers/media/video/tvp514x.h
+ *
+ * Copyright (C) 2008 Texas Instruments Inc
+ * Author: Vaibhav Hiremath <hvaibhav@ti.com>
+ *
+ * Contributors:
+ *     Sivaraj R <sivaraj@ti.com>
+ *     Brijesh R Jadav <brijesh.j@ti.com>
+ *     Hardik Shah <hardik.shah@ti.com>
+ *     Manjunath Hadli <mrh@ti.com>
+ *     Karicheri Muralidharan <m-karicheri2@ti.com>
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _TVP514X_H
+#define _TVP514X_H
+
+/*
+ * Other macros
+ */
+#define TVP514X_MODULE_NAME		"tvp514x"
+
+#define TVP514X_XCLK_BT656		(27000000)
+
+/* Number of pixels and number of lines per frame for different standards */
+#define NTSC_NUM_ACTIVE_PIXELS		(720)
+#define NTSC_NUM_ACTIVE_LINES		(480)
+#define PAL_NUM_ACTIVE_PIXELS		(720)
+#define PAL_NUM_ACTIVE_LINES		(576)
+
+/**
+ * enum tvp514x_input - enum for different decoder input pin
+ *		configuration.
+ */
+enum tvp514x_input {
+	/*
+	 * CVBS input selection
+	 */
+	INPUT_CVBS_VI1A = 0x0,
+	INPUT_CVBS_VI1B,
+	INPUT_CVBS_VI1C,
+	INPUT_CVBS_VI2A = 0x04,
+	INPUT_CVBS_VI2B,
+	INPUT_CVBS_VI2C,
+	INPUT_CVBS_VI3A = 0x08,
+	INPUT_CVBS_VI3B,
+	INPUT_CVBS_VI3C,
+	INPUT_CVBS_VI4A = 0x0C,
+	/*
+	 * S-Video input selection
+	 */
+	INPUT_SVIDEO_VI2A_VI1A = 0x44,
+	INPUT_SVIDEO_VI2B_VI1B,
+	INPUT_SVIDEO_VI2C_VI1C,
+	INPUT_SVIDEO_VI2A_VI3A = 0x54,
+	INPUT_SVIDEO_VI2B_VI3B,
+	INPUT_SVIDEO_VI2C_VI3C,
+	INPUT_SVIDEO_VI4A_VI1A = 0x4C,
+	INPUT_SVIDEO_VI4A_VI1B,
+	INPUT_SVIDEO_VI4A_VI1C,
+	INPUT_SVIDEO_VI4A_VI3A = 0x5C,
+	INPUT_SVIDEO_VI4A_VI3B,
+	INPUT_SVIDEO_VI4A_VI3C,
+
+	/* Need to add entries for
+	 * RGB, YPbPr and SCART.
+	 */
+	INPUT_INVALID
+};
+
+/**
+ * enum tvp514x_output - enum for output format
+ *			supported.
+ *
+ */
+enum tvp514x_output {
+	OUTPUT_10BIT_422_EMBEDDED_SYNC = 0,
+	OUTPUT_20BIT_422_SEPERATE_SYNC,
+	OUTPUT_10BIT_422_SEPERATE_SYNC = 3,
+	OUTPUT_INVALID
+};
+
+/**
+ * struct tvp514x_platform_data - Platform data values and access functions.
+ * @power_set: Power state access function, zero is off, non-zero is on.
+ * @ifparm: Interface parameters access function.
+ * @priv_data_set: Device private data (pointer) access function.
+ * @clk_polarity: Clock polarity of the current interface.
+ * @ hs_polarity: HSYNC Polarity configuration for current interface.
+ * @ vs_polarity: VSYNC Polarity configuration for current interface.
+ */
+struct tvp514x_platform_data {
+	char *master;
+	int (*power_set) (enum v4l2_power on);
+	int (*ifparm) (struct v4l2_ifparm *p);
+	int (*priv_data_set) (void *);
+	/* Interface control params */
+	bool clk_polarity;
+	bool hs_polarity;
+	bool vs_polarity;
+};
+
+
+#endif				/* ifndef _TVP514X_H */
-- 
cgit v1.2.3


From 2a1fcdf08230522bd5024f91da24aaa6e8d81f59 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 29 Nov 2008 21:36:58 -0300
Subject: V4L/DVB (9820): v4l2: add v4l2_device and v4l2_subdev structs to the
 v4l2 framework.

Start implementing a proper v4l2 framework as discussed during the
Linux Plumbers Conference 2008.

Introduces v4l2_device (for device instances) and v4l2_subdev (representing
sub-device instances).

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Reviewed-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Reviewed-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Reviewed-by: Andy Walls <awalls@radix.net>
Reviewed-by: David Brownell <david-b@pacbell.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/video4linux/v4l2-framework.txt | 362 +++++++++++++++++++++++++++
 drivers/media/video/Makefile                 |   2 +-
 drivers/media/video/v4l2-device.c            |  86 +++++++
 drivers/media/video/v4l2-subdev.c            | 108 ++++++++
 include/media/v4l2-device.h                  | 109 ++++++++
 include/media/v4l2-subdev.h                  | 188 ++++++++++++++
 6 files changed, 854 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/video4linux/v4l2-framework.txt
 create mode 100644 drivers/media/video/v4l2-device.c
 create mode 100644 drivers/media/video/v4l2-subdev.c
 create mode 100644 include/media/v4l2-device.h
 create mode 100644 include/media/v4l2-subdev.h

(limited to 'include')

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
new file mode 100644
index 000000000000..60eaf54e7eff
--- /dev/null
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -0,0 +1,362 @@
+Overview of the V4L2 driver framework
+=====================================
+
+This text documents the various structures provided by the V4L2 framework and
+their relationships.
+
+
+Introduction
+------------
+
+The V4L2 drivers tend to be very complex due to the complexity of the
+hardware: most devices have multiple ICs, export multiple device nodes in
+/dev, and create also non-V4L2 devices such as DVB, ALSA, FB, I2C and input
+(IR) devices.
+
+Especially the fact that V4L2 drivers have to setup supporting ICs to
+do audio/video muxing/encoding/decoding makes it more complex than most.
+Usually these ICs are connected to the main bridge driver through one or
+more I2C busses, but other busses can also be used. Such devices are
+called 'sub-devices'.
+
+For a long time the framework was limited to the video_device struct for
+creating V4L device nodes and video_buf for handling the video buffers
+(note that this document does not discuss the video_buf framework).
+
+This meant that all drivers had to do the setup of device instances and
+connecting to sub-devices themselves. Some of this is quite complicated
+to do right and many drivers never did do it correctly.
+
+There is also a lot of common code that could never be refactored due to
+the lack of a framework.
+
+So this framework sets up the basic building blocks that all drivers
+need and this same framework should make it much easier to refactor
+common code into utility functions shared by all drivers.
+
+
+Structure of a driver
+---------------------
+
+All drivers have the following structure:
+
+1) A struct for each device instance containing the device state.
+
+2) A way of initializing and commanding sub-devices (if any).
+
+3) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX, /dev/radioX and
+   /dev/vtxX) and keeping track of device-node specific data.
+
+4) Filehandle-specific structs containing per-filehandle data.
+
+This is a rough schematic of how it all relates:
+
+    device instances
+      |
+      +-sub-device instances
+      |
+      \-V4L2 device nodes
+	  |
+	  \-filehandle instances
+
+
+Structure of the framework
+--------------------------
+
+The framework closely resembles the driver structure: it has a v4l2_device
+struct for the device instance data, a v4l2_subdev struct to refer to
+sub-device instances, the video_device struct stores V4L2 device node data
+and in the future a v4l2_fh struct will keep track of filehandle instances
+(this is not yet implemented).
+
+
+struct v4l2_device
+------------------
+
+Each device instance is represented by a struct v4l2_device (v4l2-device.h).
+Very simple devices can just allocate this struct, but most of the time you
+would embed this struct inside a larger struct.
+
+You must register the device instance:
+
+	v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev);
+
+Registration will initialize the v4l2_device struct and link dev->driver_data
+to v4l2_dev. Registration will also set v4l2_dev->name to a value derived from
+dev (driver name followed by the bus_id, to be precise). You may change the
+name after registration if you want.
+
+You unregister with:
+
+	v4l2_device_unregister(struct v4l2_device *v4l2_dev);
+
+Unregistering will also automatically unregister all subdevs from the device.
+
+Sometimes you need to iterate over all devices registered by a specific
+driver. This is usually the case if multiple device drivers use the same
+hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv
+hardware. The same is true for alsa drivers for example.
+
+You can iterate over all registered devices as follows:
+
+static int callback(struct device *dev, void *p)
+{
+	struct v4l2_device *v4l2_dev = dev_get_drvdata(dev);
+
+	/* test if this device was inited */
+	if (v4l2_dev == NULL)
+		return 0;
+	...
+	return 0;
+}
+
+int iterate(void *p)
+{
+	struct device_driver *drv;
+	int err;
+
+	/* Find driver 'ivtv' on the PCI bus.
+	   pci_bus_type is a global. For USB busses use usb_bus_type. */
+	drv = driver_find("ivtv", &pci_bus_type);
+	/* iterate over all ivtv device instances */
+	err = driver_for_each_device(drv, NULL, p, callback);
+	put_driver(drv);
+	return err;
+}
+
+Sometimes you need to keep a running counter of the device instance. This is
+commonly used to map a device instance to an index of a module option array.
+
+The recommended approach is as follows:
+
+static atomic_t drv_instance = ATOMIC_INIT(0);
+
+static int __devinit drv_probe(struct pci_dev *dev,
+				const struct pci_device_id *pci_id)
+{
+	...
+	state->instance = atomic_inc_return(&drv_instance) - 1;
+}
+
+
+struct v4l2_subdev
+------------------
+
+Many drivers need to communicate with sub-devices. These devices can do all
+sort of tasks, but most commonly they handle audio and/or video muxing,
+encoding or decoding. For webcams common sub-devices are sensors and camera
+controllers.
+
+Usually these are I2C devices, but not necessarily. In order to provide the
+driver with a consistent interface to these sub-devices the v4l2_subdev struct
+(v4l2-subdev.h) was created.
+
+Each sub-device driver must have a v4l2_subdev struct. This struct can be
+stand-alone for simple sub-devices or it might be embedded in a larger struct
+if more state information needs to be stored. Usually there is a low-level
+device struct (e.g. i2c_client) that contains the device data as setup
+by the kernel. It is recommended to store that pointer in the private
+data of v4l2_subdev using v4l2_set_subdevdata(). That makes it easy to go
+from a v4l2_subdev to the actual low-level bus-specific device data.
+
+You also need a way to go from the low-level struct to v4l2_subdev. For the
+common i2c_client struct the i2c_set_clientdata() call is used to store a
+v4l2_subdev pointer, for other busses you may have to use other methods.
+
+From the bridge driver perspective you load the sub-device module and somehow
+obtain the v4l2_subdev pointer. For i2c devices this is easy: you call
+i2c_get_clientdata(). For other busses something similar needs to be done.
+Helper functions exists for sub-devices on an I2C bus that do most of this
+tricky work for you.
+
+Each v4l2_subdev contains function pointers that sub-device drivers can
+implement (or leave NULL if it is not applicable). Since sub-devices can do
+so many different things and you do not want to end up with a huge ops struct
+of which only a handful of ops are commonly implemented, the function pointers
+are sorted according to category and each category has its own ops struct.
+
+The top-level ops struct contains pointers to the category ops structs, which
+may be NULL if the subdev driver does not support anything from that category.
+
+It looks like this:
+
+struct v4l2_subdev_core_ops {
+	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip);
+	int (*log_status)(struct v4l2_subdev *sd);
+	int (*init)(struct v4l2_subdev *sd, u32 val);
+	...
+};
+
+struct v4l2_subdev_tuner_ops {
+	...
+};
+
+struct v4l2_subdev_audio_ops {
+	...
+};
+
+struct v4l2_subdev_video_ops {
+	...
+};
+
+struct v4l2_subdev_ops {
+	const struct v4l2_subdev_core_ops  *core;
+	const struct v4l2_subdev_tuner_ops *tuner;
+	const struct v4l2_subdev_audio_ops *audio;
+	const struct v4l2_subdev_video_ops *video;
+};
+
+The core ops are common to all subdevs, the other categories are implemented
+depending on the sub-device. E.g. a video device is unlikely to support the
+audio ops and vice versa.
+
+This setup limits the number of function pointers while still making it easy
+to add new ops and categories.
+
+A sub-device driver initializes the v4l2_subdev struct using:
+
+	v4l2_subdev_init(subdev, &ops);
+
+Afterwards you need to initialize subdev->name with a unique name and set the
+module owner. This is done for you if you use the i2c helper functions.
+
+A device (bridge) driver needs to register the v4l2_subdev with the
+v4l2_device:
+
+	int err = v4l2_device_register_subdev(device, subdev);
+
+This can fail if the subdev module disappeared before it could be registered.
+After this function was called successfully the subdev->dev field points to
+the v4l2_device.
+
+You can unregister a sub-device using:
+
+	v4l2_device_unregister_subdev(subdev);
+
+Afterwards the subdev module can be unloaded and subdev->dev == NULL.
+
+You can call an ops function either directly:
+
+	err = subdev->ops->core->g_chip_ident(subdev, &chip);
+
+but it is better and easier to use this macro:
+
+	err = v4l2_subdev_call(subdev, core, g_chip_ident, &chip);
+
+The macro will to the right NULL pointer checks and returns -ENODEV if subdev
+is NULL, -ENOIOCTLCMD if either subdev->core or subdev->core->g_chip_ident is
+NULL, or the actual result of the subdev->ops->core->g_chip_ident ops.
+
+It is also possible to call all or a subset of the sub-devices:
+
+	v4l2_device_call_all(dev, 0, core, g_chip_ident, &chip);
+
+Any subdev that does not support this ops is skipped and error results are
+ignored. If you want to check for errors use this:
+
+	err = v4l2_device_call_until_err(dev, 0, core, g_chip_ident, &chip);
+
+Any error except -ENOIOCTLCMD will exit the loop with that error. If no
+errors (except -ENOIOCTLCMD) occured, then 0 is returned.
+
+The second argument to both calls is a group ID. If 0, then all subdevs are
+called. If non-zero, then only those whose group ID match that value will
+be called. Before a bridge driver registers a subdev it can set subdev->grp_id
+to whatever value it wants (it's 0 by default). This value is owned by the
+bridge driver and the sub-device driver will never modify or use it.
+
+The group ID gives the bridge driver more control how callbacks are called.
+For example, there may be multiple audio chips on a board, each capable of
+changing the volume. But usually only one will actually be used when the
+user want to change the volume. You can set the group ID for that subdev to
+e.g. AUDIO_CONTROLLER and specify that as the group ID value when calling
+v4l2_device_call_all(). That ensures that it will only go to the subdev
+that needs it.
+
+The advantage of using v4l2_subdev is that it is a generic struct and does
+not contain any knowledge about the underlying hardware. So a driver might
+contain several subdevs that use an I2C bus, but also a subdev that is
+controlled through GPIO pins. This distinction is only relevant when setting
+up the device, but once the subdev is registered it is completely transparent.
+
+
+I2C sub-device drivers
+----------------------
+
+Since these drivers are so common, special helper functions are available to
+ease the use of these drivers (v4l2-common.h).
+
+The recommended method of adding v4l2_subdev support to an I2C driver is to
+embed the v4l2_subdev struct into the state struct that is created for each
+I2C device instance. Very simple devices have no state struct and in that case
+you can just create a v4l2_subdev directly.
+
+A typical state struct would look like this (where 'chipname' is replaced by
+the name of the chip):
+
+struct chipname_state {
+	struct v4l2_subdev sd;
+	...  /* additional state fields */
+};
+
+Initialize the v4l2_subdev struct as follows:
+
+	v4l2_i2c_subdev_init(&state->sd, client, subdev_ops);
+
+This function will fill in all the fields of v4l2_subdev and ensure that the
+v4l2_subdev and i2c_client both point to one another.
+
+You should also add a helper inline function to go from a v4l2_subdev pointer
+to a chipname_state struct:
+
+static inline struct chipname_state *to_state(struct v4l2_subdev *sd)
+{
+	return container_of(sd, struct chipname_state, sd);
+}
+
+Use this to go from the v4l2_subdev struct to the i2c_client struct:
+
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+And this to go from an i2c_client to a v4l2_subdev struct:
+
+	struct v4l2_subdev *sd = i2c_get_clientdata(client);
+
+Finally you need to make a command function to make driver->command()
+call the right subdev_ops functions:
+
+static int subdev_command(struct i2c_client *client, unsigned cmd, void *arg)
+{
+	return v4l2_subdev_command(i2c_get_clientdata(client), cmd, arg);
+}
+
+If driver->command is never used then you can leave this out. Eventually the
+driver->command usage should be removed from v4l.
+
+Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback
+is called. This will unregister the sub-device from the bridge driver. It is
+safe to call this even if the sub-device was never registered.
+
+
+The bridge driver also has some helper functions it can use:
+
+struct v4l2_subdev *sd = v4l2_i2c_new_subdev(adapter, "module_foo", "chipid", 0x36);
+
+This loads the given module (can be NULL if no module needs to be loaded) and
+calls i2c_new_device() with the given i2c_adapter and chip/address arguments.
+If all goes well, then it registers the subdev with the v4l2_device. It gets
+the v4l2_device by calling i2c_get_adapdata(adapter), so you should make sure
+that adapdata is set to v4l2_device when you setup the i2c_adapter in your
+driver.
+
+You can also use v4l2_i2c_new_probed_subdev() which is very similar to
+v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses
+that it should probe. Internally it calls i2c_new_probed_device().
+
+Both functions return NULL if something went wrong.
+
+
+struct video_device
+-------------------
+
+Not yet documented.
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 492ab3dce71b..84a2be0cbbe7 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -10,7 +10,7 @@ stkwebcam-objs	:=	stk-webcam.o stk-sensor.o
 
 omap2cam-objs	:=	omap24xxcam.o omap24xxcam-dma.o
 
-videodev-objs	:=	v4l2-dev.o v4l2-ioctl.o
+videodev-objs	:=	v4l2-dev.o v4l2-ioctl.o v4l2-device.o v4l2-subdev.o
 
 obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-compat-ioctl32.o v4l2-int-device.o
 
diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c
new file mode 100644
index 000000000000..9eefde031597
--- /dev/null
+++ b/drivers/media/video/v4l2-device.c
@@ -0,0 +1,86 @@
+/*
+    V4L2 device support.
+
+    Copyright (C) 2008  Hans Verkuil <hverkuil@xs4all.nl>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/i2c.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-device.h>
+
+int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev)
+{
+	if (dev == NULL || v4l2_dev == NULL)
+		return -EINVAL;
+	/* Warn if we apparently re-register a device */
+	WARN_ON(dev_get_drvdata(dev));
+	INIT_LIST_HEAD(&v4l2_dev->subdevs);
+	spin_lock_init(&v4l2_dev->lock);
+	v4l2_dev->dev = dev;
+	snprintf(v4l2_dev->name, sizeof(v4l2_dev->name), "%s %s",
+			dev->driver->name, dev->bus_id);
+	dev_set_drvdata(dev, v4l2_dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(v4l2_device_register);
+
+void v4l2_device_unregister(struct v4l2_device *v4l2_dev)
+{
+	struct v4l2_subdev *sd, *next;
+
+	if (v4l2_dev == NULL || v4l2_dev->dev == NULL)
+		return;
+	dev_set_drvdata(v4l2_dev->dev, NULL);
+	/* unregister subdevs */
+	list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list)
+		v4l2_device_unregister_subdev(sd);
+
+	v4l2_dev->dev = NULL;
+}
+EXPORT_SYMBOL_GPL(v4l2_device_unregister);
+
+int v4l2_device_register_subdev(struct v4l2_device *dev, struct v4l2_subdev *sd)
+{
+	/* Check for valid input */
+	if (dev == NULL || sd == NULL || !sd->name[0])
+		return -EINVAL;
+	/* Warn if we apparently re-register a subdev */
+	WARN_ON(sd->dev);
+	if (!try_module_get(sd->owner))
+		return -ENODEV;
+	sd->dev = dev;
+	spin_lock(&dev->lock);
+	list_add_tail(&sd->list, &dev->subdevs);
+	spin_unlock(&dev->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(v4l2_device_register_subdev);
+
+void v4l2_device_unregister_subdev(struct v4l2_subdev *sd)
+{
+	/* return if it isn't registered */
+	if (sd == NULL || sd->dev == NULL)
+		return;
+	spin_lock(&sd->dev->lock);
+	list_del(&sd->list);
+	spin_unlock(&sd->dev->lock);
+	sd->dev = NULL;
+	module_put(sd->owner);
+}
+EXPORT_SYMBOL_GPL(v4l2_device_unregister_subdev);
diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c
new file mode 100644
index 000000000000..fe1f01c970ac
--- /dev/null
+++ b/drivers/media/video/v4l2-subdev.c
@@ -0,0 +1,108 @@
+/*
+    V4L2 sub-device support.
+
+    Copyright (C) 2008  Hans Verkuil <hverkuil@xs4all.nl>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/i2c.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-subdev.h>
+
+int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg)
+{
+	switch (cmd) {
+	case VIDIOC_QUERYCTRL:
+		return v4l2_subdev_call(sd, core, querymenu, arg);
+	case VIDIOC_G_CTRL:
+		return v4l2_subdev_call(sd, core, g_ctrl, arg);
+	case VIDIOC_S_CTRL:
+		return v4l2_subdev_call(sd, core, s_ctrl, arg);
+	case VIDIOC_QUERYMENU:
+		return v4l2_subdev_call(sd, core, queryctrl, arg);
+	case VIDIOC_LOG_STATUS:
+		return v4l2_subdev_call(sd, core, log_status);
+	case VIDIOC_G_CHIP_IDENT:
+		return v4l2_subdev_call(sd, core, g_chip_ident, arg);
+	case VIDIOC_INT_S_STANDBY:
+		return v4l2_subdev_call(sd, core, s_standby, *(u32 *)arg);
+	case VIDIOC_INT_RESET:
+		return v4l2_subdev_call(sd, core, reset, *(u32 *)arg);
+	case VIDIOC_INT_S_GPIO:
+		return v4l2_subdev_call(sd, core, s_gpio, *(u32 *)arg);
+	case VIDIOC_INT_INIT:
+		return v4l2_subdev_call(sd, core, init, *(u32 *)arg);
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+	case VIDIOC_DBG_G_REGISTER:
+		return v4l2_subdev_call(sd, core, g_register, arg);
+	case VIDIOC_DBG_S_REGISTER:
+		return v4l2_subdev_call(sd, core, s_register, arg);
+#endif
+
+	case VIDIOC_INT_S_TUNER_MODE:
+		return v4l2_subdev_call(sd, tuner, s_mode, *(enum v4l2_tuner_type *)arg);
+	case AUDC_SET_RADIO:
+		return v4l2_subdev_call(sd, tuner, s_radio);
+	case VIDIOC_S_TUNER:
+		return v4l2_subdev_call(sd, tuner, s_tuner, arg);
+	case VIDIOC_G_TUNER:
+		return v4l2_subdev_call(sd, tuner, g_tuner, arg);
+	case VIDIOC_S_STD:
+		return v4l2_subdev_call(sd, tuner, s_std, *(v4l2_std_id *)arg);
+	case VIDIOC_S_FREQUENCY:
+		return v4l2_subdev_call(sd, tuner, s_frequency, arg);
+	case VIDIOC_G_FREQUENCY:
+		return v4l2_subdev_call(sd, tuner, g_frequency, arg);
+	case TUNER_SET_TYPE_ADDR:
+		return v4l2_subdev_call(sd, tuner, s_type_addr, arg);
+	case TUNER_SET_CONFIG:
+		return v4l2_subdev_call(sd, tuner, s_config, arg);
+
+	case VIDIOC_INT_AUDIO_CLOCK_FREQ:
+		return v4l2_subdev_call(sd, audio, s_clock_freq, *(u32 *)arg);
+	case VIDIOC_INT_S_AUDIO_ROUTING:
+		return v4l2_subdev_call(sd, audio, s_routing, arg);
+	case VIDIOC_INT_I2S_CLOCK_FREQ:
+		return v4l2_subdev_call(sd, audio, s_i2s_clock_freq, *(u32 *)arg);
+
+	case VIDIOC_INT_S_VIDEO_ROUTING:
+		return v4l2_subdev_call(sd, video, s_routing, arg);
+	case VIDIOC_INT_S_CRYSTAL_FREQ:
+		return v4l2_subdev_call(sd, video, s_crystal_freq, arg);
+	case VIDIOC_INT_DECODE_VBI_LINE:
+		return v4l2_subdev_call(sd, video, decode_vbi_line, arg);
+	case VIDIOC_INT_S_VBI_DATA:
+		return v4l2_subdev_call(sd, video, s_vbi_data, arg);
+	case VIDIOC_INT_G_VBI_DATA:
+		return v4l2_subdev_call(sd, video, g_vbi_data, arg);
+	case VIDIOC_S_FMT:
+		return v4l2_subdev_call(sd, video, s_fmt, arg);
+	case VIDIOC_G_FMT:
+		return v4l2_subdev_call(sd, video, g_fmt, arg);
+	case VIDIOC_INT_S_STD_OUTPUT:
+		return v4l2_subdev_call(sd, video, s_std_output, *(v4l2_std_id *)arg);
+	case VIDIOC_STREAMON:
+		return v4l2_subdev_call(sd, video, s_stream, 1);
+	case VIDIOC_STREAMOFF:
+		return v4l2_subdev_call(sd, video, s_stream, 0);
+
+	default:
+		return v4l2_subdev_call(sd, core, ioctl, cmd, arg);
+	}
+}
+EXPORT_SYMBOL_GPL(v4l2_subdev_command);
diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
new file mode 100644
index 000000000000..97b283a04289
--- /dev/null
+++ b/include/media/v4l2-device.h
@@ -0,0 +1,109 @@
+/*
+    V4L2 device support header.
+
+    Copyright (C) 2008  Hans Verkuil <hverkuil@xs4all.nl>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _V4L2_DEVICE_H
+#define _V4L2_DEVICE_H
+
+#include <media/v4l2-subdev.h>
+
+/* Each instance of a V4L2 device should create the v4l2_device struct,
+   either stand-alone or embedded in a larger struct.
+
+   It allows easy access to sub-devices (see v4l2-subdev.h) and provides
+   basic V4L2 device-level support.
+ */
+
+#define V4L2_DEVICE_NAME_SIZE (BUS_ID_SIZE + 16)
+
+struct v4l2_device {
+	/* dev->driver_data points to this struct */
+	struct device *dev;
+	/* used to keep track of the registered subdevs */
+	struct list_head subdevs;
+	/* lock this struct; can be used by the driver as well if this
+	   struct is embedded into a larger struct. */
+	spinlock_t lock;
+	/* unique device name, by default the driver name + bus ID */
+	char name[V4L2_DEVICE_NAME_SIZE];
+};
+
+/* Initialize v4l2_dev and make dev->driver_data point to v4l2_dev */
+int __must_check v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev);
+/* Set v4l2_dev->dev->driver_data to NULL and unregister all sub-devices */
+void v4l2_device_unregister(struct v4l2_device *v4l2_dev);
+
+/* Register a subdev with a v4l2 device. While registered the subdev module
+   is marked as in-use. An error is returned if the module is no longer
+   loaded when you attempt to register it. */
+int __must_check v4l2_device_register_subdev(struct v4l2_device *dev, struct v4l2_subdev *sd);
+/* Unregister a subdev with a v4l2 device. Can also be called if the subdev
+   wasn't registered. In that case it will do nothing. */
+void v4l2_device_unregister_subdev(struct v4l2_subdev *sd);
+
+/* Iterate over all subdevs. */
+#define v4l2_device_for_each_subdev(sd, dev)				\
+	list_for_each_entry(sd, &(dev)->subdevs, list)
+
+/* Call the specified callback for all subdevs matching the condition.
+   Ignore any errors. Note that you cannot add or delete a subdev
+   while walking the subdevs list. */
+#define __v4l2_device_call_subdevs(dev, cond, o, f, args...) 		\
+	do { 								\
+		struct v4l2_subdev *sd; 				\
+									\
+		list_for_each_entry(sd, &(dev)->subdevs, list)   	\
+			if ((cond) && sd->ops->o && sd->ops->o->f) 	\
+				sd->ops->o->f(sd , ##args); 		\
+	} while (0)
+
+/* Call the specified callback for all subdevs matching the condition.
+   If the callback returns an error other than 0 or -ENOIOCTLCMD, then
+   return with that error code. Note that you cannot add or delete a
+   subdev while walking the subdevs list. */
+#define __v4l2_device_call_subdevs_until_err(dev, cond, o, f, args...)  \
+({ 									\
+	struct v4l2_subdev *sd; 					\
+	int err = 0; 							\
+									\
+	list_for_each_entry(sd, &(dev)->subdevs, list) { 		\
+		if ((cond) && sd->ops->o && sd->ops->o->f) 		\
+			err = sd->ops->o->f(sd , ##args); 		\
+		if (err && err != -ENOIOCTLCMD)				\
+			break; 						\
+	} 								\
+	(err == -ENOIOCTLCMD) ? 0 : err; 				\
+})
+
+/* Call the specified callback for all subdevs matching grp_id (if 0, then
+   match them all). Ignore any errors. Note that you cannot add or delete
+   a subdev while walking the subdevs list. */
+#define v4l2_device_call_all(dev, grp_id, o, f, args...) 		\
+	__v4l2_device_call_subdevs(dev, 				\
+			!(grp_id) || sd->grp_id == (grp_id), o, f , ##args)
+
+/* Call the specified callback for all subdevs matching grp_id (if 0, then
+   match them all). If the callback returns an error other than 0 or
+   -ENOIOCTLCMD, then return with that error code. Note that you cannot
+   add or delete a subdev while walking the subdevs list. */
+#define v4l2_device_call_until_err(dev, grp_id, o, f, args...) 		\
+	__v4l2_device_call_subdevs_until_err(dev,			\
+		       !(grp_id) || sd->grp_id == (grp_id), o, f , ##args)
+
+#endif
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
new file mode 100644
index 000000000000..bc9e0fbf2822
--- /dev/null
+++ b/include/media/v4l2-subdev.h
@@ -0,0 +1,188 @@
+/*
+    V4L2 sub-device support header.
+
+    Copyright (C) 2008  Hans Verkuil <hverkuil@xs4all.nl>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _V4L2_SUBDEV_H
+#define _V4L2_SUBDEV_H
+
+#include <media/v4l2-common.h>
+
+struct v4l2_device;
+struct v4l2_subdev;
+struct tuner_setup;
+
+/* Sub-devices are devices that are connected somehow to the main bridge
+   device. These devices are usually audio/video muxers/encoders/decoders or
+   sensors and webcam controllers.
+
+   Usually these devices are controlled through an i2c bus, but other busses
+   may also be used.
+
+   The v4l2_subdev struct provides a way of accessing these devices in a
+   generic manner. Most operations that these sub-devices support fall in
+   a few categories: core ops, audio ops, video ops and tuner ops.
+
+   More categories can be added if needed, although this should remain a
+   limited set (no more than approx. 8 categories).
+
+   Each category has its own set of ops that subdev drivers can implement.
+
+   A subdev driver can leave the pointer to the category ops NULL if
+   it does not implement them (e.g. an audio subdev will generally not
+   implement the video category ops). The exception is the core category:
+   this must always be present.
+
+   These ops are all used internally so it is no problem to change, remove
+   or add ops or move ops from one to another category. Currently these
+   ops are based on the original ioctls, but since ops are not limited to
+   one argument there is room for improvement here once all i2c subdev
+   drivers are converted to use these ops.
+ */
+
+/* Core ops: it is highly recommended to implement at least these ops:
+
+   g_chip_ident
+   log_status
+   g_register
+   s_register
+
+   This provides basic debugging support.
+
+   The ioctl ops is meant for generic ioctl-like commands. Depending on
+   the use-case it might be better to use subdev-specific ops (currently
+   not yet implemented) since ops provide proper type-checking.
+ */
+struct v4l2_subdev_core_ops {
+	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip);
+	int (*log_status)(struct v4l2_subdev *sd);
+	int (*init)(struct v4l2_subdev *sd, u32 val);
+	int (*s_standby)(struct v4l2_subdev *sd, u32 standby);
+	int (*reset)(struct v4l2_subdev *sd, u32 val);
+	int (*s_gpio)(struct v4l2_subdev *sd, u32 val);
+	int (*queryctrl)(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc);
+	int (*g_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl);
+	int (*s_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl);
+	int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm);
+	int (*ioctl)(struct v4l2_subdev *sd, int cmd, void *arg);
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
+	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
+#endif
+};
+
+struct v4l2_subdev_tuner_ops {
+	int (*s_mode)(struct v4l2_subdev *sd, enum v4l2_tuner_type);
+	int (*s_radio)(struct v4l2_subdev *sd);
+	int (*s_frequency)(struct v4l2_subdev *sd, struct v4l2_frequency *freq);
+	int (*g_frequency)(struct v4l2_subdev *sd, struct v4l2_frequency *freq);
+	int (*g_tuner)(struct v4l2_subdev *sd, struct v4l2_tuner *vt);
+	int (*s_tuner)(struct v4l2_subdev *sd, struct v4l2_tuner *vt);
+	int (*s_std)(struct v4l2_subdev *sd, v4l2_std_id norm);
+	int (*s_type_addr)(struct v4l2_subdev *sd, struct tuner_setup *type);
+	int (*s_config)(struct v4l2_subdev *sd, const struct v4l2_priv_tun_config *config);
+};
+
+struct v4l2_subdev_audio_ops {
+	int (*s_clock_freq)(struct v4l2_subdev *sd, u32 freq);
+	int (*s_i2s_clock_freq)(struct v4l2_subdev *sd, u32 freq);
+	int (*s_routing)(struct v4l2_subdev *sd, const struct v4l2_routing *route);
+};
+
+struct v4l2_subdev_video_ops {
+	int (*s_routing)(struct v4l2_subdev *sd, const struct v4l2_routing *route);
+	int (*s_crystal_freq)(struct v4l2_subdev *sd, struct v4l2_crystal_freq *freq);
+	int (*decode_vbi_line)(struct v4l2_subdev *sd, struct v4l2_decode_vbi_line *vbi_line);
+	int (*s_vbi_data)(struct v4l2_subdev *sd, const struct v4l2_sliced_vbi_data *vbi_data);
+	int (*g_vbi_data)(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_data *vbi_data);
+	int (*s_std_output)(struct v4l2_subdev *sd, v4l2_std_id std);
+	int (*s_stream)(struct v4l2_subdev *sd, int enable);
+	int (*s_fmt)(struct v4l2_subdev *sd, struct v4l2_format *fmt);
+	int (*g_fmt)(struct v4l2_subdev *sd, struct v4l2_format *fmt);
+};
+
+struct v4l2_subdev_ops {
+	const struct v4l2_subdev_core_ops  *core;
+	const struct v4l2_subdev_tuner_ops *tuner;
+	const struct v4l2_subdev_audio_ops *audio;
+	const struct v4l2_subdev_video_ops *video;
+};
+
+#define V4L2_SUBDEV_NAME_SIZE 32
+
+/* Each instance of a subdev driver should create this struct, either
+   stand-alone or embedded in a larger struct.
+ */
+struct v4l2_subdev {
+	struct list_head list;
+	struct module *owner;
+	struct v4l2_device *dev;
+	const struct v4l2_subdev_ops *ops;
+	/* name must be unique */
+	char name[V4L2_SUBDEV_NAME_SIZE];
+	/* can be used to group similar subdevs, value is driver-specific */
+	u32 grp_id;
+	/* pointer to private data */
+	void *priv;
+};
+
+static inline void v4l2_set_subdevdata(struct v4l2_subdev *sd, void *p)
+{
+	sd->priv = p;
+}
+
+static inline void *v4l2_get_subdevdata(const struct v4l2_subdev *sd)
+{
+	return sd->priv;
+}
+
+/* Convert an ioctl-type command to the proper v4l2_subdev_ops function call.
+   This is used by subdev modules that can be called by both old-style ioctl
+   commands and through the v4l2_subdev_ops.
+
+   The ioctl API of the subdev driver can call this function to call the
+   right ops based on the ioctl cmd and arg.
+
+   Once all subdev drivers have been converted and all drivers no longer
+   use the ioctl interface, then this function can be removed.
+ */
+int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg);
+
+static inline void v4l2_subdev_init(struct v4l2_subdev *sd,
+					const struct v4l2_subdev_ops *ops)
+{
+	INIT_LIST_HEAD(&sd->list);
+	/* ops->core MUST be set */
+	BUG_ON(!ops || !ops->core);
+	sd->ops = ops;
+	sd->dev = NULL;
+	sd->name[0] = '\0';
+	sd->grp_id = 0;
+	sd->priv = NULL;
+}
+
+/* Call an ops of a v4l2_subdev, doing the right checks against
+   NULL pointers.
+
+   Example: err = v4l2_subdev_call(sd, core, g_chip_ident, &chip);
+ */
+#define v4l2_subdev_call(sd, o, f, args...)				\
+	(!(sd) ? -ENODEV : (((sd) && (sd)->ops->o && (sd)->ops->o->f) ?	\
+		(sd)->ops->o->f((sd) , ##args) : -ENOIOCTLCMD))
+
+#endif
-- 
cgit v1.2.3


From dd99120c7165c6873a423977d1eaa41b6e2d1ffc Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 23 Nov 2008 12:19:45 -0300
Subject: V4L/DVB (9821): v4l2-common: add i2c helper functions

Add helper functions to load i2c sub-devices, integrating them
into the v4l2-framework.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-common.c | 113 ++++++++++++++++++++++++++++++++++++++
 include/media/v4l2-common.h       |  41 ++++++++++++++
 2 files changed, 154 insertions(+)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index fdc8871f631e..26f3254337da 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -58,6 +58,7 @@
 #include <asm/div64.h>
 #define __OLD_VIDIOC_ /* To allow fixing old calls*/
 #include <media/v4l2-common.h>
+#include <media/v4l2-device.h>
 #include <media/v4l2-chip-ident.h>
 
 #include <linux/videodev2.h>
@@ -801,4 +802,116 @@ int v4l2_i2c_attach(struct i2c_adapter *adapter, int address, struct i2c_driver
 	return err != -ENOMEM ? 0 : err;
 }
 EXPORT_SYMBOL(v4l2_i2c_attach);
+
+void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
+		const struct v4l2_subdev_ops *ops)
+{
+	v4l2_subdev_init(sd, ops);
+	/* the owner is the same as the i2c_client's driver owner */
+	sd->owner = client->driver->driver.owner;
+	/* i2c_client and v4l2_subdev point to one another */
+	v4l2_set_subdevdata(sd, client);
+	i2c_set_clientdata(client, sd);
+	/* initialize name */
+	snprintf(sd->name, sizeof(sd->name), "%s %d-%04x",
+		client->driver->driver.name, i2c_adapter_id(client->adapter),
+		client->addr);
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_init);
+
+
+
+/* Load an i2c sub-device. It assumes that i2c_get_adapdata(adapter)
+   returns the v4l2_device and that i2c_get_clientdata(client)
+   returns the v4l2_subdev. */
+struct v4l2_subdev *v4l2_i2c_new_subdev(struct i2c_adapter *adapter,
+		const char *module_name, const char *client_type, u8 addr)
+{
+	struct v4l2_device *dev = i2c_get_adapdata(adapter);
+	struct v4l2_subdev *sd = NULL;
+	struct i2c_client *client;
+	struct i2c_board_info info;
+
+	BUG_ON(!dev);
+#ifdef MODULE
+	if (module_name)
+		request_module(module_name);
+#endif
+	/* Setup the i2c board info with the device type and
+	   the device address. */
+	memset(&info, 0, sizeof(info));
+	strlcpy(info.type, client_type, sizeof(info.type));
+	info.addr = addr;
+
+	/* Create the i2c client */
+	client = i2c_new_device(adapter, &info);
+	/* Note: it is possible in the future that
+	   c->driver is NULL if the driver is still being loaded.
+	   We need better support from the kernel so that we
+	   can easily wait for the load to finish. */
+	if (client == NULL || client->driver == NULL)
+		return NULL;
+
+	/* Lock the module so we can safely get the v4l2_subdev pointer */
+	if (!try_module_get(client->driver->driver.owner))
+		return NULL;
+	sd = i2c_get_clientdata(client);
+
+	/* Register with the v4l2_device which increases the module's
+	   use count as well. */
+	if (v4l2_device_register_subdev(dev, sd))
+		sd = NULL;
+	/* Decrease the module use count to match the first try_module_get. */
+	module_put(client->driver->driver.owner);
+	return sd;
+
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev);
+
+/* Probe and load an i2c sub-device. It assumes that i2c_get_adapdata(adapter)
+   returns the v4l2_device and that i2c_get_clientdata(client)
+   returns the v4l2_subdev. */
+struct v4l2_subdev *v4l2_i2c_new_probed_subdev(struct i2c_adapter *adapter,
+	const char *module_name, const char *client_type,
+	const unsigned short *addrs)
+{
+	struct v4l2_device *dev = i2c_get_adapdata(adapter);
+	struct v4l2_subdev *sd = NULL;
+	struct i2c_client *client = NULL;
+	struct i2c_board_info info;
+
+	BUG_ON(!dev);
+#ifdef MODULE
+	if (module_name)
+		request_module(module_name);
+#endif
+	/* Setup the i2c board info with the device type and
+	   the device address. */
+	memset(&info, 0, sizeof(info));
+	strlcpy(info.type, client_type, sizeof(info.type));
+
+	/* Probe and create the i2c client */
+	client = i2c_new_probed_device(adapter, &info, addrs);
+	/* Note: it is possible in the future that
+	   c->driver is NULL if the driver is still being loaded.
+	   We need better support from the kernel so that we
+	   can easily wait for the load to finish. */
+	if (client == NULL || client->driver == NULL)
+		return NULL;
+
+	/* Lock the module so we can safely get the v4l2_subdev pointer */
+	if (!try_module_get(client->driver->driver.owner))
+		return NULL;
+	sd = i2c_get_clientdata(client);
+
+	/* Register with the v4l2_device which increases the module's
+	   use count as well. */
+	if (v4l2_device_register_subdev(dev, sd))
+		sd = NULL;
+	/* Decrease the module use count to match the first try_module_get. */
+	module_put(client->driver->driver.owner);
+	return sd;
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_new_probed_subdev);
+
 #endif
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 2f8719abf5cb..f99c866d8c37 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -57,6 +57,29 @@
 
 /* ------------------------------------------------------------------------- */
 
+/* These printk constructs can be used with v4l2_device and v4l2_subdev */
+#define v4l2_printk(level, dev, fmt, arg...) \
+	printk(level "%s: " fmt, (dev)->name , ## arg)
+
+#define v4l2_err(dev, fmt, arg...) \
+	v4l2_printk(KERN_ERR, dev, fmt , ## arg)
+
+#define v4l2_warn(dev, fmt, arg...) \
+	v4l2_printk(KERN_WARNING, dev, fmt , ## arg)
+
+#define v4l2_info(dev, fmt, arg...) \
+	v4l2_printk(KERN_INFO, dev, fmt , ## arg)
+
+/* These three macros assume that the debug level is set with a module
+   parameter called 'debug'. */
+#define v4l2_dbg(level, debug, dev, fmt, arg...)			\
+	do { 								\
+		if (debug >= (level))					\
+			v4l2_printk(KERN_DEBUG, dev, fmt , ## arg); 	\
+	} while (0)
+
+/* ------------------------------------------------------------------------- */
+
 /* Priority helper functions */
 
 struct v4l2_prio_state {
@@ -104,11 +127,29 @@ struct i2c_driver;
 struct i2c_adapter;
 struct i2c_client;
 struct i2c_device_id;
+struct v4l2_device;
+struct v4l2_subdev;
+struct v4l2_subdev_ops;
 
 int v4l2_i2c_attach(struct i2c_adapter *adapter, int address, struct i2c_driver *driver,
 		const char *name,
 		int (*probe)(struct i2c_client *, const struct i2c_device_id *));
 
+/* Load an i2c module and return an initialized v4l2_subdev struct.
+   Only call request_module if module_name != NULL.
+   The client_type argument is the name of the chip that's on the adapter. */
+struct v4l2_subdev *v4l2_i2c_new_subdev(struct i2c_adapter *adapter,
+		const char *module_name, const char *client_type, u8 addr);
+/* Probe and load an i2c module and return an initialized v4l2_subdev struct.
+   Only call request_module if module_name != NULL.
+   The client_type argument is the name of the chip that's on the adapter. */
+struct v4l2_subdev *v4l2_i2c_new_probed_subdev(struct i2c_adapter *adapter,
+		const char *module_name, const char *client_type,
+		const unsigned short *addrs);
+/* Initialize an v4l2_subdev with data from an i2c_client struct */
+void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
+		const struct v4l2_subdev_ops *ops);
+
 /* ------------------------------------------------------------------------- */
 
 /* Internal ioctls */
-- 
cgit v1.2.3


From 0877258d98154565def498833ccb208234c85084 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 14 Dec 2008 16:21:16 -0300
Subject: V4L/DVB (9897): v4l2: Add camera zoom controls

The zoom controls move the zoom lens group to a an absolute position, as a
relative displacement or at a given speed until reaching physical device
limits. Positive values move the zoom lens group towards the telephoto
direction, negative values towards the wide-angle direction.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index ec311d4616cd..e450a92a622b 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1118,6 +1118,10 @@ enum  v4l2_exposure_auto_type {
 #define V4L2_CID_FOCUS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+11)
 #define V4L2_CID_FOCUS_AUTO			(V4L2_CID_CAMERA_CLASS_BASE+12)
 
+#define V4L2_CID_ZOOM_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+13)
+#define V4L2_CID_ZOOM_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+14)
+#define V4L2_CID_ZOOM_CONTINUOUS		(V4L2_CID_CAMERA_CLASS_BASE+15)
+
 /*
  *	T U N I N G
  */
-- 
cgit v1.2.3


From 046425f8c4ac431db00c09a6d9fba16560b8e5b9 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 14 Dec 2008 16:22:05 -0300
Subject: V4L/DVB (9898): v4l2: Add privacy control

The privacy control prevents video from being acquired by the camera. A true
value indicates that no image can be captured. Devices that implement the
privacy control must support read access and may support write access.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e450a92a622b..6e6743bd0f92 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1122,6 +1122,8 @@ enum  v4l2_exposure_auto_type {
 #define V4L2_CID_ZOOM_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+14)
 #define V4L2_CID_ZOOM_CONTINUOUS		(V4L2_CID_CAMERA_CLASS_BASE+15)
 
+#define V4L2_CID_PRIVACY			(V4L2_CID_CAMERA_CLASS_BASE+16)
+
 /*
  *	T U N I N G
  */
-- 
cgit v1.2.3


From 92f45badbbaccdbc1be25085292a1e258948e221 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 21 Dec 2008 10:35:25 -0300
Subject: V4L/DVB (9932): v4l2-compat32: fix 32-64 compatibility module

Added all missing v4l1/2 ioctls and fix several broken conversions.
Partially based on work done by Cody Pisto <cpisto@gmail.com>.

Tested-by: Brandon Jenkins <bcjenkins@tvwhere.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-compat-ioctl32.c | 781 ++++++++++++++++++------------
 include/linux/videodev2.h                 |   2 +
 2 files changed, 467 insertions(+), 316 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 9b50b43b76a2..5084773d4f10 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -7,12 +7,14 @@
  * Copyright (C) 2001,2002  Andi Kleen, SuSE Labs
  * Copyright (C) 2003       Pavel Machek (pavel@suse.cz)
  * Copyright (C) 2005       Philippe De Muyter (phdm@macqel.be)
+ * Copyright (C) 2008       Hans Verkuil <hverkuil@xs4all.nl>
  *
  * These routines maintain argument size conversion between 32bit and 64bit
  * ioctls.
  */
 
 #include <linux/compat.h>
+#define __OLD_VIDIOC_ /* To allow fixing old calls*/
 #include <linux/videodev.h>
 #include <linux/videodev2.h>
 #include <linux/module.h>
@@ -58,7 +60,6 @@ static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user
 	return 0;
 }
 
-
 struct video_buffer32 {
 	compat_caddr_t base;
 	compat_int_t height, width, depth, bytesperline;
@@ -99,7 +100,7 @@ static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __u
 }
 
 struct video_clip32 {
-	s32 x, y, width, height;	/* Its really s32 in videodev.h */
+	s32 x, y, width, height;	/* It's really s32 in videodev.h */
 	compat_caddr_t next;
 };
 
@@ -108,25 +109,72 @@ struct video_window32 {
 	compat_caddr_t clips;
 	compat_int_t clipcount;
 };
-#endif
 
-static int native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static int get_video_window32(struct video_window *kp, struct video_window32 __user *up)
 {
-	int ret = -ENOIOCTLCMD;
+	struct video_clip __user *uclips;
+	struct video_clip __user *kclips;
+	compat_caddr_t p;
+	int nclips;
 
-	if (file->f_op->unlocked_ioctl)
-		ret = file->f_op->unlocked_ioctl(file, cmd, arg);
-	else if (file->f_op->ioctl) {
-		lock_kernel();
-		ret = file->f_op->ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
-		unlock_kernel();
+	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)))
+		return -EFAULT;
+
+	if (get_user(nclips, &up->clipcount))
+		return -EFAULT;
+
+	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)) ||
+	    get_user(kp->x, &up->x) ||
+	    get_user(kp->y, &up->y) ||
+	    get_user(kp->width, &up->width) ||
+	    get_user(kp->height, &up->height) ||
+	    get_user(kp->chromakey, &up->chromakey) ||
+	    get_user(kp->flags, &up->flags) ||
+	    get_user(kp->clipcount, &up->clipcount))
+		return -EFAULT;
+
+	nclips = kp->clipcount;
+	kp->clips = NULL;
+
+	if (nclips == 0)
+		return 0;
+	if (get_user(p, &up->clips))
+		return -EFAULT;
+	uclips = compat_ptr(p);
+
+	/* If nclips < 0, then it is a clipping bitmap of size
+	   VIDEO_CLIPMAP_SIZE */
+	if (nclips < 0) {
+		if (!access_ok(VERIFY_READ, uclips, VIDEO_CLIPMAP_SIZE))
+			return -EFAULT;
+		kp->clips = compat_alloc_user_space(VIDEO_CLIPMAP_SIZE);
+		if (copy_in_user(kp->clips, uclips, VIDEO_CLIPMAP_SIZE))
+			return -EFAULT;
+		return 0;
 	}
 
-	return ret;
-}
+	/* Otherwise it is an array of video_clip structs. */
+	if (!access_ok(VERIFY_READ, uclips, nclips * sizeof(struct video_clip)))
+		return -EFAULT;
 
+	kp->clips = compat_alloc_user_space(nclips * sizeof(struct video_clip));
+	kclips = kp->clips;
+	while (nclips--) {
+		int err;
+
+		err = copy_in_user(&kclips->x, &uclips->x, sizeof(kclips->x));
+		err |= copy_in_user(&kclips->y, &uclips->y, sizeof(kclips->y));
+		err |= copy_in_user(&kclips->width, &uclips->width, sizeof(kclips->width));
+		err |= copy_in_user(&kclips->height, &uclips->height, sizeof(kclips->height));
+		kclips->next = NULL;
+		if (err)
+			return -EFAULT;
+		kclips++;
+		uclips++;
+	}
+	return 0;
+}
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
 /* You get back everything except the clips... */
 static int put_video_window32(struct video_window *kp, struct video_window32 __user *up)
 {
@@ -141,8 +189,55 @@ static int put_video_window32(struct video_window *kp, struct video_window32 __u
 			return -EFAULT;
 	return 0;
 }
+
+struct video_code32 {
+	char		loadwhat[16];	/* name or tag of file being passed */
+	compat_int_t	datasize;
+	unsigned char	*data;
+};
+
+static int get_microcode32(struct video_code *kp, struct video_code32 __user *up)
+{
+	if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) ||
+		copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) ||
+		get_user(kp->datasize, &up->datasize) ||
+		copy_from_user(kp->data, up->data, up->datasize))
+			return -EFAULT;
+	return 0;
+}
+
+#define VIDIOCGTUNER32		_IOWR('v', 4, struct video_tuner32)
+#define VIDIOCSTUNER32		_IOW('v', 5, struct video_tuner32)
+#define VIDIOCGWIN32		_IOR('v', 9, struct video_window32)
+#define VIDIOCSWIN32		_IOW('v', 10, struct video_window32)
+#define VIDIOCGFBUF32		_IOR('v', 11, struct video_buffer32)
+#define VIDIOCSFBUF32		_IOW('v', 12, struct video_buffer32)
+#define VIDIOCGFREQ32		_IOR('v', 14, u32)
+#define VIDIOCSFREQ32		_IOW('v', 15, u32)
+#define VIDIOCSMICROCODE32	_IOW('v', 27, struct video_code32)
+
+#define VIDIOCCAPTURE32		_IOW('v', 8, s32)
+#define VIDIOCSYNC32		_IOW('v', 18, s32)
+#define VIDIOCSWRITEMODE32	_IOW('v', 25, s32)
+
 #endif
 
+static int native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int ret = -ENOIOCTLCMD;
+
+	if (file->f_op->unlocked_ioctl)
+		ret = file->f_op->unlocked_ioctl(file, cmd, arg);
+	else if (file->f_op->ioctl) {
+		lock_kernel();
+		ret = file->f_op->ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
+		unlock_kernel();
+	}
+
+	return ret;
+}
+
+
 struct v4l2_clip32 {
 	struct v4l2_rect        c;
 	compat_caddr_t 		next;
@@ -229,12 +324,27 @@ static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vb
 	return 0;
 }
 
+static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up)
+{
+	if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format)))
+		return -EFAULT;
+	return 0;
+}
+
+static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up)
+{
+	if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format)))
+		return -EFAULT;
+	return 0;
+}
+
 struct v4l2_format32 {
 	enum v4l2_buf_type type;
 	union {
-		struct v4l2_pix_format	pix;  /* V4L2_BUF_TYPE_VIDEO_CAPTURE */
-		struct v4l2_window32	win;  /* V4L2_BUF_TYPE_VIDEO_OVERLAY */
-		struct v4l2_vbi_format	vbi;  /* V4L2_BUF_TYPE_VBI_CAPTURE */
+		struct v4l2_pix_format	pix;
+		struct v4l2_window32	win;
+		struct v4l2_vbi_format	vbi;
+		struct v4l2_sliced_vbi_format	sliced;
 		__u8	raw_data[200];        /* user-defined */
 	} fmt;
 };
@@ -246,15 +356,27 @@ static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user
 			return -EFAULT;
 	switch (kp->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
 		return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix);
 	case V4L2_BUF_TYPE_VIDEO_OVERLAY:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY:
 		return get_v4l2_window32(&kp->fmt.win, &up->fmt.win);
 	case V4L2_BUF_TYPE_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_VBI_OUTPUT:
 		return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi);
+	case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
+		return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced);
+	case V4L2_BUF_TYPE_PRIVATE:
+		if (copy_from_user(kp, up, sizeof(kp->fmt.raw_data)))
+			return -EFAULT;
+		return 0;
+	case 0:
+		return -EINVAL;
 	default:
-		printk(KERN_INFO "compat_ioctl: unexpected VIDIOC_FMT type %d\n",
+		printk(KERN_INFO "compat_ioctl32: unexpected VIDIOC_FMT type %d\n",
 								kp->type);
-		return -ENXIO;
+		return -EINVAL;
 	}
 }
 
@@ -265,31 +387,30 @@ static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user
 		return -EFAULT;
 	switch (kp->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
 		return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix);
 	case V4L2_BUF_TYPE_VIDEO_OVERLAY:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY:
 		return put_v4l2_window32(&kp->fmt.win, &up->fmt.win);
 	case V4L2_BUF_TYPE_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_VBI_OUTPUT:
 		return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi);
+	case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
+		return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced);
+	case V4L2_BUF_TYPE_PRIVATE:
+		if (copy_to_user(up, kp, sizeof(up->fmt.raw_data)))
+			return -EFAULT;
+		return 0;
+	case 0:
+		return -EINVAL;
 	default:
-		return -ENXIO;
+		printk(KERN_INFO "compat_ioctl32: unexpected VIDIOC_FMT type %d\n",
+								kp->type);
+		return -EINVAL;
 	}
 }
 
-static inline int get_v4l2_standard(struct v4l2_standard *kp, struct v4l2_standard __user *up)
-{
-	if (copy_from_user(kp, up, sizeof(struct v4l2_standard)))
-		return -EFAULT;
-	return 0;
-
-}
-
-static inline int put_v4l2_standard(struct v4l2_standard *kp, struct v4l2_standard __user *up)
-{
-	if (copy_to_user(up, kp, sizeof(struct v4l2_standard)))
-		return -EFAULT;
-	return 0;
-}
-
 struct v4l2_standard32 {
 	__u32		     index;
 	__u32		     id[2]; /* __u64 would get the alignment wrong */
@@ -321,21 +442,6 @@ static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32
 	return 0;
 }
 
-static inline int get_v4l2_tuner(struct v4l2_tuner *kp, struct v4l2_tuner __user *up)
-{
-	if (copy_from_user(kp, up, sizeof(struct v4l2_tuner)))
-		return -EFAULT;
-	return 0;
-
-}
-
-static inline int put_v4l2_tuner(struct v4l2_tuner *kp, struct v4l2_tuner __user *up)
-{
-	if (copy_to_user(up, kp, sizeof(struct v4l2_tuner)))
-		return -EFAULT;
-	return 0;
-}
-
 struct v4l2_buffer32 {
 	__u32			index;
 	enum v4l2_buf_type      type;
@@ -459,149 +565,143 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame
 	return 0;
 }
 
-static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input __user *up)
-{
-	if (copy_from_user(kp, up, sizeof(struct v4l2_input) - 4))
-		return -EFAULT;
-	return 0;
-}
-
-static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input __user *up)
+struct v4l2_input32 {
+	__u32	     index;		/*  Which input */
+	__u8	     name[32];		/*  Label */
+	__u32	     type;		/*  Type of input */
+	__u32	     audioset;		/*  Associated audios (bitfield) */
+	__u32        tuner;             /*  Associated tuner */
+	v4l2_std_id  std;
+	__u32	     status;
+	__u32	     reserved[4];
+} __attribute__ ((packed));
+
+/* The 64-bit v4l2_input struct has extra padding at the end of the struct.
+   Otherwise it is identical to the 32-bit version. */
+static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up)
 {
-	if (copy_to_user(up, kp, sizeof(struct v4l2_input) - 4))
+	if (copy_from_user(kp, up, sizeof(struct v4l2_input32)))
 		return -EFAULT;
 	return 0;
 }
 
-static inline int get_v4l2_input(struct v4l2_input *kp, struct v4l2_input __user *up)
+static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up)
 {
-	if (copy_from_user(kp, up, sizeof(struct v4l2_input)))
+	if (copy_to_user(up, kp, sizeof(struct v4l2_input32)))
 		return -EFAULT;
 	return 0;
 }
 
-static inline int put_v4l2_input(struct v4l2_input *kp, struct v4l2_input __user *up)
-{
-	if (copy_to_user(up, kp, sizeof(struct v4l2_input)))
-		return -EFAULT;
-	return 0;
-}
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-struct video_code32 {
-	char		loadwhat[16];	/* name or tag of file being passed */
-	compat_int_t	datasize;
-	unsigned char	*data;
+struct v4l2_ext_controls32 {
+       __u32 ctrl_class;
+       __u32 count;
+       __u32 error_idx;
+       __u32 reserved[2];
+       compat_caddr_t controls; /* actually struct v4l2_ext_control32 * */
 };
 
-static inline int microcode32(struct video_code *kp, struct video_code32 __user *up)
+static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) ||
-		copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) ||
-		get_user(kp->datasize, &up->datasize) ||
-		copy_from_user(kp->data, up->data, up->datasize))
+	struct v4l2_ext_control __user *ucontrols;
+	struct v4l2_ext_control __user *kcontrols;
+	int n;
+	compat_caddr_t p;
+
+	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) ||
+		get_user(kp->ctrl_class, &up->ctrl_class) ||
+		get_user(kp->count, &up->count) ||
+		get_user(kp->error_idx, &up->error_idx) ||
+		copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved)))
+			return -EFAULT;
+	n = kp->count;
+	if (n == 0) {
+		kp->controls = NULL;
+		return 0;
+	}
+	if (get_user(p, &up->controls))
+		return -EFAULT;
+	ucontrols = compat_ptr(p);
+	if (!access_ok(VERIFY_READ, ucontrols, n * sizeof(struct v4l2_ext_control)))
+		return -EFAULT;
+	kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control));
+	kp->controls = kcontrols;
+	while (--n >= 0) {
+		if (copy_in_user(&kcontrols->id, &ucontrols->id, sizeof(__u32)))
+			return -EFAULT;
+		if (copy_in_user(&kcontrols->reserved2, &ucontrols->reserved2, sizeof(ucontrols->reserved2)))
 			return -EFAULT;
+		/* Note: if the void * part of the union ever becomes relevant
+		   then we need to know the type of the control in order to do
+		   the right thing here. Luckily, that is not yet an issue. */
+		if (copy_in_user(&kcontrols->value, &ucontrols->value, sizeof(ucontrols->value)))
+			return -EFAULT;
+		ucontrols++;
+		kcontrols++;
+	}
 	return 0;
 }
 
-#define VIDIOCGTUNER32		_IOWR('v', 4, struct video_tuner32)
-#define VIDIOCSTUNER32		_IOW('v', 5, struct video_tuner32)
-#define VIDIOCGWIN32		_IOR('v', 9, struct video_window32)
-#define VIDIOCSWIN32		_IOW('v', 10, struct video_window32)
-#define VIDIOCGFBUF32		_IOR('v', 11, struct video_buffer32)
-#define VIDIOCSFBUF32		_IOW('v', 12, struct video_buffer32)
-#define VIDIOCGFREQ32		_IOR('v', 14, u32)
-#define VIDIOCSFREQ32		_IOW('v', 15, u32)
-#define VIDIOCSMICROCODE32	_IOW('v', 27, struct video_code32)
-
-#endif
-
-/* VIDIOC_ENUMINPUT32 is VIDIOC_ENUMINPUT minus 4 bytes of padding alignement */
-#define VIDIOC_ENUMINPUT32	(VIDIOC_ENUMINPUT - _IOC(0, 0, 0, 4))
-#define VIDIOC_G_FMT32		_IOWR ('V',  4, struct v4l2_format32)
-#define VIDIOC_S_FMT32		_IOWR ('V',  5, struct v4l2_format32)
-#define VIDIOC_QUERYBUF32	_IOWR ('V',  9, struct v4l2_buffer32)
-#define VIDIOC_G_FBUF32		_IOR  ('V', 10, struct v4l2_framebuffer32)
-#define VIDIOC_S_FBUF32		_IOW  ('V', 11, struct v4l2_framebuffer32)
-/* VIDIOC_OVERLAY is now _IOW, but was _IOWR */
-#define VIDIOC_OVERLAY32	_IOWR ('V', 14, compat_int_t)
-#define VIDIOC_QBUF32		_IOWR ('V', 15, struct v4l2_buffer32)
-#define VIDIOC_DQBUF32		_IOWR ('V', 17, struct v4l2_buffer32)
-#define VIDIOC_STREAMON32	_IOW  ('V', 18, compat_int_t)
-#define VIDIOC_STREAMOFF32	_IOW  ('V', 19, compat_int_t)
-#define VIDIOC_ENUMSTD32	_IOWR ('V', 25, struct v4l2_standard32)
-/* VIDIOC_S_CTRL is now _IOWR, but was _IOW */
-#define VIDIOC_S_CTRL32		_IOW  ('V', 28, struct v4l2_control)
-#define VIDIOC_G_INPUT32	_IOR  ('V', 38, compat_int_t)
-#define VIDIOC_S_INPUT32	_IOWR ('V', 39, compat_int_t)
-#define VIDIOC_TRY_FMT32      	_IOWR ('V', 64, struct v4l2_format32)
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-enum {
-	MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip)
-};
-
-static int do_set_window(struct file *file, unsigned int cmd, unsigned long arg)
+static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up)
 {
-	struct video_window32 __user *up = compat_ptr(arg);
-	struct video_window __user *vw;
-	struct video_clip __user *p;
-	int nclips;
-	u32 n;
-
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)))
-		return -EFAULT;
+	struct v4l2_ext_control __user *ucontrols;
+	struct v4l2_ext_control __user *kcontrols = kp->controls;
+	int n = kp->count;
+	compat_caddr_t p;
+
+	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) ||
+		put_user(kp->ctrl_class, &up->ctrl_class) ||
+		put_user(kp->count, &up->count) ||
+		put_user(kp->error_idx, &up->error_idx) ||
+		copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved)))
+			return -EFAULT;
+	if (!kp->count)
+		return 0;
 
-	if (get_user(nclips, &up->clipcount))
+	if (get_user(p, &up->controls))
 		return -EFAULT;
-
-	/* Peculiar interface... */
-	if (nclips < 0)
-		nclips = VIDEO_CLIPMAP_SIZE;
-
-	if (nclips > MaxClips)
-		return -ENOMEM;
-
-	vw = compat_alloc_user_space(sizeof(struct video_window) +
-				    nclips * sizeof(struct video_clip));
-
-	p = nclips ? (struct video_clip __user *)(vw + 1) : NULL;
-
-	if (get_user(n, &up->x) || put_user(n, &vw->x) ||
-	    get_user(n, &up->y) || put_user(n, &vw->y) ||
-	    get_user(n, &up->width) || put_user(n, &vw->width) ||
-	    get_user(n, &up->height) || put_user(n, &vw->height) ||
-	    get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) ||
-	    get_user(n, &up->flags) || put_user(n, &vw->flags) ||
-	    get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) ||
-	    get_user(n, &up->clips) || put_user(p, &vw->clips))
+	ucontrols = compat_ptr(p);
+	if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(struct v4l2_ext_control)))
 		return -EFAULT;
 
-	if (nclips) {
-		struct video_clip32 __user *u = compat_ptr(n);
-		int i;
-		if (!u)
-			return -EINVAL;
-		for (i = 0; i < nclips; i++, u++, p++) {
-			s32 v;
-			if (!access_ok(VERIFY_READ, u, sizeof(struct video_clip32)) ||
-			    !access_ok(VERIFY_WRITE, p, sizeof(struct video_clip32)) ||
-			    get_user(v, &u->x) ||
-			    put_user(v, &p->x) ||
-			    get_user(v, &u->y) ||
-			    put_user(v, &p->y) ||
-			    get_user(v, &u->width) ||
-			    put_user(v, &p->width) ||
-			    get_user(v, &u->height) ||
-			    put_user(v, &p->height) ||
-			    put_user(NULL, &p->next))
-				return -EFAULT;
-		}
+	while (--n >= 0) {
+		if (copy_in_user(&ucontrols->id, &kcontrols->id, sizeof(__u32)))
+			return -EFAULT;
+		if (copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2,
+					sizeof(ucontrols->reserved2)))
+			return -EFAULT;
+		/* Note: if the void * part of the union ever becomes relevant
+		   then we need to know the type of the control in order to do
+		   the right thing here. Luckily, that is not yet an issue. */
+		if (copy_in_user(&ucontrols->value, &kcontrols->value, sizeof(ucontrols->value)))
+			return -EFAULT;
+		ucontrols++;
+		kcontrols++;
 	}
-
-	return native_ioctl(file, VIDIOCSWIN, (unsigned long)vw);
+	return 0;
 }
-#endif
+
+#define VIDIOC_G_FMT32		_IOWR('V',  4, struct v4l2_format32)
+#define VIDIOC_S_FMT32		_IOWR('V',  5, struct v4l2_format32)
+#define VIDIOC_QUERYBUF32	_IOWR('V',  9, struct v4l2_buffer32)
+#define VIDIOC_G_FBUF32		_IOR ('V', 10, struct v4l2_framebuffer32)
+#define VIDIOC_S_FBUF32		_IOW ('V', 11, struct v4l2_framebuffer32)
+#define VIDIOC_QBUF32		_IOWR('V', 15, struct v4l2_buffer32)
+#define VIDIOC_DQBUF32		_IOWR('V', 17, struct v4l2_buffer32)
+#define VIDIOC_ENUMSTD32	_IOWR('V', 25, struct v4l2_standard32)
+#define VIDIOC_ENUMINPUT32	_IOWR('V', 26, struct v4l2_input32)
+#define VIDIOC_TRY_FMT32      	_IOWR('V', 64, struct v4l2_format32)
+#define VIDIOC_G_EXT_CTRLS32    _IOWR('V', 71, struct v4l2_ext_controls32)
+#define VIDIOC_S_EXT_CTRLS32    _IOWR('V', 72, struct v4l2_ext_controls32)
+#define VIDIOC_TRY_EXT_CTRLS32  _IOWR('V', 73, struct v4l2_ext_controls32)
+
+#define VIDIOC_OVERLAY32	_IOW ('V', 14, s32)
+#define VIDIOC_OVERLAY32_OLD	_IOWR('V', 14, s32)
+#define VIDIOC_STREAMON32	_IOW ('V', 18, s32)
+#define VIDIOC_STREAMOFF32	_IOW ('V', 19, s32)
+#define VIDIOC_G_INPUT32	_IOR ('V', 38, s32)
+#define VIDIOC_S_INPUT32	_IOWR('V', 39, s32)
+#define VIDIOC_G_OUTPUT32	_IOR ('V', 46, s32)
+#define VIDIOC_S_OUTPUT32	_IOWR('V', 47, s32)
 
 static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -616,45 +716,51 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		struct v4l2_format v2f;
 		struct v4l2_buffer v2b;
 		struct v4l2_framebuffer v2fb;
-		struct v4l2_standard v2s;
 		struct v4l2_input v2i;
-		struct v4l2_tuner v2t;
+		struct v4l2_standard v2s;
+		struct v4l2_ext_controls v2ecs;
 		unsigned long vx;
+		int vi;
 	} karg;
 	void __user *up = compat_ptr(arg);
 	int compatible_arg = 1;
 	int err = 0;
-	int realcmd = cmd;
 
 	/* First, convert the command. */
 	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGTUNER32: realcmd = cmd = VIDIOCGTUNER; break;
-	case VIDIOCSTUNER32: realcmd = cmd = VIDIOCSTUNER; break;
-	case VIDIOCGWIN32: realcmd = cmd = VIDIOCGWIN; break;
-	case VIDIOCGFBUF32: realcmd = cmd = VIDIOCGFBUF; break;
-	case VIDIOCSFBUF32: realcmd = cmd = VIDIOCSFBUF; break;
-	case VIDIOCGFREQ32: realcmd = cmd = VIDIOCGFREQ; break;
-	case VIDIOCSFREQ32: realcmd = cmd = VIDIOCSFREQ; break;
-	case VIDIOCSMICROCODE32: realcmd = cmd = VIDIOCSMICROCODE; break;
+	case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
+	case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
+	case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
+	case VIDIOCSWIN32: cmd = VIDIOCSWIN; break;
+	case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
+	case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
+	case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
+	case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
+	case VIDIOCSMICROCODE32: cmd = VIDIOCSMICROCODE; break;
 #endif
-	case VIDIOC_G_FMT32: realcmd = cmd = VIDIOC_G_FMT; break;
-	case VIDIOC_S_FMT32: realcmd = cmd = VIDIOC_S_FMT; break;
-	case VIDIOC_QUERYBUF32: realcmd = cmd = VIDIOC_QUERYBUF; break;
-	case VIDIOC_QBUF32: realcmd = cmd = VIDIOC_QBUF; break;
-	case VIDIOC_DQBUF32: realcmd = cmd = VIDIOC_DQBUF; break;
-	case VIDIOC_STREAMON32: realcmd = cmd = VIDIOC_STREAMON; break;
-	case VIDIOC_STREAMOFF32: realcmd = cmd = VIDIOC_STREAMOFF; break;
-	case VIDIOC_G_FBUF32: realcmd = cmd = VIDIOC_G_FBUF; break;
-	case VIDIOC_S_FBUF32: realcmd = cmd = VIDIOC_S_FBUF; break;
-	case VIDIOC_OVERLAY32: realcmd = cmd = VIDIOC_OVERLAY; break;
-	case VIDIOC_ENUMSTD32: realcmd = VIDIOC_ENUMSTD; break;
-	case VIDIOC_ENUMINPUT32: realcmd = VIDIOC_ENUMINPUT; break;
-	case VIDIOC_S_CTRL32: realcmd = cmd = VIDIOC_S_CTRL; break;
-	case VIDIOC_G_INPUT32: realcmd = cmd = VIDIOC_G_INPUT; break;
-	case VIDIOC_S_INPUT32: realcmd = cmd = VIDIOC_S_INPUT; break;
-	case VIDIOC_TRY_FMT32: realcmd = cmd = VIDIOC_TRY_FMT; break;
-	};
+	case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break;
+	case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break;
+	case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break;
+	case VIDIOC_G_FBUF32: cmd = VIDIOC_G_FBUF; break;
+	case VIDIOC_S_FBUF32: cmd = VIDIOC_S_FBUF; break;
+	case VIDIOC_QBUF32: cmd = VIDIOC_QBUF; break;
+	case VIDIOC_DQBUF32: cmd = VIDIOC_DQBUF; break;
+	case VIDIOC_ENUMSTD32: cmd = VIDIOC_ENUMSTD; break;
+	case VIDIOC_ENUMINPUT32: cmd = VIDIOC_ENUMINPUT; break;
+	case VIDIOC_TRY_FMT32: cmd = VIDIOC_TRY_FMT; break;
+	case VIDIOC_G_EXT_CTRLS32: cmd = VIDIOC_G_EXT_CTRLS; break;
+	case VIDIOC_S_EXT_CTRLS32: cmd = VIDIOC_S_EXT_CTRLS; break;
+	case VIDIOC_TRY_EXT_CTRLS32: cmd = VIDIOC_TRY_EXT_CTRLS; break;
+	case VIDIOC_OVERLAY32: cmd = VIDIOC_OVERLAY; break;
+	case VIDIOC_OVERLAY32_OLD: cmd = VIDIOC_OVERLAY; break;
+	case VIDIOC_STREAMON32: cmd = VIDIOC_STREAMON; break;
+	case VIDIOC_STREAMOFF32: cmd = VIDIOC_STREAMOFF; break;
+	case VIDIOC_G_INPUT32: cmd = VIDIOC_G_INPUT; break;
+	case VIDIOC_S_INPUT32: cmd = VIDIOC_S_INPUT; break;
+	case VIDIOC_G_OUTPUT32: cmd = VIDIOC_G_OUTPUT; break;
+	case VIDIOC_S_OUTPUT32: cmd = VIDIOC_S_OUTPUT; break;
+	}
 
 	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
@@ -662,7 +768,6 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 	case VIDIOCGTUNER:
 		err = get_video_tuner32(&karg.vt, up);
 		compatible_arg = 0;
-
 		break;
 
 	case VIDIOCSFBUF:
@@ -670,19 +775,42 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		compatible_arg = 0;
 		break;
 
+	case VIDIOCSWIN:
+		err = get_video_window32(&karg.vw, up);
+		compatible_arg = 0;
+		break;
+
+	case VIDIOCGWIN:
+	case VIDIOCGFBUF:
+	case VIDIOCGFREQ:
+		compatible_arg = 0;
+		break;
+
+	case VIDIOCSMICROCODE:
+		err = get_microcode32(&karg.vc, up);
+		compatible_arg = 0;
+		break;
 
 	case VIDIOCSFREQ:
+		err = get_user(karg.vx, (u32 __user *)up);
+		compatible_arg = 0;
+		break;
+
+	case VIDIOCCAPTURE:
+	case VIDIOCSYNC:
+	case VIDIOCSWRITEMODE:
 #endif
-	case VIDIOC_S_INPUT:
 	case VIDIOC_OVERLAY:
 	case VIDIOC_STREAMON:
 	case VIDIOC_STREAMOFF:
-		err = get_user(karg.vx, (u32 __user *)up);
-		compatible_arg = 1;
+	case VIDIOC_S_INPUT:
+	case VIDIOC_S_OUTPUT:
+		err = get_user(karg.vi, (s32 __user *)up);
+		compatible_arg = 0;
 		break;
 
-	case VIDIOC_S_FBUF:
-		err = get_v4l2_framebuffer32(&karg.v2fb, up);
+	case VIDIOC_G_INPUT:
+	case VIDIOC_G_OUTPUT:
 		compatible_arg = 0;
 		break;
 
@@ -700,122 +828,108 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMSTD:
-		err = get_v4l2_standard(&karg.v2s, up);
+	case VIDIOC_S_FBUF:
+		err = get_v4l2_framebuffer32(&karg.v2fb, up);
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMSTD32:
-		err = get_v4l2_standard32(&karg.v2s, up);
+	case VIDIOC_G_FBUF:
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMINPUT:
-		err = get_v4l2_input(&karg.v2i, up);
+	case VIDIOC_ENUMSTD:
+		err = get_v4l2_standard32(&karg.v2s, up);
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMINPUT32:
+	case VIDIOC_ENUMINPUT:
 		err = get_v4l2_input32(&karg.v2i, up);
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_G_TUNER:
-	case VIDIOC_S_TUNER:
-		err = get_v4l2_tuner(&karg.v2t, up);
+	case VIDIOC_G_EXT_CTRLS:
+	case VIDIOC_S_EXT_CTRLS:
+	case VIDIOC_TRY_EXT_CTRLS:
+		err = get_v4l2_ext_controls32(&karg.v2ecs, up);
 		compatible_arg = 0;
 		break;
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGWIN:
-	case VIDIOCGFBUF:
-	case VIDIOCGFREQ:
-#endif
-	case VIDIOC_G_FBUF:
-	case VIDIOC_G_INPUT:
-		compatible_arg = 0;
-		break;
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCSMICROCODE:
-		err = microcode32(&karg.vc, up);
-		compatible_arg = 0;
-		break;
-#endif
-	};
+	}
 	if (err)
-		goto out;
+		return err;
 
 	if (compatible_arg)
-		err = native_ioctl(file, realcmd, (unsigned long)up);
+		err = native_ioctl(file, cmd, (unsigned long)up);
 	else {
 		mm_segment_t old_fs = get_fs();
 
 		set_fs(KERNEL_DS);
-		err = native_ioctl(file, realcmd, (unsigned long)&karg);
+		err = native_ioctl(file, cmd, (unsigned long)&karg);
 		set_fs(old_fs);
 	}
-	if (err == 0) {
-		switch (cmd) {
+
+	/* Special case: even after an error we need to put the
+	   results back for these ioctls since the error_idx will
+	   contain information on which control failed. */
+	switch (cmd) {
+	case VIDIOC_G_EXT_CTRLS:
+	case VIDIOC_S_EXT_CTRLS:
+	case VIDIOC_TRY_EXT_CTRLS:
+		if (put_v4l2_ext_controls32(&karg.v2ecs, up))
+			err = -EFAULT;
+		break;
+	}
+	if (err)
+		return err;
+
+	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-		case VIDIOCGTUNER:
-			err = put_video_tuner32(&karg.vt, up);
-			break;
+	case VIDIOCGTUNER:
+		err = put_video_tuner32(&karg.vt, up);
+		break;
 
-		case VIDIOCGWIN:
-			err = put_video_window32(&karg.vw, up);
-			break;
+	case VIDIOCGWIN:
+		err = put_video_window32(&karg.vw, up);
+		break;
 
-		case VIDIOCGFBUF:
-			err = put_video_buffer32(&karg.vb, up);
-			break;
+	case VIDIOCGFBUF:
+		err = put_video_buffer32(&karg.vb, up);
+		break;
 
+	case VIDIOCGFREQ:
+		err = put_user(((u32)karg.vx), (u32 __user *)up);
+		break;
 #endif
-		case VIDIOC_G_FBUF:
-			err = put_v4l2_framebuffer32(&karg.v2fb, up);
-			break;
-
-		case VIDIOC_G_FMT:
-		case VIDIOC_S_FMT:
-		case VIDIOC_TRY_FMT:
-			err = put_v4l2_format32(&karg.v2f, up);
-			break;
-
-		case VIDIOC_QUERYBUF:
-		case VIDIOC_QBUF:
-		case VIDIOC_DQBUF:
-			err = put_v4l2_buffer32(&karg.v2b, up);
-			break;
-
-		case VIDIOC_ENUMSTD:
-			err = put_v4l2_standard(&karg.v2s, up);
-			break;
-
-		case VIDIOC_ENUMSTD32:
-			err = put_v4l2_standard32(&karg.v2s, up);
-			break;
-
-		case VIDIOC_G_TUNER:
-		case VIDIOC_S_TUNER:
-			err = put_v4l2_tuner(&karg.v2t, up);
-			break;
-
-		case VIDIOC_ENUMINPUT:
-			err = put_v4l2_input(&karg.v2i, up);
-			break;
-
-		case VIDIOC_ENUMINPUT32:
-			err = put_v4l2_input32(&karg.v2i, up);
-			break;
+	case VIDIOC_S_INPUT:
+	case VIDIOC_S_OUTPUT:
+	case VIDIOC_G_INPUT:
+	case VIDIOC_G_OUTPUT:
+		err = put_user(((s32)karg.vi), (s32 __user *)up);
+		break;
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-		case VIDIOCGFREQ:
-#endif
-		case VIDIOC_G_INPUT:
-			err = put_user(((u32)karg.vx), (u32 __user *)up);
-			break;
-		};
+	case VIDIOC_G_FBUF:
+		err = put_v4l2_framebuffer32(&karg.v2fb, up);
+		break;
+
+	case VIDIOC_G_FMT:
+	case VIDIOC_S_FMT:
+	case VIDIOC_TRY_FMT:
+		err = put_v4l2_format32(&karg.v2f, up);
+		break;
+
+	case VIDIOC_QUERYBUF:
+	case VIDIOC_QBUF:
+	case VIDIOC_DQBUF:
+		err = put_v4l2_buffer32(&karg.v2b, up);
+		break;
+
+	case VIDIOC_ENUMSTD:
+		err = put_v4l2_standard32(&karg.v2s, up);
+		break;
+
+	case VIDIOC_ENUMINPUT:
+		err = put_v4l2_input32(&karg.v2i, up);
+		break;
 	}
-out:
 	return err;
 }
 
@@ -828,26 +942,48 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCSWIN32:
-		ret = do_set_window(file, cmd, arg);
-		break;
+	case VIDIOCGCAP:
+	case VIDIOCGCHAN:
+	case VIDIOCSCHAN:
 	case VIDIOCGTUNER32:
 	case VIDIOCSTUNER32:
+	case VIDIOCGPICT:
+	case VIDIOCSPICT:
+	case VIDIOCCAPTURE32:
 	case VIDIOCGWIN32:
+	case VIDIOCSWIN32:
 	case VIDIOCGFBUF32:
 	case VIDIOCSFBUF32:
+	case VIDIOCKEY:
 	case VIDIOCGFREQ32:
 	case VIDIOCSFREQ32:
 	case VIDIOCGAUDIO:
 	case VIDIOCSAUDIO:
+	case VIDIOCSYNC32:
+	case VIDIOCMCAPTURE:
+	case VIDIOCGMBUF:
+	case VIDIOCGUNIT:
+	case VIDIOCGCAPTURE:
+	case VIDIOCSCAPTURE:
+	case VIDIOCSPLAYMODE:
+	case VIDIOCSWRITEMODE32:
+	case VIDIOCGPLAYINFO:
+	case VIDIOCSMICROCODE32:
 	case VIDIOCGVBIFMT:
 	case VIDIOCSVBIFMT:
+#endif
+#ifdef __OLD_VIDIOC_
+	case VIDIOC_OVERLAY32_OLD:
+	case VIDIOC_S_PARM_OLD:
+	case VIDIOC_S_CTRL_OLD:
+	case VIDIOC_G_AUDIO_OLD:
+	case VIDIOC_G_AUDOUT_OLD:
+	case VIDIOC_CROPCAP_OLD:
 #endif
 	case VIDIOC_QUERYCAP:
+	case VIDIOC_RESERVED:
 	case VIDIOC_ENUM_FMT:
 	case VIDIOC_G_FMT32:
-	case VIDIOC_CROPCAP:
-	case VIDIOC_S_CROP:
 	case VIDIOC_S_FMT32:
 	case VIDIOC_REQBUFS:
 	case VIDIOC_QUERYBUF32:
@@ -862,43 +998,56 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_S_PARM:
 	case VIDIOC_G_STD:
 	case VIDIOC_S_STD:
-	case VIDIOC_G_TUNER:
-	case VIDIOC_S_TUNER:
-	case VIDIOC_ENUMSTD:
 	case VIDIOC_ENUMSTD32:
-	case VIDIOC_ENUMINPUT:
 	case VIDIOC_ENUMINPUT32:
 	case VIDIOC_G_CTRL:
 	case VIDIOC_S_CTRL:
-	case VIDIOC_S_CTRL32:
-	case VIDIOC_S_FREQUENCY:
-	case VIDIOC_G_FREQUENCY:
+	case VIDIOC_G_TUNER:
+	case VIDIOC_S_TUNER:
+	case VIDIOC_G_AUDIO:
+	case VIDIOC_S_AUDIO:
 	case VIDIOC_QUERYCTRL:
+	case VIDIOC_QUERYMENU:
 	case VIDIOC_G_INPUT32:
 	case VIDIOC_S_INPUT32:
+	case VIDIOC_G_OUTPUT32:
+	case VIDIOC_S_OUTPUT32:
+	case VIDIOC_ENUMOUTPUT:
+	case VIDIOC_G_AUDOUT:
+	case VIDIOC_S_AUDOUT:
+	case VIDIOC_G_MODULATOR:
+	case VIDIOC_S_MODULATOR:
+	case VIDIOC_S_FREQUENCY:
+	case VIDIOC_G_FREQUENCY:
+	case VIDIOC_CROPCAP:
+	case VIDIOC_G_CROP:
+	case VIDIOC_S_CROP:
+	case VIDIOC_G_JPEGCOMP:
+	case VIDIOC_S_JPEGCOMP:
+	case VIDIOC_QUERYSTD:
 	case VIDIOC_TRY_FMT32:
-	case VIDIOC_S_HW_FREQ_SEEK:
+	case VIDIOC_ENUMAUDIO:
+	case VIDIOC_ENUMAUDOUT:
+	case VIDIOC_G_PRIORITY:
+	case VIDIOC_S_PRIORITY:
+	case VIDIOC_G_SLICED_VBI_CAP:
+	case VIDIOC_LOG_STATUS:
+	case VIDIOC_G_EXT_CTRLS32:
+	case VIDIOC_S_EXT_CTRLS32:
+	case VIDIOC_TRY_EXT_CTRLS32:
 	case VIDIOC_ENUM_FRAMESIZES:
 	case VIDIOC_ENUM_FRAMEINTERVALS:
+	case VIDIOC_G_ENC_INDEX:
+	case VIDIOC_ENCODER_CMD:
+	case VIDIOC_TRY_ENCODER_CMD:
+	case VIDIOC_DBG_S_REGISTER:
+	case VIDIOC_DBG_G_REGISTER:
+	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_S_HW_FREQ_SEEK:
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
 
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-	/* Little v, the video4linux ioctls (conflict?) */
-	case VIDIOCGCAP:
-	case VIDIOCGCHAN:
-	case VIDIOCSCHAN:
-	case VIDIOCGPICT:
-	case VIDIOCSPICT:
-	case VIDIOCCAPTURE:
-	case VIDIOCKEY:
-	case VIDIOCSYNC:
-	case VIDIOCMCAPTURE:
-	case VIDIOCGMBUF:
-	case VIDIOCGUNIT:
-	case VIDIOCGCAPTURE:
-	case VIDIOCSCAPTURE:
-
 	/* BTTV specific... */
 	case _IOW('v',  BASE_VIDIOCPRIVATE+0, char [256]):
 	case _IOR('v',  BASE_VIDIOCPRIVATE+1, char [256]):
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 6e6743bd0f92..e2cfd6add78c 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1465,6 +1465,8 @@ struct v4l2_chip_ident {
 #define VIDIOC_G_CHIP_IDENT     _IOWR('V', 81, struct v4l2_chip_ident)
 #endif
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
+/* Reminder: when adding new ioctls please add support for them to
+   drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
 #ifdef __OLD_VIDIOC_
 /* for compatibility, will go away some day */
-- 
cgit v1.2.3


From 77587c5627aab50636ea0f93c28d2013cd0b7004 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 19 Dec 2008 07:18:38 -0300
Subject: V4L/DVB (9940): bt832: remove this driver

The bt832 i2c driver was never used or even compiled and is no longer
maintained. It is now removed completely.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/bt8xx/bt832.c      | 274 -----------------------------
 drivers/media/video/bt8xx/bt832.h      | 305 ---------------------------------
 drivers/media/video/bt8xx/bttv-cards.c |  12 --
 include/media/i2c-addr.h               |   2 -
 4 files changed, 593 deletions(-)
 delete mode 100644 drivers/media/video/bt8xx/bt832.c
 delete mode 100644 drivers/media/video/bt8xx/bt832.h

(limited to 'include')

diff --git a/drivers/media/video/bt8xx/bt832.c b/drivers/media/video/bt8xx/bt832.c
deleted file mode 100644
index 216fc9680e80..000000000000
--- a/drivers/media/video/bt8xx/bt832.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/* Driver for Bt832 CMOS Camera Video Processor
-    i2c-addresses: 0x88 or 0x8a
-
-  The BT832 interfaces to a Quartzsight Digital Camera (352x288, 25 or 30 fps)
-  via a 9 pin connector ( 4-wire SDATA, 2-wire i2c, SCLK, VCC, GND).
-  It outputs an 8-bit 4:2:2 YUV or YCrCb video signal which can be directly
-  connected to bt848/bt878 GPIO pins on this purpose.
-  (see: VLSI Vision Ltd. www.vvl.co.uk for camera datasheets)
-
-  Supported Cards:
-  -  Pixelview Rev.4E: 0x8a
-		GPIO 0x400000 toggles Bt832 RESET, and the chip changes to i2c 0x88 !
-
-  (c) Gunther Mayer, 2002
-
-  STATUS:
-  - detect chip and hexdump
-  - reset chip and leave low power mode
-  - detect camera present
-
-  TODO:
-  - make it work (find correct setup for Bt832 and Bt878)
-*/
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/i2c.h>
-#include <linux/types.h>
-#include <linux/videodev.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <media/v4l2-common.h>
-
-#include "bttv.h"
-#include "bt832.h"
-
-MODULE_LICENSE("GPL");
-
-/* Addresses to scan */
-static unsigned short normal_i2c[] = { I2C_ADDR_BT832_ALT1>>1, I2C_ADDR_BT832_ALT2>>1,
-				       I2C_CLIENT_END };
-I2C_CLIENT_INSMOD;
-
-int debug;    /* debug output */
-module_param(debug,            int, 0644);
-
-/* ---------------------------------------------------------------------- */
-
-static int bt832_detach(struct i2c_client *client);
-
-
-static struct i2c_driver driver;
-static struct i2c_client client_template;
-
-struct bt832 {
-	struct i2c_client client;
-};
-
-int bt832_hexdump(struct i2c_client *i2c_client_s, unsigned char *buf)
-{
-	int i,rc;
-	buf[0]=0x80; // start at register 0 with auto-increment
-	if (1 != (rc = i2c_master_send(i2c_client_s,buf,1)))
-		v4l_err(i2c_client_s,"i2c i/o error: rc == %d (should be 1)\n",rc);
-
-	for(i=0;i<65;i++)
-		buf[i]=0;
-	if (65 != (rc=i2c_master_recv(i2c_client_s,buf,65)))
-		v4l_err(i2c_client_s,"i2c i/o error: rc == %d (should be 65)\n",rc);
-
-	// Note: On READ the first byte is the current index
-	//  (e.g. 0x80, what we just wrote)
-
-	if(debug>1) {
-		int i;
-		v4l_dbg(2, debug,i2c_client_s,"hexdump:");
-		for(i=1;i<65;i++) {
-			if(i!=1) {
-				if(((i-1)%8)==0) printk(" ");
-				if(((i-1)%16)==0) {
-					printk("\n");
-					v4l_dbg(2, debug,i2c_client_s,"hexdump:");
-				}
-			}
-			printk(" %02x",buf[i]);
-		}
-		printk("\n");
-	}
-	return 0;
-}
-
-// Return: 1 (is a bt832), 0 (No bt832 here)
-int bt832_init(struct i2c_client *i2c_client_s)
-{
-	unsigned char *buf;
-	int rc;
-
-	buf=kmalloc(65,GFP_KERNEL);
-	if (!buf) {
-		v4l_err(&t->client,
-			"Unable to allocate memory. Detaching.\n");
-		return 0;
-	}
-	bt832_hexdump(i2c_client_s,buf);
-
-	if(buf[0x40] != 0x31) {
-		v4l_err(i2c_client_s,"This i2c chip is no bt832 (id=%02x). Detaching.\n",buf[0x40]);
-		kfree(buf);
-		return 0;
-	}
-
-	v4l_err(i2c_client_s,"Write 0 tp VPSTATUS\n");
-	buf[0]=BT832_VP_STATUS; // Reg.52
-	buf[1]= 0x00;
-	if (2 != (rc = i2c_master_send(i2c_client_s,buf,2)))
-		v4l_err(i2c_client_s,"i2c i/o error VPS: rc == %d (should be 2)\n",rc);
-
-	bt832_hexdump(i2c_client_s,buf);
-
-
-	// Leave low power mode:
-	v4l_err(i2c_client_s,"leave low power mode.\n");
-	buf[0]=BT832_CAM_SETUP0; //0x39 57
-	buf[1]=0x08;
-	if (2 != (rc = i2c_master_send(i2c_client_s,buf,2)))
-		v4l_err(i2c_client_s,"i2c i/o error LLPM: rc == %d (should be 2)\n",rc);
-
-	bt832_hexdump(i2c_client_s,buf);
-
-	v4l_info(i2c_client_s,"Write 0 tp VPSTATUS\n");
-	buf[0]=BT832_VP_STATUS; // Reg.52
-	buf[1]= 0x00;
-	if (2 != (rc = i2c_master_send(i2c_client_s,buf,2)))
-		v4l_err(i2c_client_s,"i2c i/o error VPS: rc == %d (should be 2)\n",rc);
-
-	bt832_hexdump(i2c_client_s,buf);
-
-
-	// Enable Output
-	v4l_info(i2c_client_s,"Enable Output\n");
-	buf[0]=BT832_VP_CONTROL1; // Reg.40
-	buf[1]= 0x27 & (~0x01); // Default | !skip
-	if (2 != (rc = i2c_master_send(i2c_client_s,buf,2)))
-		v4l_err(i2c_client_s,"i2c i/o error EO: rc == %d (should be 2)\n",rc);
-
-	bt832_hexdump(i2c_client_s,buf);
-
-
-	// for testing (even works when no camera attached)
-	v4l_info(i2c_client_s,"*** Generate NTSC M Bars *****\n");
-	buf[0]=BT832_VP_TESTCONTROL0; // Reg. 42
-	buf[1]=3; // Generate NTSC System M bars, Generate Frame timing internally
-	if (2 != (rc = i2c_master_send(i2c_client_s,buf,2)))
-		v4l_info(i2c_client_s,"i2c i/o error MBAR: rc == %d (should be 2)\n",rc);
-
-	v4l_info(i2c_client_s,"Camera Present: %s\n",
-		(buf[1+BT832_CAM_STATUS] & BT832_56_CAMERA_PRESENT) ? "yes":"no");
-
-	bt832_hexdump(i2c_client_s,buf);
-	kfree(buf);
-	return 1;
-}
-
-
-
-static int bt832_attach(struct i2c_adapter *adap, int addr, int kind)
-{
-	struct bt832 *t;
-
-	client_template.adapter = adap;
-	client_template.addr    = addr;
-
-	if (NULL == (t = kzalloc(sizeof(*t), GFP_KERNEL)))
-		return -ENOMEM;
-	t->client = client_template;
-	i2c_set_clientdata(&t->client, t);
-	i2c_attach_client(&t->client);
-
-	v4l_info(&t->client,"chip found @ 0x%x\n", addr<<1);
-
-	if(! bt832_init(&t->client)) {
-		bt832_detach(&t->client);
-		return -1;
-	}
-
-	return 0;
-}
-
-static int bt832_probe(struct i2c_adapter *adap)
-{
-	if (adap->class & I2C_CLASS_TV_ANALOG)
-		return i2c_probe(adap, &addr_data, bt832_attach);
-	return 0;
-}
-
-static int bt832_detach(struct i2c_client *client)
-{
-	struct bt832 *t = i2c_get_clientdata(client);
-
-	v4l_info(&t->client,"dettach\n");
-	i2c_detach_client(client);
-	kfree(t);
-	return 0;
-}
-
-static int
-bt832_command(struct i2c_client *client, unsigned int cmd, void *arg)
-{
-	struct bt832 *t = i2c_get_clientdata(client);
-
-	if (debug>1)
-		v4l_i2c_print_ioctl(&t->client,cmd);
-
-	switch (cmd) {
-		case BT832_HEXDUMP: {
-			unsigned char *buf;
-			buf = kmalloc(65, GFP_KERNEL);
-			if (!buf) {
-				v4l_err(&t->client,
-					"Unable to allocate memory\n");
-				break;
-			}
-			bt832_hexdump(&t->client,buf);
-			kfree(buf);
-		}
-		break;
-		case BT832_REATTACH:
-			v4l_info(&t->client,"re-attach\n");
-			i2c_del_driver(&driver);
-			i2c_add_driver(&driver);
-		break;
-	}
-	return 0;
-}
-
-/* ----------------------------------------------------------------------- */
-
-static struct i2c_driver driver = {
-	.driver = {
-		.name   = "bt832",
-	},
-	.id             = 0, /* FIXME */
-	.attach_adapter = bt832_probe,
-	.detach_client  = bt832_detach,
-	.command        = bt832_command,
-};
-static struct i2c_client client_template =
-{
-	.name       = "bt832",
-	.driver     = &driver,
-};
-
-
-static int __init bt832_init_module(void)
-{
-	return i2c_add_driver(&driver);
-}
-
-static void __exit bt832_cleanup_module(void)
-{
-	i2c_del_driver(&driver);
-}
-
-module_init(bt832_init_module);
-module_exit(bt832_cleanup_module);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/drivers/media/video/bt8xx/bt832.h b/drivers/media/video/bt8xx/bt832.h
deleted file mode 100644
index 1ce8fa71f7db..000000000000
--- a/drivers/media/video/bt8xx/bt832.h
+++ /dev/null
@@ -1,305 +0,0 @@
-/* Bt832 CMOS Camera Video Processor (VP)
-
- The Bt832 CMOS Camera Video Processor chip connects a Quartsight CMOS
-  color digital camera directly to video capture devices via an 8-bit,
-  4:2:2 YUV or YCrCb video interface.
-
- i2c addresses: 0x88 or 0x8a
- */
-
-/* The 64 registers: */
-
-// Input Processor
-#define BT832_OFFSET 0
-#define BT832_RCOMP	1
-#define BT832_G1COMP	2
-#define BT832_G2COMP	3
-#define BT832_BCOMP	4
-// Exposures:
-#define BT832_FINEH	5
-#define BT832_FINEL	6
-#define BT832_COARSEH	7
-#define BT832_COARSEL   8
-#define BT832_CAMGAIN	9
-// Main Processor:
-#define BT832_M00	10
-#define BT832_M01	11
-#define BT832_M02	12
-#define BT832_M10	13
-#define BT832_M11	14
-#define BT832_M12	15
-#define BT832_M20	16
-#define BT832_M21	17
-#define BT832_M22	18
-#define BT832_APCOR	19
-#define BT832_GAMCOR	20
-// Level Accumulator Inputs
-#define BT832_VPCONTROL2	21
-#define BT832_ZONECODE0	22
-#define BT832_ZONECODE1	23
-#define BT832_ZONECODE2	24
-#define BT832_ZONECODE3	25
-// Level Accumulator Outputs:
-#define BT832_RACC	26
-#define BT832_GACC	27
-#define BT832_BACC	28
-#define BT832_BLACKACC	29
-#define BT832_EXP_AGC	30
-#define BT832_LACC0	31
-#define BT832_LACC1	32
-#define BT832_LACC2	33
-#define BT832_LACC3	34
-#define BT832_LACC4	35
-#define BT832_LACC5	36
-#define BT832_LACC6	37
-#define BT832_LACC7	38
-// System:
-#define BT832_VP_CONTROL0	39
-#define BT832_VP_CONTROL1	40
-#define BT832_THRESH	41
-#define BT832_VP_TESTCONTROL0	42
-#define BT832_VP_DMCODE	43
-#define BT832_ACB_CONFIG	44
-#define BT832_ACB_GNBASE	45
-#define BT832_ACB_MU	46
-#define BT832_CAM_TEST0	47
-#define BT832_AEC_CONFIG	48
-#define BT832_AEC_TL	49
-#define BT832_AEC_TC	50
-#define BT832_AEC_TH	51
-// Status:
-#define BT832_VP_STATUS	52
-#define BT832_VP_LINECOUNT	53
-#define BT832_CAM_DEVICEL	54 // e.g. 0x19
-#define BT832_CAM_DEVICEH	55 // e.g. 0x40  == 0x194 Mask0, 0x194 = 404 decimal (VVL-404 camera)
-#define BT832_CAM_STATUS		56
- #define BT832_56_CAMERA_PRESENT 0x20
-//Camera Setups:
-#define BT832_CAM_SETUP0	57
-#define BT832_CAM_SETUP1	58
-#define BT832_CAM_SETUP2	59
-#define BT832_CAM_SETUP3	60
-// System:
-#define BT832_DEFCOR		61
-#define BT832_VP_TESTCONTROL1	62
-#define BT832_DEVICE_ID		63
-# define BT832_DEVICE_ID__31		0x31 // Bt832 has ID 0x31
-
-/* STMicroelectronivcs VV5404 camera module
-   i2c: 0x20: sensor address
-   i2c: 0xa0: eeprom for ccd defect map
- */
-#define VV5404_device_h		0x00  // 0x19
-#define VV5404_device_l		0x01  // 0x40
-#define VV5404_status0		0x02
-#define VV5404_linecountc	0x03 // current line counter
-#define VV5404_linecountl	0x04
-#define VV5404_setup0		0x10
-#define VV5404_setup1		0x11
-#define VV5404_setup2		0x12
-#define VV5404_setup4		0x14
-#define VV5404_setup5		0x15
-#define VV5404_fine_h		0x20  // fine exposure
-#define VV5404_fine_l		0x21
-#define VV5404_coarse_h		0x22  //coarse exposure
-#define VV5404_coarse_l		0x23
-#define VV5404_gain		0x24 // ADC pre-amp gain setting
-#define VV5404_clk_div		0x25
-#define VV5404_cr		0x76 // control register
-#define VV5404_as0		0x77 // ADC setup register
-
-
-// IOCTL
-#define BT832_HEXDUMP   _IOR('b',1,int)
-#define BT832_REATTACH	_IOR('b',2,int)
-
-/* from BT8x8VXD/capdrv/dialogs.cpp */
-
-/*
-typedef enum { SVI, Logitech, Rockwell } CAMERA;
-
-static COMBOBOX_ENTRY gwCameraOptions[] =
-{
-   { SVI,      "Silicon Vision 512N" },
-   { Logitech, "Logitech VideoMan 1.3"  },
-   { Rockwell, "Rockwell QuartzSight PCI 1.0"   }
-};
-
-// SRAM table values
-//===========================================================================
-typedef enum { TGB_NTSC624, TGB_NTSC780, TGB_NTSC858, TGB_NTSC392 } TimeGenByte;
-
-BYTE SRAMTable[][ 60 ] =
-{
-   // TGB_NTSC624
-   {
-      0x33, // size of table = 51
-      0x0E, 0xC0, 0x00, 0x00, 0x90, 0x02, 0x03, 0x10, 0x03, 0x06,
-      0x10, 0x04, 0x12, 0x12, 0x05, 0x02, 0x13, 0x04, 0x19, 0x00,
-      0x04, 0x39, 0x00, 0x06, 0x59, 0x08, 0x03, 0x85, 0x08, 0x07,
-      0x03, 0x50, 0x00, 0x91, 0x40, 0x00, 0x11, 0x01, 0x01, 0x4D,
-      0x0D, 0x02, 0x03, 0x11, 0x01, 0x05, 0x37, 0x00, 0x37, 0x21, 0x00
-   },
-   // TGB_NTSC780
-   {
-      0x33, // size of table = 51
-      0x0e, 0xc0, 0x00, 0x00, 0x90, 0xe2, 0x03, 0x10, 0x03, 0x06,
-      0x10, 0x34, 0x12, 0x12, 0x65, 0x02, 0x13, 0x24, 0x19, 0x00,
-      0x24, 0x39, 0x00, 0x96, 0x59, 0x08, 0x93, 0x85, 0x08, 0x97,
-      0x03, 0x50, 0x50, 0xaf, 0x40, 0x30, 0x5f, 0x01, 0xf1, 0x7f,
-      0x0d, 0xf2, 0x03, 0x11, 0xf1, 0x05, 0x37, 0x30, 0x85, 0x21, 0x50
-   },
-   // TGB_NTSC858
-   {
-      0x33, // size of table = 51
-      0x0c, 0xc0, 0x00, 0x00, 0x90, 0xc2, 0x03, 0x10, 0x03, 0x06,
-      0x10, 0x34, 0x12, 0x12, 0x65, 0x02, 0x13, 0x24, 0x19, 0x00,
-      0x24, 0x39, 0x00, 0x96, 0x59, 0x08, 0x93, 0x83, 0x08, 0x97,
-      0x03, 0x50, 0x30, 0xc0, 0x40, 0x30, 0x86, 0x01, 0x01, 0xa6,
-      0x0d, 0x62, 0x03, 0x11, 0x61, 0x05, 0x37, 0x30, 0xac, 0x21, 0x50
-   },
-   // TGB_NTSC392
-   // This table has been modified to be used for Fusion Rev D
-   {
-      0x2A, // size of table = 42
-      0x06, 0x08, 0x04, 0x0a, 0xc0, 0x00, 0x18, 0x08, 0x03, 0x24,
-      0x08, 0x07, 0x02, 0x90, 0x02, 0x08, 0x10, 0x04, 0x0c, 0x10,
-      0x05, 0x2c, 0x11, 0x04, 0x55, 0x48, 0x00, 0x05, 0x50, 0x00,
-      0xbf, 0x0c, 0x02, 0x2f, 0x3d, 0x00, 0x2f, 0x3f, 0x00, 0xc3,
-      0x20, 0x00
-   }
-};
-
-//===========================================================================
-// This is the structure of the camera specifications
-//===========================================================================
-typedef struct tag_cameraSpec
-{
-   SignalFormat signal;       // which digital signal format the camera has
-   VideoFormat  vidFormat;    // video standard
-   SyncVideoRef syncRef;      // which sync video reference is used
-   State        syncOutput;   // enable sync output for sync video input?
-   DecInputClk  iClk;         // which input clock is used
-   TimeGenByte  tgb;          // which timing generator byte does the camera use
-   int          HReset;       // select 64, 48, 32, or 16 CLKx1 for HReset
-   PLLFreq      pllFreq;      // what synthesized frequency to set PLL to
-   VSIZEPARMS   vSize;        // video size the camera produces
-   int          lineCount;    // expected total number of half-line per frame - 1
-   BOOL         interlace;    // interlace signal?
-} CameraSpec;
-
-//===========================================================================
-// <UPDATE REQUIRED>
-// Camera specifications database. Update this table whenever camera spec
-// has been changed or added/deleted supported camera models
-//===========================================================================
-static CameraSpec dbCameraSpec[ N_CAMERAOPTIONS ] =
-{  // Silicon Vision 512N
-   { Signal_CCIR656, VFormat_NTSC, VRef_alignedCb, Off, DecClk_GPCLK, TGB_NTSC624, 64, KHz19636,
-      // Clkx1_HACTIVE, Clkx1_HDELAY, VActive, VDelay, linesPerField; lineCount, Interlace
-   {         512,           0x64,       480,    0x13,      240 },         0,       TRUE
-   },
-   // Logitech VideoMan 1.3
-   { Signal_CCIR656, VFormat_NTSC, VRef_alignedCb, Off, DecClk_GPCLK, TGB_NTSC780, 64, KHz24545,
-      // Clkx1_HACTIVE, Clkx1_HDELAY, VActive, VDelay, linesPerField; lineCount, Interlace
-      {      640,           0x80,       480,    0x1A,      240 },         0,       TRUE
-   },
-   // Rockwell QuartzSight
-   // Note: Fusion Rev D (rev ID 0x02) and later supports 16 pixels for HReset which is preferable.
-   //       Use 32 for earlier version of hardware. Clkx1_HDELAY also changed from 0x27 to 0x20.
-   { Signal_CCIR656, VFormat_NTSC, VRef_alignedCb, Off, DecClk_GPCLK, TGB_NTSC392, 16, KHz28636,
-      // Clkx1_HACTIVE, Clkx1_HDELAY, VActive, VDelay, linesPerField; lineCount, Interlace
-      {      352,           0x20,       576,    0x08,      288 },       607,       FALSE
-   }
-};
-*/
-
-/*
-The corresponding APIs required to be invoked are:
-SetConnector( ConCamera, TRUE/FALSE );
-SetSignalFormat( spec.signal );
-SetVideoFormat( spec.vidFormat );
-SetSyncVideoRef( spec.syncRef );
-SetEnableSyncOutput( spec.syncOutput );
-SetTimGenByte( SRAMTable[ spec.tgb ], SRAMTableSize[ spec.tgb ] );
-SetHReset( spec.HReset );
-SetPLL( spec.pllFreq );
-SetDecInputClock( spec.iClk );
-SetVideoInfo( spec.vSize );
-SetTotalLineCount( spec.lineCount );
-SetInterlaceMode( spec.interlace );
-*/
-
-/* from web:
- Video Sampling
-Digital video is a sampled form of analog video. The most common sampling schemes in use today are:
-		  Pixel Clock   Horiz    Horiz    Vert
-		   Rate         Total    Active
-NTSC square pixel  12.27 MHz    780      640      525
-NTSC CCIR-601      13.5  MHz    858      720      525
-NTSC 4FSc          14.32 MHz    910      768      525
-PAL  square pixel  14.75 MHz    944      768      625
-PAL  CCIR-601      13.5  MHz    864      720      625
-PAL  4FSc          17.72 MHz   1135      948      625
-
-For the CCIR-601 standards, the sampling is based on a static orthogonal sampling grid. The luminance component (Y) is sampled at 13.5 MHz, while the two color difference signals, Cr and Cb are sampled at half that, or 6.75 MHz. The Cr and Cb samples are colocated with alternate Y samples, and they are taken at the same position on each line, such that one sample is coincident with the 50% point of the falling edge of analog sync. The samples are coded to either 8 or 10 bits per component.
-*/
-
-/* from DScaler:*/
-/*
-//===========================================================================
-// CCIR656 Digital Input Support: The tables were taken from DScaler proyect
-//
-// 13 Dec 2000 - Michael Eskin, Conexant Systems - Initial version
-//
-
-//===========================================================================
-// Timing generator SRAM table values for CCIR601 720x480 NTSC
-//===========================================================================
-// For NTSC CCIR656
-BYTE BtCard::SRAMTable_NTSC[] =
-{
-    // SRAM Timing Table for NTSC
-    0x0c, 0xc0, 0x00,
-    0x00, 0x90, 0xc2,
-    0x03, 0x10, 0x03,
-    0x06, 0x10, 0x34,
-    0x12, 0x12, 0x65,
-    0x02, 0x13, 0x24,
-    0x19, 0x00, 0x24,
-    0x39, 0x00, 0x96,
-    0x59, 0x08, 0x93,
-    0x83, 0x08, 0x97,
-    0x03, 0x50, 0x30,
-    0xc0, 0x40, 0x30,
-    0x86, 0x01, 0x01,
-    0xa6, 0x0d, 0x62,
-    0x03, 0x11, 0x61,
-    0x05, 0x37, 0x30,
-    0xac, 0x21, 0x50
-};
-
-//===========================================================================
-// Timing generator SRAM table values for CCIR601 720x576 NTSC
-//===========================================================================
-// For PAL CCIR656
-BYTE BtCard::SRAMTable_PAL[] =
-{
-    // SRAM Timing Table for PAL
-    0x36, 0x11, 0x01,
-    0x00, 0x90, 0x02,
-    0x05, 0x10, 0x04,
-    0x16, 0x14, 0x05,
-    0x11, 0x00, 0x04,
-    0x12, 0xc0, 0x00,
-    0x31, 0x00, 0x06,
-    0x51, 0x08, 0x03,
-    0x89, 0x08, 0x07,
-    0xc0, 0x44, 0x00,
-    0x81, 0x01, 0x01,
-    0xa9, 0x0d, 0x02,
-    0x02, 0x50, 0x03,
-    0x37, 0x3d, 0x00,
-    0xaf, 0x21, 0x00,
-};
-*/
diff --git a/drivers/media/video/bt8xx/bttv-cards.c b/drivers/media/video/bt8xx/bttv-cards.c
index 8629b77666f6..d24dcc025e37 100644
--- a/drivers/media/video/bt8xx/bttv-cards.c
+++ b/drivers/media/video/bt8xx/bttv-cards.c
@@ -44,7 +44,6 @@
 
 /* fwd decl */
 static void boot_msp34xx(struct bttv *btv, int pin);
-static void boot_bt832(struct bttv *btv);
 static void hauppauge_eeprom(struct bttv *btv);
 static void avermedia_eeprom(struct bttv *btv);
 static void osprey_eeprom(struct bttv *btv, const u8 ee[256]);
@@ -3721,13 +3720,6 @@ void __devinit bttv_init_card2(struct bttv *btv)
 	if (bttv_tvcards[btv->c.type].audio_mode_gpio)
 		btv->audio_mode_gpio=bttv_tvcards[btv->c.type].audio_mode_gpio;
 
-	if (bttv_tvcards[btv->c.type].digital_mode == DIGITAL_MODE_CAMERA) {
-		/* detect Bt832 chip for quartzsight digital camera */
-		if ((bttv_I2CRead(btv, I2C_ADDR_BT832_ALT1, "Bt832") >=0) ||
-		    (bttv_I2CRead(btv, I2C_ADDR_BT832_ALT2, "Bt832") >=0))
-			boot_bt832(btv);
-	}
-
 	if (!autoload)
 		return;
 
@@ -4123,10 +4115,6 @@ static void __devinit boot_msp34xx(struct bttv *btv, int pin)
 		       "init [%d]\n", btv->c.nr, pin);
 }
 
-static void __devinit boot_bt832(struct bttv *btv)
-{
-}
-
 /* ----------------------------------------------------------------------- */
 /*  Imagenation L-Model PXC200 Framegrabber */
 /*  This is basically the same procedure as
diff --git a/include/media/i2c-addr.h b/include/media/i2c-addr.h
index e7ff44a35ca0..5d0f56054d26 100644
--- a/include/media/i2c-addr.h
+++ b/include/media/i2c-addr.h
@@ -12,8 +12,6 @@
 /* bttv address list */
 #define I2C_ADDR_TSA5522	0xc2
 #define I2C_ADDR_TDA7432	0x8a
-#define I2C_ADDR_BT832_ALT1	0x88
-#define I2C_ADDR_BT832_ALT2	0x8a // alternate setting
 #define I2C_ADDR_TDA8425	0x82
 #define I2C_ADDR_TDA9840	0x84
 #define I2C_ADDR_TDA9850	0xb6 /* also used by 9855,9873 */
-- 
cgit v1.2.3


From 4b00eb25340c1a9b9eedaf0bc5b0f0d18eddb028 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 19 Dec 2008 11:17:56 -0300
Subject: V4L/DVB (9944): videodev2.h: fix typo.

The comment said CX2584X instead of CX2341X.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e2cfd6add78c..754c8d9685a4 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1051,7 +1051,7 @@ enum v4l2_mpeg_video_bitrate_mode {
 #define V4L2_CID_MPEG_VIDEO_MUTE 		(V4L2_CID_MPEG_BASE+210)
 #define V4L2_CID_MPEG_VIDEO_MUTE_YUV 		(V4L2_CID_MPEG_BASE+211)
 
-/*  MPEG-class control IDs specific to the CX2584x driver as defined by V4L2 */
+/*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_MPEG_CX2341X_BASE 				(V4L2_CTRL_CLASS_MPEG | 0x1000)
 #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE 	(V4L2_CID_MPEG_CX2341X_BASE+0)
 enum v4l2_mpeg_cx2341x_video_spatial_filter_mode {
-- 
cgit v1.2.3


From 531c98e71805b32e9ea35a218119100bbd2b7615 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 22 Dec 2008 13:18:27 -0300
Subject: V4L/DVB (9953): em28xx: Add suport for debugging AC97 anciliary chips

The em28xx driver can be coupled to an anciliary AC97 chip. This patch
allows read/write AC97 registers directly.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/em28xx/em28xx-core.c  |  4 ++--
 drivers/media/video/em28xx/em28xx-video.c | 19 +++++++++++++++++++
 drivers/media/video/em28xx/em28xx.h       |  3 +++
 include/linux/videodev2.h                 |  1 +
 4 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c
index c647dcb20a57..5964998daf2e 100644
--- a/drivers/media/video/em28xx/em28xx-core.c
+++ b/drivers/media/video/em28xx/em28xx-core.c
@@ -259,7 +259,7 @@ static int em28xx_is_ac97_ready(struct em28xx *dev)
  * em28xx_read_ac97()
  * write a 16 bit value to the specified AC97 address (LSB first!)
  */
-static int em28xx_read_ac97(struct em28xx *dev, u8 reg)
+int em28xx_read_ac97(struct em28xx *dev, u8 reg)
 {
 	int ret;
 	u8 addr = (reg & 0x7f) | 0x80;
@@ -285,7 +285,7 @@ static int em28xx_read_ac97(struct em28xx *dev, u8 reg)
  * em28xx_write_ac97()
  * write a 16 bit value to the specified AC97 address (LSB first!)
  */
-static int em28xx_write_ac97(struct em28xx *dev, u8 reg, u16 val)
+int em28xx_write_ac97(struct em28xx *dev, u8 reg, u16 val)
 {
 	int ret;
 	u8 addr = reg & 0x7f;
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 4701b6589b10..4ea4920c927a 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1221,6 +1221,17 @@ static int vidioc_g_register(struct file *file, void *priv,
 	struct em28xx         *dev = fh->dev;
 	int ret;
 
+	if (reg->match_type == V4L2_CHIP_MATCH_AC97) {
+		mutex_lock(&dev->lock);
+		ret = em28xx_read_ac97(dev, reg->reg);
+		mutex_unlock(&dev->lock);
+		if (ret < 0)
+			return ret;
+
+		reg->val = ret;
+		return 0;
+	}
+
 	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
 		return -EINVAL;
 
@@ -1256,6 +1267,14 @@ static int vidioc_s_register(struct file *file, void *priv,
 	__le64 buf;
 	int    rc;
 
+	if (reg->match_type == V4L2_CHIP_MATCH_AC97) {
+		mutex_lock(&dev->lock);
+		rc = em28xx_write_ac97(dev, reg->reg, reg->val);
+		mutex_unlock(&dev->lock);
+
+		return rc;
+	}
+
 	buf = cpu_to_le64(reg->val);
 
 	mutex_lock(&dev->lock);
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index 80d1d55d80f9..100f27819cdc 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -564,6 +564,9 @@ int em28xx_write_regs_req(struct em28xx *dev, u8 req, u16 reg, char *buf,
 int em28xx_write_regs(struct em28xx *dev, u16 reg, char *buf, int len);
 int em28xx_write_reg(struct em28xx *dev, u16 reg, u8 val);
 
+int em28xx_read_ac97(struct em28xx *dev, u8 reg);
+int em28xx_write_ac97(struct em28xx *dev, u8 reg, u16 val);
+
 int em28xx_audio_analog_set(struct em28xx *dev);
 int em28xx_audio_setup(struct em28xx *dev);
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 754c8d9685a4..e31144d22237 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1376,6 +1376,7 @@ struct v4l2_streamparm {
 #define V4L2_CHIP_MATCH_HOST       0  /* Match against chip ID on host (0 for the host) */
 #define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver ID */
 #define V4L2_CHIP_MATCH_I2C_ADDR   2  /* Match against I2C 7-bit address */
+#define V4L2_CHIP_MATCH_AC97       3  /* Match against anciliary AC97 chip */
 
 struct v4l2_register {
 	__u32 match_type; /* Match type */
-- 
cgit v1.2.3


From 1fe2740ff10b3de1aab8f88f2f05547e5f369035 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 18 Dec 2008 11:16:24 -0300
Subject: V4L/DVB (9957): v4l2-subdev: add g_sliced_vbi_cap and add NULL
 pointer checks

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-subdev.c | 10 ++++++----
 include/media/v4l2-subdev.h       |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c
index fe1f01c970ac..e3612f29d0df 100644
--- a/drivers/media/video/v4l2-subdev.c
+++ b/drivers/media/video/v4l2-subdev.c
@@ -40,13 +40,13 @@ int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 	case VIDIOC_G_CHIP_IDENT:
 		return v4l2_subdev_call(sd, core, g_chip_ident, arg);
 	case VIDIOC_INT_S_STANDBY:
-		return v4l2_subdev_call(sd, core, s_standby, *(u32 *)arg);
+		return v4l2_subdev_call(sd, core, s_standby, arg ? (*(u32 *)arg) : 0);
 	case VIDIOC_INT_RESET:
-		return v4l2_subdev_call(sd, core, reset, *(u32 *)arg);
+		return v4l2_subdev_call(sd, core, reset, arg ? (*(u32 *)arg) : 0);
 	case VIDIOC_INT_S_GPIO:
-		return v4l2_subdev_call(sd, core, s_gpio, *(u32 *)arg);
+		return v4l2_subdev_call(sd, core, s_gpio, arg ? (*(u32 *)arg) : 0);
 	case VIDIOC_INT_INIT:
-		return v4l2_subdev_call(sd, core, init, *(u32 *)arg);
+		return v4l2_subdev_call(sd, core, init, arg ? (*(u32 *)arg) : 0);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	case VIDIOC_DBG_G_REGISTER:
 		return v4l2_subdev_call(sd, core, g_register, arg);
@@ -90,6 +90,8 @@ int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 		return v4l2_subdev_call(sd, video, s_vbi_data, arg);
 	case VIDIOC_INT_G_VBI_DATA:
 		return v4l2_subdev_call(sd, video, g_vbi_data, arg);
+	case VIDIOC_G_SLICED_VBI_CAP:
+		return v4l2_subdev_call(sd, video, g_sliced_vbi_cap, arg);
 	case VIDIOC_S_FMT:
 		return v4l2_subdev_call(sd, video, s_fmt, arg);
 	case VIDIOC_G_FMT:
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index bc9e0fbf2822..bca25e8eab62 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -110,6 +110,7 @@ struct v4l2_subdev_video_ops {
 	int (*decode_vbi_line)(struct v4l2_subdev *sd, struct v4l2_decode_vbi_line *vbi_line);
 	int (*s_vbi_data)(struct v4l2_subdev *sd, const struct v4l2_sliced_vbi_data *vbi_data);
 	int (*g_vbi_data)(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_data *vbi_data);
+	int (*g_sliced_vbi_cap)(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_cap *cap);
 	int (*s_std_output)(struct v4l2_subdev *sd, v4l2_std_id std);
 	int (*s_stream)(struct v4l2_subdev *sd, int enable);
 	int (*s_fmt)(struct v4l2_subdev *sd, struct v4l2_format *fmt);
-- 
cgit v1.2.3


From 035f8dc1e478c67ea2682fde8f26ee9d0982a2e7 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 18 Dec 2008 12:27:28 -0300
Subject: V4L/DVB (9960): v4l2-subdev: ioctl ops should use unsigned for cmd
 arg.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/msp3400-driver.c | 2 +-
 drivers/media/video/tuner-core.c     | 2 +-
 include/media/v4l2-subdev.h          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index 79ae7bd23528..a622dbb72ed8 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -483,7 +483,7 @@ static int msp_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 }
 
 #ifdef CONFIG_VIDEO_ALLOW_V4L1
-static int msp_ioctl(struct v4l2_subdev *sd, int cmd, void *arg)
+static int msp_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 {
 	struct msp_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 694087bfa77d..97d7509d212f 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -800,7 +800,7 @@ static int tuner_s_standby(struct v4l2_subdev *sd, u32 standby)
 }
 
 #ifdef CONFIG_VIDEO_ALLOW_V4L1
-static int tuner_ioctl(struct v4l2_subdev *sd, int cmd, void *arg)
+static int tuner_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 {
 	struct tuner *t = to_tuner(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index bca25e8eab62..ceef016bb0b7 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -79,7 +79,7 @@ struct v4l2_subdev_core_ops {
 	int (*g_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl);
 	int (*s_ctrl)(struct v4l2_subdev *sd, struct v4l2_control *ctrl);
 	int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm);
-	int (*ioctl)(struct v4l2_subdev *sd, int cmd, void *arg);
+	int (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
 	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
-- 
cgit v1.2.3


From bc97430510960846b6e8f2d62c503653031991e1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 22 Dec 2008 20:34:18 -0300
Subject: V4L/DVB (9969): tvp5150: add support for VIDIOC_G_CHIP_IDENT ioctl

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/tvp5150.c   | 16 ++++++++++++++++
 include/media/v4l2-chip-ident.h |  3 +++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c
index e8a4637350ec..a388a9f0cb18 100644
--- a/drivers/media/video/tvp5150.c
+++ b/drivers/media/video/tvp5150.c
@@ -12,6 +12,7 @@
 #include <media/v4l2-device.h>
 #include <media/tvp5150.h>
 #include <media/v4l2-i2c-drv-legacy.h>
+#include <media/v4l2-chip-ident.h>
 
 #include "tvp5150_reg.h"
 
@@ -961,6 +962,20 @@ static int tvp5150_g_fmt(struct v4l2_subdev *sd, struct v4l2_format *fmt)
 }
 
 
+static int tvp5150_g_chip_ident(struct v4l2_subdev *sd,
+				struct v4l2_chip_ident *chip)
+{
+	int rev;
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+	rev = tvp5150_read(sd, TVP5150_ROM_MAJOR_VER) << 8 |
+	      tvp5150_read(sd, TVP5150_ROM_MINOR_VER);
+
+	return v4l2_chip_ident_i2c_client(client, chip, V4L2_IDENT_TVP5150,
+					  rev);
+}
+
+
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 {
@@ -1026,6 +1041,7 @@ static const struct v4l2_subdev_core_ops tvp5150_core_ops = {
 	.s_ctrl = tvp5150_s_ctrl,
 	.queryctrl = tvp5150_queryctrl,
 	.reset = tvp5150_reset,
+	.g_chip_ident = tvp5150_g_chip_ident,
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	.g_register = tvp5150_g_register,
 	.s_register = tvp5150_s_register,
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index bfe5142e6672..e3e5b53931de 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -70,6 +70,9 @@ enum {
 	/* module vp27smpx: just ident 2700 */
 	V4L2_IDENT_VP27SMPX = 2700,
 
+	/* module tvp5150 */
+	V4L2_IDENT_TVP5150 = 5150,
+
 	/* module cs5345: just ident 5345 */
 	V4L2_IDENT_CS5345 = 5345,
 
-- 
cgit v1.2.3


From dc93a70cc7f92e1dbaf29fa7dfd914b0f618fb31 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 19 Dec 2008 21:28:27 -0300
Subject: V4L/DVB (9973): v4l2-dev: use the release callback from device
 instead of cdev

Instead of relying on the cdev release callback we should rely on the
release callback from the device struct. This requires that we use
get_device/put_device to do proper refcounting. In order to do this
safely v4l2-dev.c now sets up its own file_operations that call
out to the driver's ops.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-dev.c | 356 ++++++++++++++++++++++++++++++-----------
 include/media/v4l2-dev.h       |  47 ++++--
 2 files changed, 295 insertions(+), 108 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index c5ca51a9020a..4e0db8845e04 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -41,17 +41,17 @@
 static ssize_t show_index(struct device *cd,
 			 struct device_attribute *attr, char *buf)
 {
-	struct video_device *vfd = container_of(cd, struct video_device, dev);
+	struct video_device *vdev = to_video_device(cd);
 
-	return sprintf(buf, "%i\n", vfd->index);
+	return sprintf(buf, "%i\n", vdev->index);
 }
 
 static ssize_t show_name(struct device *cd,
 			 struct device_attribute *attr, char *buf)
 {
-	struct video_device *vfd = container_of(cd, struct video_device, dev);
+	struct video_device *vdev = to_video_device(cd);
 
-	return sprintf(buf, "%.*s\n", (int)sizeof(vfd->name), vfd->name);
+	return sprintf(buf, "%.*s\n", (int)sizeof(vdev->name), vdev->name);
 }
 
 static struct device_attribute video_device_attrs[] = {
@@ -73,64 +73,64 @@ struct video_device *video_device_alloc(void)
 }
 EXPORT_SYMBOL(video_device_alloc);
 
-void video_device_release(struct video_device *vfd)
+void video_device_release(struct video_device *vdev)
 {
-	kfree(vfd);
+	kfree(vdev);
 }
 EXPORT_SYMBOL(video_device_release);
 
-void video_device_release_empty(struct video_device *vfd)
+void video_device_release_empty(struct video_device *vdev)
 {
 	/* Do nothing */
 	/* Only valid when the video_device struct is a static. */
 }
 EXPORT_SYMBOL(video_device_release_empty);
 
-/* Called when the last user of the character device is gone. */
-static void v4l2_chardev_release(struct kobject *kobj)
+static inline void video_get(struct video_device *vdev)
 {
-	struct video_device *vfd = container_of(kobj, struct video_device, cdev.kobj);
+	get_device(&vdev->dev);
+}
+
+static inline void video_put(struct video_device *vdev)
+{
+	put_device(&vdev->dev);
+}
+
+/* Called when the last user of the video device exits. */
+static void v4l2_device_release(struct device *cd)
+{
+	struct video_device *vdev = to_video_device(cd);
 
 	mutex_lock(&videodev_lock);
-	if (video_device[vfd->minor] != vfd) {
+	if (video_device[vdev->minor] != vdev) {
 		mutex_unlock(&videodev_lock);
-		BUG();
+		/* should not happen */
+		WARN_ON(1);
 		return;
 	}
 
 	/* Free up this device for reuse */
-	video_device[vfd->minor] = NULL;
-	clear_bit(vfd->num, video_nums[vfd->vfl_type]);
-	mutex_unlock(&videodev_lock);
+	video_device[vdev->minor] = NULL;
 
-	/* Release the character device */
-	vfd->cdev_release(kobj);
-	/* Release video_device and perform other
-	   cleanups as needed. */
-	if (vfd->release)
-		vfd->release(vfd);
-}
+	/* Delete the cdev on this minor as well */
+	cdev_del(vdev->cdev);
+	/* Just in case some driver tries to access this from
+	   the release() callback. */
+	vdev->cdev = NULL;
 
-/* The new kobj_type for the character device */
-static struct kobj_type v4l2_ktype_cdev_default = {
-	.release = v4l2_chardev_release,
-};
+	/* Mark minor as free */
+	clear_bit(vdev->num, video_nums[vdev->vfl_type]);
 
-static void video_release(struct device *cd)
-{
-	struct video_device *vfd = container_of(cd, struct video_device, dev);
+	mutex_unlock(&videodev_lock);
 
-	/* It's now safe to delete the char device.
-	   This will either trigger the v4l2_chardev_release immediately (if
-	   the refcount goes to 0) or later when the last user of the
-	   character device closes it. */
-	cdev_del(&vfd->cdev);
+	/* Release video_device and perform other
+	   cleanups as needed. */
+	vdev->release(vdev);
 }
 
 static struct class video_class = {
 	.name = VIDEO_NAME,
 	.dev_attrs = video_device_attrs,
-	.dev_release = video_release,
 };
 
 struct video_device *video_devdata(struct file *file)
@@ -139,13 +139,163 @@ struct video_device *video_devdata(struct file *file)
 }
 EXPORT_SYMBOL(video_devdata);
 
+static ssize_t v4l2_read(struct file *filp, char __user *buf,
+		size_t sz, loff_t *off)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->read)
+		return -EINVAL;
+	if (video_is_unregistered(vdev))
+		return -EIO;
+	return vdev->fops->read(filp, buf, sz, off);
+}
+
+static ssize_t v4l2_write(struct file *filp, const char __user *buf,
+		size_t sz, loff_t *off)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->write)
+		return -EINVAL;
+	if (video_is_unregistered(vdev))
+		return -EIO;
+	return vdev->fops->write(filp, buf, sz, off);
+}
+
+static unsigned int v4l2_poll(struct file *filp, struct poll_table_struct *poll)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->poll || video_is_unregistered(vdev))
+		return DEFAULT_POLLMASK;
+	return vdev->fops->poll(filp, poll);
+}
+
+static int v4l2_ioctl(struct inode *inode, struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->ioctl)
+		return -ENOTTY;
+	/* Allow ioctl to continue even if the device was unregistered.
+	   Things like dequeueing buffers might still be useful. */
+	return vdev->fops->ioctl(inode, filp, cmd, arg);
+}
+
+static long v4l2_unlocked_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->unlocked_ioctl)
+		return -ENOTTY;
+	/* Allow ioctl to continue even if the device was unregistered.
+	   Things like dequeueing buffers might still be useful. */
+	return vdev->fops->unlocked_ioctl(filp, cmd, arg);
+}
+
+#ifdef CONFIG_COMPAT
+static long v4l2_compat_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->compat_ioctl)
+		return -ENOIOCTLCMD;
+	/* Allow ioctl to continue even if the device was unregistered.
+	   Things like dequeueing buffers might still be useful. */
+	return vdev->fops->compat_ioctl(filp, cmd, arg);
+}
+#endif
+
+static int v4l2_mmap(struct file *filp, struct vm_area_struct *vm)
+{
+	struct video_device *vdev = video_devdata(filp);
+
+	if (!vdev->fops->mmap ||
+	    video_is_unregistered(vdev))
+		return -ENODEV;
+	return vdev->fops->mmap(filp, vm);
+}
+
+/* Override for the open function */
+static int v4l2_open(struct inode *inode, struct file *filp)
+{
+	struct video_device *vdev;
+	int ret;
+
+	/* Check if the video device is available */
+	mutex_lock(&videodev_lock);
+	vdev = video_devdata(filp);
+	/* return ENODEV if the video device has been removed
+	   already or if it is not registered anymore. */
+	if (vdev == NULL || video_is_unregistered(vdev)) {
+		mutex_unlock(&videodev_lock);
+		return -ENODEV;
+	}
+	/* and increase the device refcount */
+	video_get(vdev);
+	mutex_unlock(&videodev_lock);
+	ret = vdev->fops->open(inode, filp);
+	/* decrease the refcount in case of an error */
+	if (ret)
+		video_put(vdev);
+	return ret;
+}
+
+/* Override for the release function */
+static int v4l2_release(struct inode *inode, struct file *filp)
+{
+	struct video_device *vdev = video_devdata(filp);
+	int ret = vdev->fops->release(inode, filp);
+
+	/* decrease the refcount unconditionally since the release()
+	   return value is ignored. */
+	video_put(vdev);
+	return ret;
+}
+
+static const struct file_operations v4l2_unlocked_fops = {
+	.owner = THIS_MODULE,
+	.read = v4l2_read,
+	.write = v4l2_write,
+	.open = v4l2_open,
+	.mmap = v4l2_mmap,
+	.unlocked_ioctl = v4l2_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = v4l2_compat_ioctl,
+#endif
+	.release = v4l2_release,
+	.poll = v4l2_poll,
+	.llseek = no_llseek,
+};
+
+static const struct file_operations v4l2_fops = {
+	.owner = THIS_MODULE,
+	.read = v4l2_read,
+	.write = v4l2_write,
+	.open = v4l2_open,
+	.mmap = v4l2_mmap,
+	.ioctl = v4l2_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = v4l2_compat_ioctl,
+#endif
+	.release = v4l2_release,
+	.poll = v4l2_poll,
+	.llseek = no_llseek,
+};
+
 /**
  * get_index - assign stream number based on parent device
- * @vdev: video_device to assign index number to, vdev->dev should be assigned
- * @num: -1 if auto assign, requested number otherwise
+ * @vdev: video_device to assign index number to, vdev->parent should be assigned
+ * @num:  -1 if auto assign, requested number otherwise
  *
+ * Note that when this is called the new device has not yet been registered
+ * in the video_device array.
  *
- * returns -ENFILE if num is already in use, a free index number if
+ * Returns -ENFILE if num is already in use, a free index number if
  * successful.
  */
 static int get_index(struct video_device *vdev, int num)
@@ -168,7 +318,6 @@ static int get_index(struct video_device *vdev, int num)
 
 	for (i = 0; i < VIDEO_NUM_DEVICES; i++) {
 		if (video_device[i] != NULL &&
-		    video_device[i] != vdev &&
 		    video_device[i]->parent == vdev->parent) {
 			used |= 1 << video_device[i]->index;
 		}
@@ -184,17 +333,15 @@ static int get_index(struct video_device *vdev, int num)
 	return i > max_index ? -ENFILE : i;
 }
 
-static const struct file_operations video_fops;
-
-int video_register_device(struct video_device *vfd, int type, int nr)
+int video_register_device(struct video_device *vdev, int type, int nr)
 {
-	return video_register_device_index(vfd, type, nr, -1);
+	return video_register_device_index(vdev, type, nr, -1);
 }
 EXPORT_SYMBOL(video_register_device);
 
 /**
  *	video_register_device_index - register video4linux devices
- *	@vfd:  video device structure we want to register
+ *	@vdev: video device structure we want to register
  *	@type: type of device to register
  *	@nr:   which device number (0 == /dev/video0, 1 == /dev/video1, ...
  *             -1 == first free)
@@ -218,8 +365,7 @@ EXPORT_SYMBOL(video_register_device);
  *
  *	%VFL_TYPE_RADIO - A radio card
  */
-
-int video_register_device_index(struct video_device *vfd, int type, int nr,
+int video_register_device_index(struct video_device *vdev, int type, int nr,
 					int index)
 {
 	int i = 0;
@@ -227,14 +373,19 @@ int video_register_device_index(struct video_device *vfd, int type, int nr,
 	int minor_offset = 0;
 	int minor_cnt = VIDEO_NUM_DEVICES;
 	const char *name_base;
-	void *priv = video_get_drvdata(vfd);
+	void *priv = video_get_drvdata(vdev);
 
-	/* the release callback MUST be present */
-	BUG_ON(!vfd->release);
+	/* A minor value of -1 marks this video device as never
+	   having been registered */
+	if (vdev)
+		vdev->minor = -1;
 
-	if (vfd == NULL)
+	/* the release callback MUST be present */
+	WARN_ON(!vdev || !vdev->release);
+	if (!vdev || !vdev->release)
 		return -EINVAL;
 
+	/* Part 1: check device type */
 	switch (type) {
 	case VFL_TYPE_GRABBER:
 		name_base = "video";
@@ -254,8 +405,10 @@ int video_register_device_index(struct video_device *vfd, int type, int nr,
 		return -EINVAL;
 	}
 
-	vfd->vfl_type = type;
+	vdev->vfl_type = type;
+	vdev->cdev = NULL;
 
+	/* Part 2: find a free minor, kernel number and device index. */
 #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES
 	/* Keep the ranges for the first four types for historical
 	 * reasons.
@@ -286,10 +439,7 @@ int video_register_device_index(struct video_device *vfd, int type, int nr,
 	}
 #endif
 
-	/* Initialize the character device */
-	cdev_init(&vfd->cdev, vfd->fops);
-	vfd->cdev.owner = vfd->fops->owner;
-	/* pick a minor number */
+	/* Pick a minor number */
 	mutex_lock(&videodev_lock);
 	nr = find_next_zero_bit(video_nums[type], minor_cnt, nr == -1 ? 0 : nr);
 	if (nr == minor_cnt)
@@ -313,72 +463,92 @@ int video_register_device_index(struct video_device *vfd, int type, int nr,
 		return -ENFILE;
 	}
 #endif
-	vfd->minor = i + minor_offset;
-	vfd->num = nr;
+	vdev->minor = i + minor_offset;
+	vdev->num = nr;
 	set_bit(nr, video_nums[type]);
-	BUG_ON(video_device[vfd->minor]);
-	video_device[vfd->minor] = vfd;
-
-	ret = get_index(vfd, index);
-	vfd->index = ret;
-
+	/* Should not happen since we thought this minor was free */
+	WARN_ON(video_device[vdev->minor] != NULL);
+	ret = vdev->index = get_index(vdev, index);
 	mutex_unlock(&videodev_lock);
 
 	if (ret < 0) {
 		printk(KERN_ERR "%s: get_index failed\n", __func__);
-		goto fail_minor;
+		goto cleanup;
 	}
 
-	ret = cdev_add(&vfd->cdev, MKDEV(VIDEO_MAJOR, vfd->minor), 1);
+	/* Part 3: Initialize the character device */
+	vdev->cdev = cdev_alloc();
+	if (vdev->cdev == NULL) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+	if (vdev->fops->unlocked_ioctl)
+		vdev->cdev->ops = &v4l2_unlocked_fops;
+	else
+		vdev->cdev->ops = &v4l2_fops;
+	vdev->cdev->owner = vdev->fops->owner;
+	ret = cdev_add(vdev->cdev, MKDEV(VIDEO_MAJOR, vdev->minor), 1);
 	if (ret < 0) {
 		printk(KERN_ERR "%s: cdev_add failed\n", __func__);
-		goto fail_minor;
+		kfree(vdev->cdev);
+		vdev->cdev = NULL;
+		goto cleanup;
 	}
-	/* sysfs class */
-	memset(&vfd->dev, 0, sizeof(vfd->dev));
+
+	/* Part 4: register the device with sysfs */
+	memset(&vdev->dev, 0, sizeof(vdev->dev));
 	/* The memset above cleared the device's drvdata, so
 	   put back the copy we made earlier. */
-	video_set_drvdata(vfd, priv);
-	vfd->dev.class = &video_class;
-	vfd->dev.devt = MKDEV(VIDEO_MAJOR, vfd->minor);
-	if (vfd->parent)
-		vfd->dev.parent = vfd->parent;
-	dev_set_name(&vfd->dev, "%s%d", name_base, nr);
-	ret = device_register(&vfd->dev);
+	video_set_drvdata(vdev, priv);
+	vdev->dev.class = &video_class;
+	vdev->dev.devt = MKDEV(VIDEO_MAJOR, vdev->minor);
+	if (vdev->parent)
+		vdev->dev.parent = vdev->parent;
+	dev_set_name(&vdev->dev, "%s%d", name_base, nr);
+	ret = device_register(&vdev->dev);
 	if (ret < 0) {
 		printk(KERN_ERR "%s: device_register failed\n", __func__);
-		goto del_cdev;
+		goto cleanup;
 	}
-	/* Remember the cdev's release function */
-	vfd->cdev_release = vfd->cdev.kobj.ktype->release;
-	/* Install our own */
-	vfd->cdev.kobj.ktype = &v4l2_ktype_cdev_default;
-	return 0;
+	/* Register the release callback that will be called when the last
+	   reference to the device goes away. */
+	vdev->dev.release = v4l2_device_release;
 
-del_cdev:
-	cdev_del(&vfd->cdev);
+	/* Part 5: Activate this minor. The char device can now be used. */
+	mutex_lock(&videodev_lock);
+	video_device[vdev->minor] = vdev;
+	mutex_unlock(&videodev_lock);
+	return 0;
 
-fail_minor:
+cleanup:
 	mutex_lock(&videodev_lock);
-	video_device[vfd->minor] = NULL;
-	clear_bit(vfd->num, video_nums[type]);
+	if (vdev->cdev)
+		cdev_del(vdev->cdev);
+	clear_bit(vdev->num, video_nums[type]);
 	mutex_unlock(&videodev_lock);
-	vfd->minor = -1;
+	/* Mark this video device as never having been registered. */
+	vdev->minor = -1;
 	return ret;
 }
 EXPORT_SYMBOL(video_register_device_index);
 
 /**
  *	video_unregister_device - unregister a video4linux device
- *	@vfd: the device to unregister
+ *	@vdev: the device to unregister
  *
- *	This unregisters the passed device and deassigns the minor
- *	number. Future open calls will be met with errors.
+ *	This unregisters the passed device. Future open calls will
+ *	be met with errors.
  */
-
-void video_unregister_device(struct video_device *vfd)
+void video_unregister_device(struct video_device *vdev)
 {
-	device_unregister(&vfd->dev);
+	/* Check if vdev was ever registered at all */
+	if (!vdev || vdev->minor < 0)
+		return;
+
+	mutex_lock(&videodev_lock);
+	set_bit(V4L2_FL_UNREGISTERED, &vdev->flags);
+	mutex_unlock(&videodev_lock);
+	device_unregister(&vdev->dev);
 }
 EXPORT_SYMBOL(video_unregister_device);
 
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index a0a6b41c5e09..e0d72d2c6f0e 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -26,6 +26,11 @@
 
 struct v4l2_ioctl_callbacks;
 
+/* Flag to mark the video_device struct as unregistered.
+   Drivers can set this flag if they want to block all future
+   device access. It is set by video_unregister_device. */
+#define V4L2_FL_UNREGISTERED	(0)
+
 /*
  * Newer version of video_device, handled by videodev2.c
  * 	This version moves redundant code from video device code to
@@ -39,15 +44,17 @@ struct video_device
 
 	/* sysfs */
 	struct device dev;		/* v4l device */
-	struct cdev cdev;		/* character device */
-	void (*cdev_release)(struct kobject *kobj);
+	struct cdev *cdev;		/* character device */
 	struct device *parent;		/* device parent */
 
 	/* device info */
 	char name[32];
 	int vfl_type;
+	/* 'minor' is set to -1 if the registration failed */
 	int minor;
 	u16 num;
+	/* use bitops to set/clear/test flags */
+	unsigned long flags;
 	/* attribute to differentiate multiple indices on one physical device */
 	int index;
 
@@ -58,7 +65,7 @@ struct video_device
 	v4l2_std_id current_norm;	/* Current tvnorm */
 
 	/* callbacks */
-	void (*release)(struct video_device *vfd);
+	void (*release)(struct video_device *vdev);
 
 	/* ioctl callbacks */
 	const struct v4l2_ioctl_ops *ioctl_ops;
@@ -67,36 +74,41 @@ struct video_device
 /* dev to video-device */
 #define to_video_device(cd) container_of(cd, struct video_device, dev)
 
-/* Register and unregister devices. Note that if video_register_device fails,
+/* Register video devices. Note that if video_register_device fails,
    the release() callback of the video_device structure is *not* called, so
    the caller is responsible for freeing any data. Usually that means that
-   you call video_device_release() on failure. */
-int __must_check video_register_device(struct video_device *vfd, int type, int nr);
-int __must_check video_register_device_index(struct video_device *vfd,
+   you call video_device_release() on failure.
+
+   Also note that vdev->minor is set to -1 if the registration failed. */
+int __must_check video_register_device(struct video_device *vdev, int type, int nr);
+int __must_check video_register_device_index(struct video_device *vdev,
 						int type, int nr, int index);
-void video_unregister_device(struct video_device *vfd);
+
+/* Unregister video devices. Will do nothing if vdev == NULL or
+   vdev->minor < 0. */
+void video_unregister_device(struct video_device *vdev);
 
 /* helper functions to alloc/release struct video_device, the
    latter can also be used for video_device->release(). */
 struct video_device * __must_check video_device_alloc(void);
 
-/* this release function frees the vfd pointer */
-void video_device_release(struct video_device *vfd);
+/* this release function frees the vdev pointer */
+void video_device_release(struct video_device *vdev);
 
 /* this release function does nothing, use when the video_device is a
    static global struct. Note that having a static video_device is
    a dubious construction at best. */
-void video_device_release_empty(struct video_device *vfd);
+void video_device_release_empty(struct video_device *vdev);
 
 /* helper functions to access driver private data. */
-static inline void *video_get_drvdata(struct video_device *dev)
+static inline void *video_get_drvdata(struct video_device *vdev)
 {
-	return dev_get_drvdata(&dev->dev);
+	return dev_get_drvdata(&vdev->dev);
 }
 
-static inline void video_set_drvdata(struct video_device *dev, void *data)
+static inline void video_set_drvdata(struct video_device *vdev, void *data)
 {
-	dev_set_drvdata(&dev->dev, data);
+	dev_set_drvdata(&vdev->dev, data);
 }
 
 struct video_device *video_devdata(struct file *file);
@@ -108,4 +120,9 @@ static inline void *video_drvdata(struct file *file)
 	return video_get_drvdata(video_devdata(file));
 }
 
+static inline int video_is_unregistered(struct video_device *vdev)
+{
+	return test_bit(V4L2_FL_UNREGISTERED, &vdev->flags);
+}
+
 #endif /* _V4L2_DEV_H */
-- 
cgit v1.2.3


From 9bea3514dd4a44490b53cc52498b2967e48056dd Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 23 Dec 2008 07:35:17 -0300
Subject: V4L/DVB (9974): v4l2-dev: allow drivers to pass v4l2_device as parent

Drivers that use v4l2_device can set that as parent pointer in the v4l2_dev
field instead of using the struct device parent field.

This allows v4l2-dev.c to check whether this driver is v4l2_device based,
and if so then it can offer additional services.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-dev.c | 3 +++
 include/media/v4l2-dev.h       | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c
index 4e0db8845e04..7ad6711ee327 100644
--- a/drivers/media/video/v4l2-dev.c
+++ b/drivers/media/video/v4l2-dev.c
@@ -30,6 +30,7 @@
 #include <asm/system.h>
 
 #include <media/v4l2-common.h>
+#include <media/v4l2-device.h>
 
 #define VIDEO_NUM_DEVICES	256
 #define VIDEO_NAME              "video4linux"
@@ -407,6 +408,8 @@ int video_register_device_index(struct video_device *vdev, int type, int nr,
 
 	vdev->vfl_type = type;
 	vdev->cdev = NULL;
+	if (vdev->v4l2_dev)
+		vdev->parent = vdev->v4l2_dev->dev;
 
 	/* Part 2: find a free minor, kernel number and device index. */
 #ifdef CONFIG_VIDEO_FIXED_MINOR_RANGES
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index e0d72d2c6f0e..0a88d1d17d30 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -25,6 +25,7 @@
 #define VFL_TYPE_MAX		4
 
 struct v4l2_ioctl_callbacks;
+struct v4l2_device;
 
 /* Flag to mark the video_device struct as unregistered.
    Drivers can set this flag if they want to block all future
@@ -45,7 +46,10 @@ struct video_device
 	/* sysfs */
 	struct device dev;		/* v4l device */
 	struct cdev *cdev;		/* character device */
+
+	/* Set either parent or v4l2_dev if your driver uses v4l2_device */
 	struct device *parent;		/* device parent */
+	struct v4l2_device *v4l2_dev;	/* v4l2_device parent */
 
 	/* device info */
 	char name[32];
-- 
cgit v1.2.3


From aeabc882a3ad9a320783815e0446b12526fd2102 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Thu, 18 Dec 2008 11:05:49 -0300
Subject: V4L/DVB (10068): Change device ID selection method on ov772x driver

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/ov772x.c    | 27 +++++++++++++++++++++------
 include/media/v4l2-chip-ident.h |  2 +-
 2 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index a2d51e27e48d..305befab8943 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -345,6 +345,12 @@
 #define OP_UV       0x00000001
 #define OP_SWAP_RGB 0x00000002
 
+/*
+ * ID
+ */
+#define OV7720  0x7720
+#define VERSION(pid, ver) ((pid<<8)|(ver&0xFF))
+
 /*
  * struct
  */
@@ -374,6 +380,7 @@ struct ov772x_priv {
 	struct soc_camera_device          icd;
 	const struct ov772x_color_format *fmt;
 	const struct ov772x_win_size     *win;
+	int                               model;
 };
 
 #define ENDMARKER { 0xff, 0xff }
@@ -702,7 +709,9 @@ static unsigned long ov772x_query_bus_param(struct soc_camera_device *icd)
 static int ov772x_get_chip_id(struct soc_camera_device *icd,
 			      struct v4l2_chip_ident   *id)
 {
-	id->ident    = V4L2_IDENT_OV772X;
+	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
+
+	id->ident    = priv->model;
 	id->revision = 0;
 
 	return 0;
@@ -796,6 +805,7 @@ static int ov772x_video_probe(struct soc_camera_device *icd)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 	u8                  pid, ver;
+	const char         *devname;
 
 	/*
 	 * We must have a parent by now. And it cannot be a wrong one.
@@ -822,15 +832,21 @@ static int ov772x_video_probe(struct soc_camera_device *icd)
 	 */
 	pid = i2c_smbus_read_byte_data(priv->client, PID);
 	ver = i2c_smbus_read_byte_data(priv->client, VER);
-	if (pid != 0x77 ||
-	    ver != 0x21) {
+
+	switch (VERSION(pid, ver)) {
+	case OV7720:
+		devname     = "ov7720";
+		priv->model = V4L2_IDENT_OV7720;
+		break;
+	default:
 		dev_err(&icd->dev,
 			"Product ID error %x:%x\n", pid, ver);
 		return -ENODEV;
 	}
 
 	dev_info(&icd->dev,
-		 "ov772x Product ID %0x:%0x Manufacturer ID %x:%x\n",
+		 "%s Product ID %0x:%0x Manufacturer ID %x:%x\n",
+		 devname,
 		 pid,
 		 ver,
 		 i2c_smbus_read_byte_data(priv->client, MIDH),
@@ -921,7 +937,7 @@ static int ov772x_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id ov772x_id[] = {
-	{"ov772x", 0},
+	{ "ov772x", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, ov772x_id);
@@ -941,7 +957,6 @@ static struct i2c_driver ov772x_i2c_driver = {
 
 static int __init ov772x_module_init(void)
 {
-	printk(KERN_INFO "ov772x driver\n");
 	return i2c_add_driver(&ov772x_i2c_driver);
 }
 
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index e3e5b53931de..56974e689a4d 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -60,7 +60,7 @@ enum {
 
 	/* OmniVision sensors: reserved range 250-299 */
 	V4L2_IDENT_OV7670 = 250,
-	V4L2_IDENT_OV772X = 251,
+	V4L2_IDENT_OV7720 = 251,
 
 	/* Conexant MPEG encoder/decoders: reserved range 410-420 */
 	V4L2_IDENT_CX23415 = 415,
-- 
cgit v1.2.3


From 3cac2cab4f5b7eb7d9f7afc42cb251c45b96be36 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Thu, 18 Dec 2008 11:07:11 -0300
Subject: V4L/DVB (10069): Add ov7725 support to ov772x driver

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/ov772x.c    | 22 ++++++++++++++++++++++
 include/media/v4l2-chip-ident.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index 305befab8943..99dd943aacf3 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -51,6 +51,7 @@
 #define COM8        0x13 /* Common control 8 */
 #define COM9        0x14 /* Common control 9 */
 #define COM10       0x15 /* Common control 10 */
+#define REG16       0x16 /* Register 16 */
 #define HSTART      0x17 /* Horizontal sensor size */
 #define HSIZE       0x18 /* Horizontal frame (HREF column) end high 8-bit */
 #define VSTART      0x19 /* Vertical frame (row) start high 8-bit */
@@ -65,6 +66,7 @@
 #define AEW         0x24 /* AGC/AEC - Stable operating region (upper limit) */
 #define AEB         0x25 /* AGC/AEC - Stable operating region (lower limit) */
 #define VPT         0x26 /* AGC/AEC Fast mode operating region */
+#define REG28       0x28 /* Register 28 */
 #define HOUTSIZE    0x29 /* Horizontal data output size MSBs */
 #define EXHCH       0x2A /* Dummy pixel insert MSB */
 #define EXHCL       0x2B /* Dummy pixel insert LSB */
@@ -94,6 +96,7 @@
 #define TGT_R       0x43 /* BLC red  channel target value */
 #define TGT_GB      0x44 /* BLC Gb   channel target value */
 #define TGT_GR      0x45 /* BLC Gr   channel target value */
+/* for ov7720 */
 #define LCC0        0x46 /* Lens correction control 0 */
 #define LCC1        0x47 /* Lens correction option 1 - X coordinate */
 #define LCC2        0x48 /* Lens correction option 2 - Y coordinate */
@@ -101,6 +104,15 @@
 #define LCC4        0x4A /* Lens correction option 4 - radius of the circular */
 #define LCC5        0x4B /* Lens correction option 5 */
 #define LCC6        0x4C /* Lens correction option 6 */
+/* for ov7725 */
+#define LC_CTR      0x46 /* Lens correction control */
+#define LC_XC       0x47 /* X coordinate of lens correction center relative */
+#define LC_YC       0x48 /* Y coordinate of lens correction center relative */
+#define LC_COEF     0x49 /* Lens correction coefficient */
+#define LC_RADI     0x4A /* Lens correction radius */
+#define LC_COEFB    0x4B /* Lens B channel compensation coefficient */
+#define LC_COEFR    0x4C /* Lens R channel compensation coefficient */
+
 #define FIXGAIN     0x4D /* Analog fix gain amplifer */
 #define AREF0       0x4E /* Sensor reference control */
 #define AREF1       0x4F /* Sensor reference current control */
@@ -182,8 +194,13 @@
 #define SDE         0xA6 /* Special digital effect control */
 #define USAT        0xA7 /* U component saturation control */
 #define VSAT        0xA8 /* V component saturation control */
+/* for ov7720 */
 #define HUE0        0xA9 /* Hue control 0 */
 #define HUE1        0xAA /* Hue control 1 */
+/* for ov7725 */
+#define HUECOS      0xA9 /* Cosine value */
+#define HUESIN      0xAA /* Sine value */
+
 #define SIGN        0xAB /* Sign bit for Hue and contrast */
 #define DSPAUTO     0xAC /* DSP auto function ON/OFF control */
 
@@ -349,6 +366,7 @@
  * ID
  */
 #define OV7720  0x7720
+#define OV7725  0x7721
 #define VERSION(pid, ver) ((pid<<8)|(ver&0xFF))
 
 /*
@@ -838,6 +856,10 @@ static int ov772x_video_probe(struct soc_camera_device *icd)
 		devname     = "ov7720";
 		priv->model = V4L2_IDENT_OV7720;
 		break;
+	case OV7725:
+		devname     = "ov7725";
+		priv->model = V4L2_IDENT_OV7725;
+		break;
 	default:
 		dev_err(&icd->dev,
 			"Product ID error %x:%x\n", pid, ver);
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index 56974e689a4d..15fd93caaf6c 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -61,6 +61,7 @@ enum {
 	/* OmniVision sensors: reserved range 250-299 */
 	V4L2_IDENT_OV7670 = 250,
 	V4L2_IDENT_OV7720 = 251,
+	V4L2_IDENT_OV7725 = 252,
 
 	/* Conexant MPEG encoder/decoders: reserved range 410-420 */
 	V4L2_IDENT_CX23415 = 415,
-- 
cgit v1.2.3


From bd73b36f0c41b0c02ef4b10a307db1c43537e006 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Tue, 23 Dec 2008 05:54:45 -0300
Subject: V4L/DVB (10072): soc-camera: Add signal inversion flags to be used by
 camera drivers

As reported by Antonio Ospite <ospite@studenti.unina.it> two platforms with a
mt9m111 camera require opposite pixel clock polarity, which means one of them
inverts it. This patch adds support for inversion flags and switches all
available camera drivers to using them.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/mt9m001.c    | 15 +++++++--------
 drivers/media/video/mt9m111.c    |  6 +++++-
 drivers/media/video/mt9v022.c    |  3 +++
 drivers/media/video/ov772x.c     | 10 +++++-----
 drivers/media/video/soc_camera.c | 34 ++++++++++++++++++++++++++++++++++
 include/media/soc_camera.h       | 11 +++++++++++
 6 files changed, 65 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c
index a7f0e6971fe1..b58f0f85e30f 100644
--- a/drivers/media/video/mt9m001.c
+++ b/drivers/media/video/mt9m001.c
@@ -272,17 +272,16 @@ static int mt9m001_set_bus_param(struct soc_camera_device *icd,
 static unsigned long mt9m001_query_bus_param(struct soc_camera_device *icd)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
-	unsigned int width_flag = SOCAM_DATAWIDTH_10;
+	struct soc_camera_link *icl = mt9m001->client->dev.platform_data;
+	/* MT9M001 has all capture_format parameters fixed */
+	unsigned long flags = SOCAM_DATAWIDTH_10 | SOCAM_PCLK_SAMPLE_RISING |
+		SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_HIGH |
+		SOCAM_MASTER;
 
 	if (bus_switch_possible(mt9m001))
-		width_flag |= SOCAM_DATAWIDTH_8;
+		flags |= SOCAM_DATAWIDTH_8;
 
-	/* MT9M001 has all capture_format parameters fixed */
-	return SOCAM_PCLK_SAMPLE_RISING |
-		SOCAM_HSYNC_ACTIVE_HIGH |
-		SOCAM_VSYNC_ACTIVE_HIGH |
-		SOCAM_MASTER |
-		width_flag;
+	return soc_camera_apply_sensor_flags(icl, flags);
 }
 
 static int mt9m001_set_fmt(struct soc_camera_device *icd,
diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c
index b4a238f49600..b0e6046ea967 100644
--- a/drivers/media/video/mt9m111.c
+++ b/drivers/media/video/mt9m111.c
@@ -415,9 +415,13 @@ static int mt9m111_stop_capture(struct soc_camera_device *icd)
 
 static unsigned long mt9m111_query_bus_param(struct soc_camera_device *icd)
 {
-	return SOCAM_MASTER | SOCAM_PCLK_SAMPLE_RISING |
+	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
+	struct soc_camera_link *icl = mt9m111->client->dev.platform_data;
+	unsigned long flags = SOCAM_MASTER | SOCAM_PCLK_SAMPLE_RISING |
 		SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_HIGH |
 		SOCAM_DATAWIDTH_8;
+
+	return soc_camera_apply_sensor_flags(icl, flags);
 }
 
 static int mt9m111_set_bus_param(struct soc_camera_device *icd, unsigned long f)
diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c
index 82e1a3381a7a..3b3a6a027b1d 100644
--- a/drivers/media/video/mt9v022.c
+++ b/drivers/media/video/mt9v022.c
@@ -273,6 +273,7 @@ static int mt9v022_set_bus_param(struct soc_camera_device *icd,
 				 unsigned long flags)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
+	struct soc_camera_link *icl = mt9v022->client->dev.platform_data;
 	unsigned int width_flag = flags & SOCAM_DATAWIDTH_MASK;
 	int ret;
 	u16 pixclk = 0;
@@ -296,6 +297,8 @@ static int mt9v022_set_bus_param(struct soc_camera_device *icd,
 		mt9v022->datawidth = width_flag == SOCAM_DATAWIDTH_8 ? 8 : 10;
 	}
 
+	flags = soc_camera_apply_sensor_flags(icl, flags);
+
 	if (flags & SOCAM_PCLK_SAMPLE_RISING)
 		pixclk |= 0x10;
 
diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index 99dd943aacf3..110cb9be09d6 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -716,12 +716,12 @@ static int ov772x_set_bus_param(struct soc_camera_device *icd,
 static unsigned long ov772x_query_bus_param(struct soc_camera_device *icd)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
-
-	return  SOCAM_PCLK_SAMPLE_RISING |
-		SOCAM_HSYNC_ACTIVE_HIGH  |
-		SOCAM_VSYNC_ACTIVE_HIGH  |
-		SOCAM_MASTER             |
+	struct soc_camera_link *icl = priv->client->dev.platform_data;
+	unsigned long flags = SOCAM_PCLK_SAMPLE_RISING | SOCAM_MASTER |
+		SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_HSYNC_ACTIVE_HIGH |
 		priv->info->buswidth;
+
+	return soc_camera_apply_sensor_flags(icl, flags);
 }
 
 static int ov772x_get_chip_id(struct soc_camera_device *icd,
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 5e48c2cc1a44..176017501055 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -59,6 +59,40 @@ const struct soc_camera_format_xlate *soc_camera_xlate_by_fourcc(
 }
 EXPORT_SYMBOL(soc_camera_xlate_by_fourcc);
 
+/**
+ * soc_camera_apply_sensor_flags() - apply platform SOCAM_SENSOR_INVERT_* flags
+ * @icl:	camera platform parameters
+ * @flags:	flags to be inverted according to platform configuration
+ * @return:	resulting flags
+ */
+unsigned long soc_camera_apply_sensor_flags(struct soc_camera_link *icl,
+					    unsigned long flags)
+{
+	unsigned long f;
+
+	/* If only one of the two polarities is supported, switch to the opposite */
+	if (icl->flags & SOCAM_SENSOR_INVERT_HSYNC) {
+		f = flags & (SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_HSYNC_ACTIVE_LOW);
+		if (f == SOCAM_HSYNC_ACTIVE_HIGH || f == SOCAM_HSYNC_ACTIVE_LOW)
+			flags ^= SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_HSYNC_ACTIVE_LOW;
+	}
+
+	if (icl->flags & SOCAM_SENSOR_INVERT_VSYNC) {
+		f = flags & (SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_LOW);
+		if (f == SOCAM_VSYNC_ACTIVE_HIGH || f == SOCAM_VSYNC_ACTIVE_LOW)
+			flags ^= SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_LOW;
+	}
+
+	if (icl->flags & SOCAM_SENSOR_INVERT_PCLK) {
+		f = flags & (SOCAM_PCLK_SAMPLE_RISING | SOCAM_PCLK_SAMPLE_FALLING);
+		if (f == SOCAM_PCLK_SAMPLE_RISING || f == SOCAM_PCLK_SAMPLE_FALLING)
+			flags ^= SOCAM_PCLK_SAMPLE_RISING | SOCAM_PCLK_SAMPLE_FALLING;
+	}
+
+	return flags;
+}
+EXPORT_SYMBOL(soc_camera_apply_sensor_flags);
+
 static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 				      struct v4l2_format *f)
 {
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index da57ffdaec4d..e6ed0d94ac1b 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -81,11 +81,19 @@ struct soc_camera_host_ops {
 	unsigned int (*poll)(struct file *, poll_table *);
 };
 
+#define SOCAM_SENSOR_INVERT_PCLK	(1 << 0)
+#define SOCAM_SENSOR_INVERT_MCLK	(1 << 1)
+#define SOCAM_SENSOR_INVERT_HSYNC	(1 << 2)
+#define SOCAM_SENSOR_INVERT_VSYNC	(1 << 3)
+#define SOCAM_SENSOR_INVERT_DATA	(1 << 4)
+
 struct soc_camera_link {
 	/* Camera bus id, used to match a camera and a bus */
 	int bus_id;
 	/* GPIO number to switch between 8 and 10 bit modes */
 	unsigned int gpio;
+	/* Per camera SOCAM_SENSOR_* bus flags */
+	unsigned long flags;
 	/* Optional callbacks to power on or off and reset the sensor */
 	int (*power)(struct device *, int);
 	int (*reset)(struct device *);
@@ -206,4 +214,7 @@ static inline unsigned long soc_camera_bus_param_compatible(
 	return (!hsync || !vsync || !pclk) ? 0 : common_flags;
 }
 
+extern unsigned long soc_camera_apply_sensor_flags(struct soc_camera_link *icl,
+						   unsigned long flags);
+
 #endif
-- 
cgit v1.2.3


From a9bef518cd78d569a3ff0b1ac2afa5e2d8b3573a Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 18 Dec 2008 11:34:20 -0300
Subject: V4L/DVB (10074): soc-camera: add camera sense data

Add a struct soc_camera_sense, that can be used by camera host drivers to
request additional information from a camera driver, for example, when
changing data format. This struct can be extended in the future, its first use
is to request the camera driver whether the pixel-clock frequency has changed.

Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/soc_camera.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index e6ed0d94ac1b..38b826c608be 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -36,6 +36,7 @@ struct soc_camera_device {
 	unsigned char iface;		/* Host number */
 	unsigned char devnum;		/* Device number per host */
 	unsigned char buswidth;		/* See comment in .c */
+	struct soc_camera_sense *sense;	/* See comment in struct definition */
 	struct soc_camera_ops *ops;
 	struct video_device *vdev;
 	const struct soc_camera_data_format *current_fmt;
@@ -172,6 +173,32 @@ struct soc_camera_ops {
 	int num_controls;
 };
 
+#define SOCAM_SENSE_PCLK_CHANGED	(1 << 0)
+
+/**
+ * This struct can be attached to struct soc_camera_device by the host driver
+ * to request sense from the camera, for example, when calling .set_fmt(). The
+ * host then can check which flags are set and verify respective values if any.
+ * For example, if SOCAM_SENSE_PCLK_CHANGED is set, it means, pixclock has
+ * changed during this operation. After completion the host should detach sense.
+ *
+ * @flags		ored SOCAM_SENSE_* flags
+ * @master_clock	if the host wants to be informed about pixel-clock
+ *			change, it better set master_clock.
+ * @pixel_clock_max	maximum pixel clock frequency supported by the host,
+ *			camera is not allowed to exceed this.
+ * @pixel_clock		if the camera driver changed pixel clock during this
+ *			operation, it sets SOCAM_SENSE_PCLK_CHANGED, uses
+ *			master_clock to calculate the new pixel-clock and
+ *			sets this field.
+ */
+struct soc_camera_sense {
+	unsigned long flags;
+	unsigned long master_clock;
+	unsigned long pixel_clock_max;
+	unsigned long pixel_clock;
+};
+
 static inline struct v4l2_queryctrl const *soc_camera_find_qctrl(
 	struct soc_camera_ops *ops, int id)
 {
-- 
cgit v1.2.3


From 9b9fd6c71fb46a4a710040c8ef28f1e84b88a830 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 18 Dec 2008 11:42:54 -0300
Subject: V4L/DVB (10076): v4l: add chip ID for MT9M112 camera sensor from
 Micron

The chip is largely compatible with MT9M111 and is going to be supported by the
same driver.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-chip-ident.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index 15fd93caaf6c..30448cd51751 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -171,6 +171,7 @@ enum {
 	V4L2_IDENT_MT9M001C12ST		= 45000,
 	V4L2_IDENT_MT9M001C12STM	= 45005,
 	V4L2_IDENT_MT9M111		= 45007,
+	V4L2_IDENT_MT9M112		= 45008,
 	V4L2_IDENT_MT9V022IX7ATC	= 45010, /* No way to detect "normal" I77ATx */
 	V4L2_IDENT_MT9V022IX7ATM	= 45015, /* and "lead free" IA7ATx chips */
 };
-- 
cgit v1.2.3


From 91962fa713bd8bf47434b02ac661fdc201365fa5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 18 Dec 2008 11:45:00 -0300
Subject: V4L/DVB (10078): video: add NV16 and NV61 pixel formats

This patch adds support for NV16 and NV61 pixel formats.

These pixel formats use two planes; one for 8-bit Y values and
one for interleaved 8-bit U and V values. NV16/NV61 formats are
very similar to NV12/NV21 with the exception that NV16/NV61 are
using the same number of lines for both planes. The difference
between NV16 and NV61 is the U and V byte order.

The fourcc values are extrapolated from the NV12/NV21 case.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e31144d22237..1f126e30766c 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -305,6 +305,8 @@ struct v4l2_pix_format {
 /* two planes -- one Y, one Cr + Cb interleaved  */
 #define V4L2_PIX_FMT_NV12    v4l2_fourcc('N', 'V', '1', '2') /* 12  Y/CbCr 4:2:0  */
 #define V4L2_PIX_FMT_NV21    v4l2_fourcc('N', 'V', '2', '1') /* 12  Y/CrCb 4:2:0  */
+#define V4L2_PIX_FMT_NV16    v4l2_fourcc('N', 'V', '1', '6') /* 16  Y/CbCr 4:2:2  */
+#define V4L2_PIX_FMT_NV61    v4l2_fourcc('N', 'V', '6', '1') /* 16  Y/CrCb 4:2:2  */
 
 /*  The following formats are not defined in the V4L2 specification */
 #define V4L2_PIX_FMT_YUV410  v4l2_fourcc('Y', 'U', 'V', '9') /*  9  YUV 4:1:0     */
-- 
cgit v1.2.3


From 1c3bb7431d16f7486a8523d54380bad89c485dc8 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lg@denx.de>
Date: Thu, 18 Dec 2008 12:28:54 -0300
Subject: V4L/DVB (10083): soc-camera: unify locking, play nicer with videobuf
 locking

Move mutex from host drivers to camera device object, take into account
videobuf locking.

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/pxa_camera.c           | 15 ++---
 drivers/media/video/sh_mobile_ceu_camera.c |  9 +--
 drivers/media/video/soc_camera.c           | 99 +++++++++++++++++++++++++-----
 include/media/soc_camera.h                 |  8 ++-
 4 files changed, 96 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index aa7efc45d364..c3c50de0aa50 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -25,7 +25,6 @@
 #include <linux/version.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
-#include <linux/mutex.h>
 #include <linux/clk.h>
 
 #include <media/v4l2-common.h>
@@ -164,8 +163,6 @@
 			CICR0_PERRM | CICR0_QDM | CICR0_CDM | CICR0_SOFM | \
 			CICR0_EOFM | CICR0_FOM)
 
-static DEFINE_MUTEX(camera_lock);
-
 /*
  * Structures
  */
@@ -813,16 +810,17 @@ static irqreturn_t pxa_camera_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-/* The following two functions absolutely depend on the fact, that
- * there can be only one camera on PXA quick capture interface */
+/*
+ * The following two functions absolutely depend on the fact, that
+ * there can be only one camera on PXA quick capture interface
+ * Called with .video_lock held
+ */
 static int pxa_camera_add_device(struct soc_camera_device *icd)
 {
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 	struct pxa_camera_dev *pcdev = ici->priv;
 	int ret;
 
-	mutex_lock(&camera_lock);
-
 	if (pcdev->icd) {
 		ret = -EBUSY;
 		goto ebusy;
@@ -838,11 +836,10 @@ static int pxa_camera_add_device(struct soc_camera_device *icd)
 		pcdev->icd = icd;
 
 ebusy:
-	mutex_unlock(&camera_lock);
-
 	return ret;
 }
 
+/* Called with .video_lock held */
 static void pxa_camera_remove_device(struct soc_camera_device *icd)
 {
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index a49bec1509f4..47ffa441c869 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -29,7 +29,6 @@
 #include <linux/version.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
-#include <linux/mutex.h>
 #include <linux/videodev2.h>
 #include <linux/clk.h>
 
@@ -75,8 +74,6 @@
 #define CDBYR2 0x98 /* Capture data bottom-field address Y register 2 */
 #define CDBCR2 0x9c /* Capture data bottom-field address C register 2 */
 
-static DEFINE_MUTEX(camera_lock);
-
 /* per video frame buffer */
 struct sh_mobile_ceu_buffer {
 	struct videobuf_buffer vb; /* v4l buffer must be first */
@@ -292,14 +289,13 @@ static irqreturn_t sh_mobile_ceu_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/* Called with .video_lock held */
 static int sh_mobile_ceu_add_device(struct soc_camera_device *icd)
 {
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 	struct sh_mobile_ceu_dev *pcdev = ici->priv;
 	int ret = -EBUSY;
 
-	mutex_lock(&camera_lock);
-
 	if (pcdev->icd)
 		goto err;
 
@@ -319,11 +315,10 @@ static int sh_mobile_ceu_add_device(struct soc_camera_device *icd)
 
 	pcdev->icd = icd;
 err:
-	mutex_unlock(&camera_lock);
-
 	return ret;
 }
 
+/* Called with .video_lock held */
 static void sh_mobile_ceu_remove_device(struct soc_camera_device *icd)
 {
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index d5613cdd93a6..e869670dbae5 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -33,7 +33,6 @@
 static LIST_HEAD(hosts);
 static LIST_HEAD(devices);
 static DEFINE_MUTEX(list_lock);
-static DEFINE_MUTEX(video_lock);
 
 const struct soc_camera_data_format *soc_camera_format_by_fourcc(
 	struct soc_camera_device *icd, unsigned int fourcc)
@@ -270,8 +269,10 @@ static int soc_camera_open(struct inode *inode, struct file *file)
 	if (!icf)
 		return -ENOMEM;
 
-	/* Protect against icd->remove() until we module_get() both drivers. */
-	mutex_lock(&video_lock);
+	/*
+	 * It is safe to dereference these pointers now as long as a user has
+	 * the video device open - we are protected by the held cdev reference.
+	 */
 
 	vdev = video_devdata(file);
 	icd = container_of(vdev->parent, struct soc_camera_device, dev);
@@ -289,6 +290,9 @@ static int soc_camera_open(struct inode *inode, struct file *file)
 		goto emgi;
 	}
 
+	/* Protect against icd->remove() until we module_get() both drivers. */
+	mutex_lock(&icd->video_lock);
+
 	icf->icd = icd;
 	icd->use_count++;
 
@@ -304,7 +308,7 @@ static int soc_camera_open(struct inode *inode, struct file *file)
 		}
 	}
 
-	mutex_unlock(&video_lock);
+	mutex_unlock(&icd->video_lock);
 
 	file->private_data = icf;
 	dev_dbg(&icd->dev, "camera device open\n");
@@ -313,16 +317,16 @@ static int soc_camera_open(struct inode *inode, struct file *file)
 
 	return 0;
 
-	/* All errors are entered with the video_lock held */
+	/* First two errors are entered with the .video_lock held */
 eiciadd:
 	soc_camera_free_user_formats(icd);
 eiufmt:
 	icd->use_count--;
+	mutex_unlock(&icd->video_lock);
 	module_put(ici->ops->owner);
 emgi:
 	module_put(icd->ops->owner);
 emgd:
-	mutex_unlock(&video_lock);
 	vfree(icf);
 	return ret;
 }
@@ -334,15 +338,16 @@ static int soc_camera_close(struct inode *inode, struct file *file)
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 	struct video_device *vdev = icd->vdev;
 
-	mutex_lock(&video_lock);
+	mutex_lock(&icd->video_lock);
 	icd->use_count--;
 	if (!icd->use_count) {
 		ici->ops->remove(icd);
 		soc_camera_free_user_formats(icd);
 	}
+	mutex_unlock(&icd->video_lock);
+
 	module_put(icd->ops->owner);
 	module_put(ici->ops->owner);
-	mutex_unlock(&video_lock);
 
 	vfree(icf);
 
@@ -424,18 +429,27 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&icf->vb_vidq.vb_lock);
+
+	if (videobuf_queue_is_busy(&icf->vb_vidq)) {
+		dev_err(&icd->dev, "S_FMT denied: queue busy\n");
+		ret = -EBUSY;
+		goto unlock;
+	}
+
 	rect.left	= icd->x_current;
 	rect.top	= icd->y_current;
 	rect.width	= pix->width;
 	rect.height	= pix->height;
 	ret = ici->ops->set_fmt(icd, pix->pixelformat, &rect);
 	if (ret < 0) {
-		return ret;
+		goto unlock;
 	} else if (!icd->current_fmt ||
 		   icd->current_fmt->fourcc != pixfmt) {
 		dev_err(&ici->dev,
 			"Host driver hasn't set up current format correctly!\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	icd->width		= rect.width;
@@ -449,7 +463,12 @@ static int soc_camera_s_fmt_vid_cap(struct file *file, void *priv,
 		icd->width, icd->height);
 
 	/* set physical bus parameters */
-	return ici->ops->set_bus_param(icd, pixfmt);
+	ret = ici->ops->set_bus_param(icd, pixfmt);
+
+unlock:
+	mutex_unlock(&icf->vb_vidq.vb_lock);
+
+	return ret;
 }
 
 static int soc_camera_enum_fmt_vid_cap(struct file *file, void  *priv,
@@ -510,6 +529,7 @@ static int soc_camera_streamon(struct file *file, void *priv,
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
+	int ret;
 
 	WARN_ON(priv != file->private_data);
 
@@ -518,10 +538,16 @@ static int soc_camera_streamon(struct file *file, void *priv,
 	if (i != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		return -EINVAL;
 
+	mutex_lock(&icd->video_lock);
+
 	icd->ops->start_capture(icd);
 
 	/* This calls buf_queue from host driver's videobuf_queue_ops */
-	return videobuf_streamon(&icf->vb_vidq);
+	ret = videobuf_streamon(&icf->vb_vidq);
+
+	mutex_unlock(&icd->video_lock);
+
+	return ret;
 }
 
 static int soc_camera_streamoff(struct file *file, void *priv,
@@ -537,12 +563,16 @@ static int soc_camera_streamoff(struct file *file, void *priv,
 	if (i != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		return -EINVAL;
 
+	mutex_lock(&icd->video_lock);
+
 	/* This calls buf_release from host driver's videobuf_queue_ops for all
 	 * remaining buffers. When the last buffer is freed, stop capture */
 	videobuf_streamoff(&icf->vb_vidq);
 
 	icd->ops->stop_capture(icd);
 
+	mutex_unlock(&icd->video_lock);
+
 	return 0;
 }
 
@@ -654,6 +684,9 @@ static int soc_camera_s_crop(struct file *file, void *fh,
 	if (a->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		return -EINVAL;
 
+	/* Cropping is allowed during a running capture, guard consistency */
+	mutex_lock(&icf->vb_vidq.vb_lock);
+
 	ret = ici->ops->set_fmt(icd, 0, &a->c);
 	if (!ret) {
 		icd->width	= a->c.width;
@@ -662,6 +695,8 @@ static int soc_camera_s_crop(struct file *file, void *fh,
 		icd->y_current	= a->c.top;
 	}
 
+	mutex_unlock(&icf->vb_vidq.vb_lock);
+
 	return ret;
 }
 
@@ -775,11 +810,32 @@ static int soc_camera_probe(struct device *dev)
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 	int ret;
 
+	/*
+	 * Possible race scenario:
+	 * modprobe <camera-host-driver> triggers __func__
+	 * at this moment respective <camera-sensor-driver> gets rmmod'ed
+	 * to protect take module references.
+	 */
+
+	if (!try_module_get(icd->ops->owner)) {
+		dev_err(&icd->dev, "Couldn't lock sensor driver.\n");
+		ret = -EINVAL;
+		goto emgd;
+	}
+
+	if (!try_module_get(ici->ops->owner)) {
+		dev_err(&icd->dev, "Couldn't lock capture bus driver.\n");
+		ret = -EINVAL;
+		goto emgi;
+	}
+
+	mutex_lock(&icd->video_lock);
+
 	/* We only call ->add() here to activate and probe the camera.
 	 * We shall ->remove() and deactivate it immediately afterwards. */
 	ret = ici->ops->add(icd);
 	if (ret < 0)
-		return ret;
+		goto eiadd;
 
 	ret = icd->ops->probe(icd);
 	if (ret >= 0) {
@@ -793,6 +849,12 @@ static int soc_camera_probe(struct device *dev)
 	}
 	ici->ops->remove(icd);
 
+eiadd:
+	mutex_unlock(&icd->video_lock);
+	module_put(ici->ops->owner);
+emgi:
+	module_put(icd->ops->owner);
+emgd:
 	return ret;
 }
 
@@ -966,6 +1028,7 @@ int soc_camera_device_register(struct soc_camera_device *icd)
 	icd->dev.release	= dummy_release;
 	icd->use_count		= 0;
 	icd->host_priv		= NULL;
+	mutex_init(&icd->video_lock);
 
 	return scan_add_device(icd);
 }
@@ -1012,6 +1075,10 @@ static const struct v4l2_ioctl_ops soc_camera_ioctl_ops = {
 #endif
 };
 
+/*
+ * Usually called from the struct soc_camera_ops .probe() method, i.e., from
+ * soc_camera_probe() above with .video_lock held
+ */
 int soc_camera_video_start(struct soc_camera_device *icd)
 {
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
@@ -1027,7 +1094,7 @@ int soc_camera_video_start(struct soc_camera_device *icd)
 	dev_dbg(&ici->dev, "Allocated video_device %p\n", vdev);
 
 	strlcpy(vdev->name, ici->drv_name, sizeof(vdev->name));
-	/* Maybe better &ici->dev */
+
 	vdev->parent		= &icd->dev;
 	vdev->current_norm	= V4L2_STD_UNKNOWN;
 	vdev->fops		= &soc_camera_fops;
@@ -1061,10 +1128,10 @@ void soc_camera_video_stop(struct soc_camera_device *icd)
 	if (!icd->dev.parent || !vdev)
 		return;
 
-	mutex_lock(&video_lock);
+	mutex_lock(&icd->video_lock);
 	video_unregister_device(vdev);
 	icd->vdev = NULL;
-	mutex_unlock(&video_lock);
+	mutex_unlock(&icd->video_lock);
 }
 EXPORT_SYMBOL(soc_camera_video_stop);
 
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 38b826c608be..8bae9a359d93 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -12,9 +12,10 @@
 #ifndef SOC_CAMERA_H
 #define SOC_CAMERA_H
 
+#include <linux/mutex.h>
+#include <linux/pm.h>
 #include <linux/videodev2.h>
 #include <media/videobuf-core.h>
-#include <linux/pm.h>
 
 struct soc_camera_device {
 	struct list_head list;
@@ -45,9 +46,10 @@ struct soc_camera_device {
 	struct soc_camera_format_xlate *user_formats;
 	int num_user_formats;
 	struct module *owner;
-	void *host_priv;		/* per-device host private data */
-	/* soc_camera.c private count. Only accessed with video_lock held */
+	void *host_priv;		/* Per-device host private data */
+	/* soc_camera.c private count. Only accessed with .video_lock held */
 	int use_count;
+	struct mutex video_lock;	/* Protects device data */
 };
 
 struct soc_camera_file {
-- 
cgit v1.2.3


From 513791aba6266e0c15d5b697b97e956e83537f5a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Thu, 18 Dec 2008 12:46:45 -0300
Subject: V4L/DVB (10086): Add new set_std function on soc_camera

This patch presents new method to be able to check v4l2_std_id

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/soc_camera.c | 9 ++++++++-
 include/media/soc_camera.h       | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index e869670dbae5..b12d11f213bd 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -152,7 +152,14 @@ static int soc_camera_s_input(struct file *file, void *priv, unsigned int i)
 
 static int soc_camera_s_std(struct file *file, void *priv, v4l2_std_id *a)
 {
-	return 0;
+	struct soc_camera_file *icf = file->private_data;
+	struct soc_camera_device *icd = icf->icd;
+	int ret = 0;
+
+	if (icd->ops->set_std)
+		ret = icd->ops->set_std(icd, a);
+
+	return ret;
 }
 
 static int soc_camera_reqbufs(struct file *file, void *priv,
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 8bae9a359d93..26dede82057c 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -165,6 +165,7 @@ struct soc_camera_ops {
 	int (*set_bus_param)(struct soc_camera_device *, unsigned long);
 	int (*get_chip_id)(struct soc_camera_device *,
 			   struct v4l2_chip_ident *);
+	int (*set_std)(struct soc_camera_device *, v4l2_std_id *);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	int (*get_register)(struct soc_camera_device *, struct v4l2_register *);
 	int (*set_register)(struct soc_camera_device *, struct v4l2_register *);
-- 
cgit v1.2.3


From 34d359db7d683e227f27595ad3702fb2ae96108a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Thu, 18 Dec 2008 12:47:46 -0300
Subject: V4L/DVB (10087): Add new enum_input function on soc_camera

This patch presents new method to be able to select V4L2 input type

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/soc_camera.c | 17 +++++++++++++----
 include/media/soc_camera.h       |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index b12d11f213bd..e86e6bda1b7f 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -125,14 +125,23 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv,
 static int soc_camera_enum_input(struct file *file, void *priv,
 				 struct v4l2_input *inp)
 {
+	struct soc_camera_file *icf = file->private_data;
+	struct soc_camera_device *icd = icf->icd;
+	int ret = 0;
+
 	if (inp->index != 0)
 		return -EINVAL;
 
-	inp->type = V4L2_INPUT_TYPE_CAMERA;
-	inp->std = V4L2_STD_UNKNOWN;
-	strcpy(inp->name, "Camera");
+	if (icd->ops->enum_input)
+		ret = icd->ops->enum_input(icd, inp);
+	else {
+		/* default is camera */
+		inp->type = V4L2_INPUT_TYPE_CAMERA;
+		inp->std  = V4L2_STD_UNKNOWN;
+		strcpy(inp->name, "Camera");
+	}
 
-	return 0;
+	return ret;
 }
 
 static int soc_camera_g_input(struct file *file, void *priv, unsigned int *i)
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 26dede82057c..50f4447566de 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -166,6 +166,7 @@ struct soc_camera_ops {
 	int (*get_chip_id)(struct soc_camera_device *,
 			   struct v4l2_chip_ident *);
 	int (*set_std)(struct soc_camera_device *, v4l2_std_id *);
+	int (*enum_input)(struct soc_camera_device *, struct v4l2_input *);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	int (*get_register)(struct soc_camera_device *, struct v4l2_register *);
 	int (*set_register)(struct soc_camera_device *, struct v4l2_register *);
-- 
cgit v1.2.3


From 042d87900217228f865654fa70fade8139bd42cf Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lg@denx.de>
Date: Fri, 19 Dec 2008 10:07:49 -0300
Subject: V4L/DVB (10093): soc-camera: add new bus width and signal polarity
 flags

In preparation for i.MX31 camera host driver add flags for 4 and 15 bit bus
widths and for data lines polarity inversion.

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/soc_camera.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 50f4447566de..425b6a98c95c 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -221,15 +221,20 @@ static inline struct v4l2_queryctrl const *soc_camera_find_qctrl(
 #define SOCAM_HSYNC_ACTIVE_LOW		(1 << 3)
 #define SOCAM_VSYNC_ACTIVE_HIGH		(1 << 4)
 #define SOCAM_VSYNC_ACTIVE_LOW		(1 << 5)
-#define SOCAM_DATAWIDTH_8		(1 << 6)
-#define SOCAM_DATAWIDTH_9		(1 << 7)
-#define SOCAM_DATAWIDTH_10		(1 << 8)
-#define SOCAM_DATAWIDTH_16		(1 << 9)
-#define SOCAM_PCLK_SAMPLE_RISING	(1 << 10)
-#define SOCAM_PCLK_SAMPLE_FALLING	(1 << 11)
-
-#define SOCAM_DATAWIDTH_MASK (SOCAM_DATAWIDTH_8 | SOCAM_DATAWIDTH_9 | \
-			      SOCAM_DATAWIDTH_10 | SOCAM_DATAWIDTH_16)
+#define SOCAM_DATAWIDTH_4		(1 << 6)
+#define SOCAM_DATAWIDTH_8		(1 << 7)
+#define SOCAM_DATAWIDTH_9		(1 << 8)
+#define SOCAM_DATAWIDTH_10		(1 << 9)
+#define SOCAM_DATAWIDTH_15		(1 << 10)
+#define SOCAM_DATAWIDTH_16		(1 << 11)
+#define SOCAM_PCLK_SAMPLE_RISING	(1 << 12)
+#define SOCAM_PCLK_SAMPLE_FALLING	(1 << 13)
+#define SOCAM_DATA_ACTIVE_HIGH		(1 << 14)
+#define SOCAM_DATA_ACTIVE_LOW		(1 << 15)
+
+#define SOCAM_DATAWIDTH_MASK (SOCAM_DATAWIDTH_4 | SOCAM_DATAWIDTH_8 | \
+			      SOCAM_DATAWIDTH_9 | SOCAM_DATAWIDTH_10 | \
+			      SOCAM_DATAWIDTH_15 | SOCAM_DATAWIDTH_16)
 
 static inline unsigned long soc_camera_bus_param_compatible(
 			unsigned long camera_flags, unsigned long bus_flags)
-- 
cgit v1.2.3


From ed922a892e535c14035210b5be328af1f49561c8 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Date: Mon, 29 Dec 2008 06:04:06 -0300
Subject: V4L/DVB (10094): Add tw9910 driver

This patch adds tw9910 driver that use soc_camera framework.
It was tested on SH Migo-r board and mplayer.

 create mode 100644 drivers/media/video/tw9910.c
 create mode 100644 include/media/tw9910.h

Signed-off-by: Kuninori Morimoto <morimoto.kuninori@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig     |   6 +
 drivers/media/video/Makefile    |   1 +
 drivers/media/video/tw9910.c    | 941 ++++++++++++++++++++++++++++++++++++++++
 include/media/tw9910.h          |  39 ++
 include/media/v4l2-chip-ident.h |   3 +
 5 files changed, 990 insertions(+)
 create mode 100644 drivers/media/video/tw9910.c
 create mode 100644 include/media/tw9910.h

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index da538d9ca303..9785eeb1ed44 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -755,6 +755,12 @@ config MT9V022_PCA9536_SWITCH
 	  Select this if your MT9V022 camera uses a PCA9536 I2C GPIO
 	  extender to switch between 8 and 10 bit datawidth modes
 
+config SOC_CAMERA_TW9910
+	tristate "tw9910 support"
+	depends on SOC_CAMERA && I2C
+	help
+	  This is a tw9910 video driver
+
 config SOC_CAMERA_PLATFORM
 	tristate "platform camera support"
 	depends on SOC_CAMERA
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 84a2be0cbbe7..40c69afcde7a 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -140,6 +140,7 @@ obj-$(CONFIG_SOC_CAMERA_MT9M111)	+= mt9m111.o
 obj-$(CONFIG_SOC_CAMERA_MT9V022)	+= mt9v022.o
 obj-$(CONFIG_SOC_CAMERA_OV772X)		+= ov772x.o
 obj-$(CONFIG_SOC_CAMERA_PLATFORM)	+= soc_camera_platform.o
+obj-$(CONFIG_SOC_CAMERA_TW9910)		+= tw9910.o
 
 obj-$(CONFIG_VIDEO_AU0828) += au0828/
 
diff --git a/drivers/media/video/tw9910.c b/drivers/media/video/tw9910.c
new file mode 100644
index 000000000000..75cfde02d026
--- /dev/null
+++ b/drivers/media/video/tw9910.c
@@ -0,0 +1,941 @@
+/*
+ * tw9910 Video Driver
+ *
+ * Copyright (C) 2008 Renesas Solutions Corp.
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on ov772x driver,
+ *
+ * Copyright (C) 2008 Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ * Copyright 2006-7 Jonathan Corbet <corbet@lwn.net>
+ * Copyright (C) 2008 Magnus Damm
+ * Copyright (C) 2008, Guennadi Liakhovetski <kernel@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-chip-ident.h>
+#include <media/v4l2-common.h>
+#include <media/soc_camera.h>
+#include <media/tw9910.h>
+
+#define GET_ID(val)  ((val & 0xF8) >> 3)
+#define GET_ReV(val) (val & 0x07)
+
+/*
+ * register offset
+ */
+#define ID		0x00 /* Product ID Code Register */
+#define STATUS1		0x01 /* Chip Status Register I */
+#define INFORM		0x02 /* Input Format */
+#define OPFORM		0x03 /* Output Format Control Register */
+#define DLYCTR		0x04 /* Hysteresis and HSYNC Delay Control */
+#define OUTCTR1		0x05 /* Output Control I */
+#define ACNTL1		0x06 /* Analog Control Register 1 */
+#define CROP_HI		0x07 /* Cropping Register, High */
+#define VDELAY_LO	0x08 /* Vertical Delay Register, Low */
+#define VACTIVE_LO	0x09 /* Vertical Active Register, Low */
+#define HDELAY_LO	0x0A /* Horizontal Delay Register, Low */
+#define HACTIVE_LO	0x0B /* Horizontal Active Register, Low */
+#define CNTRL1		0x0C /* Control Register I */
+#define VSCALE_LO	0x0D /* Vertical Scaling Register, Low */
+#define SCALE_HI	0x0E /* Scaling Register, High */
+#define HSCALE_LO	0x0F /* Horizontal Scaling Register, Low */
+#define BRIGHT		0x10 /* BRIGHTNESS Control Register */
+#define CONTRAST	0x11 /* CONTRAST Control Register */
+#define SHARPNESS	0x12 /* SHARPNESS Control Register I */
+#define SAT_U		0x13 /* Chroma (U) Gain Register */
+#define SAT_V		0x14 /* Chroma (V) Gain Register */
+#define HUE		0x15 /* Hue Control Register */
+#define CORING1		0x17
+#define CORING2		0x18 /* Coring and IF compensation */
+#define VBICNTL		0x19 /* VBI Control Register */
+#define ACNTL2		0x1A /* Analog Control 2 */
+#define OUTCTR2		0x1B /* Output Control 2 */
+#define SDT		0x1C /* Standard Selection */
+#define SDTR		0x1D /* Standard Recognition */
+#define TEST		0x1F /* Test Control Register */
+#define CLMPG		0x20 /* Clamping Gain */
+#define IAGC		0x21 /* Individual AGC Gain */
+#define AGCGAIN		0x22 /* AGC Gain */
+#define PEAKWT		0x23 /* White Peak Threshold */
+#define CLMPL		0x24 /* Clamp level */
+#define SYNCT		0x25 /* Sync Amplitude */
+#define MISSCNT		0x26 /* Sync Miss Count Register */
+#define PCLAMP		0x27 /* Clamp Position Register */
+#define VCNTL1		0x28 /* Vertical Control I */
+#define VCNTL2		0x29 /* Vertical Control II */
+#define CKILL		0x2A /* Color Killer Level Control */
+#define COMB		0x2B /* Comb Filter Control */
+#define LDLY		0x2C /* Luma Delay and H Filter Control */
+#define MISC1		0x2D /* Miscellaneous Control I */
+#define LOOP		0x2E /* LOOP Control Register */
+#define MISC2		0x2F /* Miscellaneous Control II */
+#define MVSN		0x30 /* Macrovision Detection */
+#define STATUS2		0x31 /* Chip STATUS II */
+#define HFREF		0x32 /* H monitor */
+#define CLMD		0x33 /* CLAMP MODE */
+#define IDCNTL		0x34 /* ID Detection Control */
+#define CLCNTL1		0x35 /* Clamp Control I */
+#define ANAPLLCTL	0x4C
+#define VBIMIN		0x4D
+#define HSLOWCTL	0x4E
+#define WSS3		0x4F
+#define FILLDATA	0x50
+#define SDID		0x51
+#define DID		0x52
+#define WSS1		0x53
+#define WSS2		0x54
+#define VVBI		0x55
+#define LCTL6		0x56
+#define LCTL7		0x57
+#define LCTL8		0x58
+#define LCTL9		0x59
+#define LCTL10		0x5A
+#define LCTL11		0x5B
+#define LCTL12		0x5C
+#define LCTL13		0x5D
+#define LCTL14		0x5E
+#define LCTL15		0x5F
+#define LCTL16		0x60
+#define LCTL17		0x61
+#define LCTL18		0x62
+#define LCTL19		0x63
+#define LCTL20		0x64
+#define LCTL21		0x65
+#define LCTL22		0x66
+#define LCTL23		0x67
+#define LCTL24		0x68
+#define LCTL25		0x69
+#define LCTL26		0x6A
+#define HSGEGIN		0x6B
+#define HSEND		0x6C
+#define OVSDLY		0x6D
+#define OVSEND		0x6E
+#define VBIDELAY	0x6F
+
+/*
+ * register detail
+ */
+
+/* INFORM */
+#define FC27_ON     0x40 /* 1 : Input crystal clock frequency is 27MHz */
+#define FC27_FF     0x00 /* 0 : Square pixel mode. */
+			 /*     Must use 24.54MHz for 60Hz field rate */
+			 /*     source or 29.5MHz for 50Hz field rate */
+#define IFSEL_S     0x10 /* 01 : S-video decoding */
+#define IFSEL_C     0x00 /* 00 : Composite video decoding */
+			 /* Y input video selection */
+#define YSEL_M0     0x00 /*  00 : Mux0 selected */
+#define YSEL_M1     0x04 /*  01 : Mux1 selected */
+#define YSEL_M2     0x08 /*  10 : Mux2 selected */
+#define YSEL_M3     0x10 /*  11 : Mux3 selected */
+
+/* OPFORM */
+#define MODE        0x80 /* 0 : CCIR601 compatible YCrCb 4:2:2 format */
+			 /* 1 : ITU-R-656 compatible data sequence format */
+#define LEN         0x40 /* 0 : 8-bit YCrCb 4:2:2 output format */
+			 /* 1 : 16-bit YCrCb 4:2:2 output format.*/
+#define LLCMODE     0x20 /* 1 : LLC output mode. */
+			 /* 0 : free-run output mode */
+#define AINC        0x10 /* Serial interface auto-indexing control */
+			 /* 0 : auto-increment */
+			 /* 1 : non-auto */
+#define VSCTL       0x08 /* 1 : Vertical out ctrl by DVALID */
+			 /* 0 : Vertical out ctrl by HACTIVE and DVALID */
+#define OEN         0x04 /* Output Enable together with TRI_SEL. */
+
+/* OUTCTR1 */
+#define VSP_LO      0x00 /* 0 : VS pin output polarity is active low */
+#define VSP_HI      0x80 /* 1 : VS pin output polarity is active high. */
+			 /* VS pin output control */
+#define VSSL_VSYNC  0x00 /*   0 : VSYNC  */
+#define VSSL_VACT   0x10 /*   1 : VACT   */
+#define VSSL_FIELD  0x20 /*   2 : FIELD  */
+#define VSSL_VVALID 0x30 /*   3 : VVALID */
+#define VSSL_ZERO   0x70 /*   7 : 0      */
+#define HSP_LOW     0x00 /* 0 : HS pin output polarity is active low */
+#define HSP_HI      0x08 /* 1 : HS pin output polarity is active high.*/
+			 /* HS pin output control */
+#define HSSL_HACT   0x00 /*   0 : HACT   */
+#define HSSL_HSYNC  0x01 /*   1 : HSYNC  */
+#define HSSL_DVALID 0x02 /*   2 : DVALID */
+#define HSSL_HLOCK  0x03 /*   3 : HLOCK  */
+#define HSSL_ASYNCW 0x04 /*   4 : ASYNCW */
+#define HSSL_ZERO   0x07 /*   7 : 0      */
+
+/* ACNTL1 */
+#define SRESET      0x80 /* resets the device to its default state
+			  * but all register content remain unchanged.
+			  * This bit is self-resetting.
+			  */
+
+/* VBICNTL */
+/* RTSEL : control the real time signal
+*          output from the MPOUT pin
+*/
+#define RTSEL_MASK  0x07
+#define RTSEL_VLOSS 0x00 /* 0000 = Video loss */
+#define RTSEL_HLOCK 0x01 /* 0001 = H-lock */
+#define RTSEL_SLOCK 0x02 /* 0010 = S-lock */
+#define RTSEL_VLOCK 0x03 /* 0011 = V-lock */
+#define RTSEL_MONO  0x04 /* 0100 = MONO */
+#define RTSEL_DET50 0x05 /* 0101 = DET50 */
+#define RTSEL_FIELD 0x06 /* 0110 = FIELD */
+#define RTSEL_RTCO  0x07 /* 0111 = RTCO ( Real Time Control ) */
+
+/*
+ * structure
+ */
+
+struct regval_list {
+	unsigned char reg_num;
+	unsigned char value;
+};
+
+struct tw9910_scale_ctrl {
+	char           *name;
+	unsigned short  width;
+	unsigned short  height;
+	u16             hscale;
+	u16             vscale;
+};
+
+struct tw9910_cropping_ctrl {
+	u16 vdelay;
+	u16 vactive;
+	u16 hdelay;
+	u16 hactive;
+};
+
+struct tw9910_hsync_ctrl {
+	u16 start;
+	u16 end;
+};
+
+struct tw9910_priv {
+	struct tw9910_video_info       *info;
+	struct i2c_client              *client;
+	struct soc_camera_device        icd;
+	const struct tw9910_scale_ctrl *scale;
+};
+
+/*
+ * register settings
+ */
+
+#define ENDMARKER { 0xff, 0xff }
+
+static const struct regval_list tw9910_default_regs[] =
+{
+	{ OPFORM,  0x00 },
+	{ OUTCTR1, VSP_LO | VSSL_VVALID | HSP_HI | HSSL_HSYNC },
+	ENDMARKER,
+};
+
+static const struct soc_camera_data_format tw9910_color_fmt[] = {
+	{
+		.name       = "VYUY",
+		.fourcc     = V4L2_PIX_FMT_VYUY,
+		.depth      = 16,
+		.colorspace = V4L2_COLORSPACE_SMPTE170M,
+	}
+};
+
+static const struct tw9910_scale_ctrl tw9910_ntsc_scales[] = {
+	{
+		.name   = "NTSC SQ",
+		.width  = 640,
+		.height = 480,
+		.hscale = 0x0100,
+		.vscale = 0x0100,
+	},
+	{
+		.name   = "NTSC CCIR601",
+		.width  = 720,
+		.height = 480,
+		.hscale = 0x0100,
+		.vscale = 0x0100,
+	},
+	{
+		.name   = "NTSC SQ (CIF)",
+		.width  = 320,
+		.height = 240,
+		.hscale = 0x0200,
+		.vscale = 0x0200,
+	},
+	{
+		.name   = "NTSC CCIR601 (CIF)",
+		.width  = 360,
+		.height = 240,
+		.hscale = 0x0200,
+		.vscale = 0x0200,
+	},
+	{
+		.name   = "NTSC SQ (QCIF)",
+		.width  = 160,
+		.height = 120,
+		.hscale = 0x0400,
+		.vscale = 0x0400,
+	},
+	{
+		.name   = "NTSC CCIR601 (QCIF)",
+		.width  = 180,
+		.height = 120,
+		.hscale = 0x0400,
+		.vscale = 0x0400,
+	},
+};
+
+static const struct tw9910_scale_ctrl tw9910_pal_scales[] = {
+	{
+		.name   = "PAL SQ",
+		.width  = 768,
+		.height = 576,
+		.hscale = 0x0100,
+		.vscale = 0x0100,
+	},
+	{
+		.name   = "PAL CCIR601",
+		.width  = 720,
+		.height = 576,
+		.hscale = 0x0100,
+		.vscale = 0x0100,
+	},
+	{
+		.name   = "PAL SQ (CIF)",
+		.width  = 384,
+		.height = 288,
+		.hscale = 0x0200,
+		.vscale = 0x0200,
+	},
+	{
+		.name   = "PAL CCIR601 (CIF)",
+		.width  = 360,
+		.height = 288,
+		.hscale = 0x0200,
+		.vscale = 0x0200,
+	},
+	{
+		.name   = "PAL SQ (QCIF)",
+		.width  = 192,
+		.height = 144,
+		.hscale = 0x0400,
+		.vscale = 0x0400,
+	},
+	{
+		.name   = "PAL CCIR601 (QCIF)",
+		.width  = 180,
+		.height = 144,
+		.hscale = 0x0400,
+		.vscale = 0x0400,
+	},
+};
+
+static const struct tw9910_cropping_ctrl tw9910_cropping_ctrl = {
+	.vdelay  = 0x0012,
+	.vactive = 0x00F0,
+	.hdelay  = 0x0010,
+	.hactive = 0x02D0,
+};
+
+static const struct tw9910_hsync_ctrl tw9910_hsync_ctrl = {
+	.start = 0x0260,
+	.end   = 0x0300,
+};
+
+/*
+ * general function
+ */
+static int tw9910_set_scale(struct i2c_client *client,
+			    const struct tw9910_scale_ctrl *scale)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, SCALE_HI,
+					(scale->vscale & 0x0F00) >> 4 |
+					(scale->hscale & 0x0F00) >> 8);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, HSCALE_LO,
+					scale->hscale & 0x00FF);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, VSCALE_LO,
+					scale->vscale & 0x00FF);
+
+	return ret;
+}
+
+static int tw9910_set_cropping(struct i2c_client *client,
+			       const struct tw9910_cropping_ctrl *cropping)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, CROP_HI,
+					(cropping->vdelay  & 0x0300) >> 2 |
+					(cropping->vactive & 0x0300) >> 4 |
+					(cropping->hdelay  & 0x0300) >> 6 |
+					(cropping->hactive & 0x0300) >> 8);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, VDELAY_LO,
+					cropping->vdelay & 0x00FF);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, VACTIVE_LO,
+					cropping->vactive & 0x00FF);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, HDELAY_LO,
+					cropping->hdelay & 0x00FF);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, HACTIVE_LO,
+					cropping->hactive & 0x00FF);
+
+	return ret;
+}
+
+static int tw9910_set_hsync(struct i2c_client *client,
+			    const struct tw9910_hsync_ctrl *hsync)
+{
+	int ret;
+
+	/* bit 10 - 3 */
+	ret = i2c_smbus_write_byte_data(client, HSGEGIN,
+					(hsync->start & 0x07F8) >> 3);
+	if (ret < 0)
+		return ret;
+
+	/* bit 10 - 3 */
+	ret = i2c_smbus_write_byte_data(client, HSEND,
+					(hsync->end & 0x07F8) >> 3);
+	if (ret < 0)
+		return ret;
+
+	/* bit 2 - 0 */
+	ret = i2c_smbus_read_byte_data(client, HSLOWCTL);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, HSLOWCTL,
+					(ret & 0x88)                 |
+					(hsync->start & 0x0007) << 4 |
+					(hsync->end   & 0x0007));
+
+	return ret;
+}
+
+static int tw9910_write_array(struct i2c_client *client,
+			      const struct regval_list *vals)
+{
+	while (vals->reg_num != 0xff) {
+		int ret = i2c_smbus_write_byte_data(client,
+						    vals->reg_num,
+						    vals->value);
+		if (ret < 0)
+			return ret;
+		vals++;
+	}
+	return 0;
+}
+
+static int tw9910_mask_set(struct i2c_client *client, u8 command,
+			   u8 mask, u8 set)
+{
+	s32 val = i2c_smbus_read_byte_data(client, command);
+
+	val &= ~mask;
+	val |=  set;
+
+	return i2c_smbus_write_byte_data(client, command, val);
+}
+
+static void tw9910_reset(struct i2c_client *client)
+{
+	i2c_smbus_write_byte_data(client, ACNTL1, SRESET);
+	msleep(1);
+}
+
+static const struct tw9910_scale_ctrl*
+tw9910_select_norm(struct soc_camera_device *icd, u32 width, u32 height)
+{
+	const struct tw9910_scale_ctrl *scale;
+	const struct tw9910_scale_ctrl *ret = NULL;
+	v4l2_std_id norm = icd->vdev->current_norm;
+	__u32 diff = 0xffffffff, tmp;
+	int size, i;
+
+	if (norm & V4L2_STD_NTSC) {
+		scale = tw9910_ntsc_scales;
+		size = ARRAY_SIZE(tw9910_ntsc_scales);
+	} else if (norm & V4L2_STD_PAL) {
+		scale = tw9910_pal_scales;
+		size = ARRAY_SIZE(tw9910_pal_scales);
+	} else {
+		return NULL;
+	}
+
+	for (i = 0; i < size; i++) {
+		tmp = abs(width - scale[i].width) +
+			abs(height - scale[i].height);
+		if (tmp < diff) {
+			diff = tmp;
+			ret = scale + i;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * soc_camera_ops function
+ */
+static int tw9910_init(struct soc_camera_device *icd)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+	int ret = 0;
+
+	if (priv->info->link.power) {
+		ret = priv->info->link.power(&priv->client->dev, 1);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (priv->info->link.reset)
+		ret = priv->info->link.reset(&priv->client->dev);
+
+	return ret;
+}
+
+static int tw9910_release(struct soc_camera_device *icd)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+	int ret = 0;
+
+	if (priv->info->link.power)
+		ret = priv->info->link.power(&priv->client->dev, 0);
+
+	return ret;
+}
+
+static int tw9910_start_capture(struct soc_camera_device *icd)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+
+	if (!priv->scale) {
+		dev_err(&icd->dev, "norm select error\n");
+		return -EPERM;
+	}
+
+	dev_dbg(&icd->dev, "%s %dx%d\n",
+		 priv->scale->name,
+		 priv->scale->width,
+		 priv->scale->height);
+
+	return 0;
+}
+
+static int tw9910_stop_capture(struct soc_camera_device *icd)
+{
+	return 0;
+}
+
+static int tw9910_set_bus_param(struct soc_camera_device *icd,
+				unsigned long flags)
+{
+	return 0;
+}
+
+static unsigned long tw9910_query_bus_param(struct soc_camera_device *icd)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+	struct soc_camera_link *icl = priv->client->dev.platform_data;
+	unsigned long flags = SOCAM_PCLK_SAMPLE_RISING | SOCAM_MASTER |
+		SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_HSYNC_ACTIVE_HIGH |
+		SOCAM_DATA_ACTIVE_HIGH | priv->info->buswidth;
+
+	return soc_camera_apply_sensor_flags(icl, flags);
+}
+
+static int tw9910_get_chip_id(struct soc_camera_device *icd,
+			      struct v4l2_chip_ident *id)
+{
+	id->ident = V4L2_IDENT_TW9910;
+	id->revision = 0;
+
+	return 0;
+}
+
+static int tw9910_set_std(struct soc_camera_device *icd,
+			  v4l2_std_id *a)
+{
+	int ret = -EINVAL;
+
+	if (*a & (V4L2_STD_NTSC | V4L2_STD_PAL))
+		ret = 0;
+
+	return ret;
+}
+
+static int tw9910_enum_input(struct soc_camera_device *icd,
+			     struct v4l2_input *inp)
+{
+	inp->type = V4L2_INPUT_TYPE_TUNER;
+	inp->std  = V4L2_STD_UNKNOWN;
+	strcpy(inp->name, "Video");
+
+	return 0;
+}
+
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+static int tw9910_get_register(struct soc_camera_device *icd,
+			       struct v4l2_register *reg)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+	int ret;
+
+	if (reg->reg > 0xff)
+		return -EINVAL;
+
+	ret = i2c_smbus_read_byte_data(priv->client, reg->reg);
+	if (ret < 0)
+		return ret;
+
+	/* ret      = int
+	 * reg->val = __u64
+	 */
+	reg->val = (__u64)ret;
+
+	return 0;
+}
+
+static int tw9910_set_register(struct soc_camera_device *icd,
+			       struct v4l2_register *reg)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+
+	if (reg->reg > 0xff ||
+	    reg->val > 0xff)
+		return -EINVAL;
+
+	return i2c_smbus_write_byte_data(priv->client, reg->reg, reg->val);
+}
+#endif
+
+static int tw9910_set_fmt(struct soc_camera_device *icd, __u32 pixfmt,
+			      struct v4l2_rect *rect)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+	int                 ret  = -EINVAL;
+	u8                  val;
+
+	/*
+	 * select suitable norm
+	 */
+	priv->scale = tw9910_select_norm(icd, rect->width, rect->height);
+	if (!priv->scale)
+		return ret;
+
+	/*
+	 * reset hardware
+	 */
+	tw9910_reset(priv->client);
+	ret = tw9910_write_array(priv->client, tw9910_default_regs);
+	if (ret < 0)
+		return ret;
+	/*
+	 * set bus width
+	 */
+	val = 0x00;
+	if (SOCAM_DATAWIDTH_16 == priv->info->buswidth)
+		val = LEN;
+
+	ret = tw9910_mask_set(priv->client, OPFORM, LEN, val);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * select MPOUT behavior
+	 */
+	switch (priv->info->mpout) {
+	case TW9910_MPO_VLOSS:
+		val = RTSEL_VLOSS; break;
+	case TW9910_MPO_HLOCK:
+		val = RTSEL_HLOCK; break;
+	case TW9910_MPO_SLOCK:
+		val = RTSEL_SLOCK; break;
+	case TW9910_MPO_VLOCK:
+		val = RTSEL_VLOCK; break;
+	case TW9910_MPO_MONO:
+		val = RTSEL_MONO;  break;
+	case TW9910_MPO_DET50:
+		val = RTSEL_DET50; break;
+	case TW9910_MPO_FIELD:
+		val = RTSEL_FIELD; break;
+	case TW9910_MPO_RTCO:
+		val = RTSEL_RTCO;  break;
+	default:
+		val = 0;
+	}
+
+	ret = tw9910_mask_set(priv->client, VBICNTL, RTSEL_MASK, val);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * set scale
+	 */
+	ret = tw9910_set_scale(priv->client, priv->scale);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * set cropping
+	 */
+	ret = tw9910_set_cropping(priv->client, &tw9910_cropping_ctrl);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * set hsync
+	 */
+	ret = tw9910_set_hsync(priv->client, &tw9910_hsync_ctrl);
+
+	return ret;
+}
+
+static int tw9910_try_fmt(struct soc_camera_device *icd,
+			      struct v4l2_format *f)
+{
+	struct v4l2_pix_format *pix = &f->fmt.pix;
+	const struct tw9910_scale_ctrl *scale;
+
+	if (V4L2_FIELD_ANY == pix->field) {
+		pix->field = V4L2_FIELD_INTERLACED;
+	} else if (V4L2_FIELD_INTERLACED != pix->field) {
+		dev_err(&icd->dev, "Field type invalid.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * select suitable norm
+	 */
+	scale = tw9910_select_norm(icd, pix->width, pix->height);
+	if (!scale)
+		return -EINVAL;
+
+	pix->width  = scale->width;
+	pix->height = scale->height;
+
+	return 0;
+}
+
+static int tw9910_video_probe(struct soc_camera_device *icd)
+{
+	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
+	s32 val;
+	int ret;
+
+	/*
+	 * We must have a parent by now. And it cannot be a wrong one.
+	 * So this entire test is completely redundant.
+	 */
+	if (!icd->dev.parent ||
+	    to_soc_camera_host(icd->dev.parent)->nr != icd->iface)
+		return -ENODEV;
+
+	/*
+	 * tw9910 only use 8 or 16 bit bus width
+	 */
+	if (SOCAM_DATAWIDTH_16 != priv->info->buswidth &&
+	    SOCAM_DATAWIDTH_8  != priv->info->buswidth) {
+		dev_err(&icd->dev, "bus width error\n");
+		return -ENODEV;
+	}
+
+	icd->formats     = tw9910_color_fmt;
+	icd->num_formats = ARRAY_SIZE(tw9910_color_fmt);
+
+	/*
+	 * check and show Product ID
+	 */
+	val = i2c_smbus_read_byte_data(priv->client, ID);
+	if (0x0B != GET_ID(val) ||
+	    0x00 != GET_ReV(val)) {
+		dev_err(&icd->dev,
+			"Product ID error %x:%x\n", GET_ID(val), GET_ReV(val));
+		return -ENODEV;
+	}
+
+	dev_info(&icd->dev,
+		 "tw9910 Product ID %0x:%0x\n", GET_ID(val), GET_ReV(val));
+
+	ret = soc_camera_video_start(icd);
+	if (ret < 0)
+		return ret;
+
+	icd->vdev->tvnorms      = V4L2_STD_NTSC | V4L2_STD_PAL;
+	icd->vdev->current_norm = V4L2_STD_NTSC;
+
+	return ret;
+}
+
+static void tw9910_video_remove(struct soc_camera_device *icd)
+{
+	soc_camera_video_stop(icd);
+}
+
+static struct soc_camera_ops tw9910_ops = {
+	.owner			= THIS_MODULE,
+	.probe			= tw9910_video_probe,
+	.remove			= tw9910_video_remove,
+	.init			= tw9910_init,
+	.release		= tw9910_release,
+	.start_capture		= tw9910_start_capture,
+	.stop_capture		= tw9910_stop_capture,
+	.set_fmt		= tw9910_set_fmt,
+	.try_fmt		= tw9910_try_fmt,
+	.set_bus_param		= tw9910_set_bus_param,
+	.query_bus_param	= tw9910_query_bus_param,
+	.get_chip_id		= tw9910_get_chip_id,
+	.set_std		= tw9910_set_std,
+	.enum_input		= tw9910_enum_input,
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+	.get_register		= tw9910_get_register,
+	.set_register		= tw9910_set_register,
+#endif
+};
+
+/*
+ * i2c_driver function
+ */
+
+static int tw9910_probe(struct i2c_client *client,
+			const struct i2c_device_id *did)
+
+{
+	struct tw9910_priv             *priv;
+	struct tw9910_video_info       *info;
+	struct soc_camera_device       *icd;
+	const struct tw9910_scale_ctrl *scale;
+	int                             i, ret;
+
+	info = client->dev.platform_data;
+	if (!info)
+		return -EINVAL;
+
+	if (!i2c_check_functionality(to_i2c_adapter(client->dev.parent),
+				     I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev,
+			"I2C-Adapter doesn't support "
+			"I2C_FUNC_SMBUS_BYTE_DATA\n");
+		return -EIO;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->info   = info;
+	priv->client = client;
+	i2c_set_clientdata(client, priv);
+
+	icd          = &priv->icd;
+	icd->ops     = &tw9910_ops;
+	icd->control = &client->dev;
+	icd->iface   = info->link.bus_id;
+
+	/*
+	 * set width and height
+	 */
+	icd->width_max  = tw9910_ntsc_scales[0].width; /* set default */
+	icd->width_min  = tw9910_ntsc_scales[0].width;
+	icd->height_max = tw9910_ntsc_scales[0].height;
+	icd->height_min = tw9910_ntsc_scales[0].height;
+
+	scale = tw9910_ntsc_scales;
+	for (i = 0; i < ARRAY_SIZE(tw9910_ntsc_scales); i++) {
+		icd->width_max  = max(scale[i].width,  icd->width_max);
+		icd->width_min  = min(scale[i].width,  icd->width_min);
+		icd->height_max = max(scale[i].height, icd->height_max);
+		icd->height_min = min(scale[i].height, icd->height_min);
+	}
+	scale = tw9910_pal_scales;
+	for (i = 0; i < ARRAY_SIZE(tw9910_pal_scales); i++) {
+		icd->width_max  = max(scale[i].width,  icd->width_max);
+		icd->width_min  = min(scale[i].width,  icd->width_min);
+		icd->height_max = max(scale[i].height, icd->height_max);
+		icd->height_min = min(scale[i].height, icd->height_min);
+	}
+
+	ret = soc_camera_device_register(icd);
+
+	if (ret) {
+		i2c_set_clientdata(client, NULL);
+		kfree(priv);
+	}
+
+	return ret;
+}
+
+static int tw9910_remove(struct i2c_client *client)
+{
+	struct tw9910_priv *priv = i2c_get_clientdata(client);
+
+	soc_camera_device_unregister(&priv->icd);
+	i2c_set_clientdata(client, NULL);
+	kfree(priv);
+	return 0;
+}
+
+static const struct i2c_device_id tw9910_id[] = {
+	{ "tw9910", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tw9910_id);
+
+static struct i2c_driver tw9910_i2c_driver = {
+	.driver = {
+		.name = "tw9910",
+	},
+	.probe    = tw9910_probe,
+	.remove   = tw9910_remove,
+	.id_table = tw9910_id,
+};
+
+/*
+ * module function
+ */
+static int __init tw9910_module_init(void)
+{
+	return i2c_add_driver(&tw9910_i2c_driver);
+}
+
+static void __exit tw9910_module_exit(void)
+{
+	i2c_del_driver(&tw9910_i2c_driver);
+}
+
+module_init(tw9910_module_init);
+module_exit(tw9910_module_exit);
+
+MODULE_DESCRIPTION("SoC Camera driver for tw9910");
+MODULE_AUTHOR("Kuninori Morimoto");
+MODULE_LICENSE("GPL v2");
diff --git a/include/media/tw9910.h b/include/media/tw9910.h
new file mode 100644
index 000000000000..73231e7880d8
--- /dev/null
+++ b/include/media/tw9910.h
@@ -0,0 +1,39 @@
+/*
+ * tw9910 Driver header
+ *
+ * Copyright (C) 2008 Renesas Solutions Corp.
+ * Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * Based on ov772x.h
+ *
+ * Copyright (C) Kuninori Morimoto <morimoto.kuninori@renesas.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __TW9910_H__
+#define __TW9910_H__
+
+#include <media/soc_camera.h>
+
+enum tw9910_mpout_pin {
+	TW9910_MPO_VLOSS,
+	TW9910_MPO_HLOCK,
+	TW9910_MPO_SLOCK,
+	TW9910_MPO_VLOCK,
+	TW9910_MPO_MONO,
+	TW9910_MPO_DET50,
+	TW9910_MPO_FIELD,
+	TW9910_MPO_RTCO,
+};
+
+struct tw9910_video_info {
+	unsigned long          buswidth;
+	enum tw9910_mpout_pin  mpout;
+	struct soc_camera_link link;
+};
+
+
+#endif /* __TW9910_H__ */
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index 30448cd51751..f9d42ad99b4f 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -87,6 +87,9 @@ enum {
 	/* module wm8775: just ident 8775 */
 	V4L2_IDENT_WM8775 = 8775,
 
+	/* module tw9910: just ident 9910 */
+	V4L2_IDENT_TW9910 = 9910,
+
 	/* module cs53132a: just ident 53132 */
 	V4L2_IDENT_CS53l32A = 53132,
 
-- 
cgit v1.2.3


From 4e96fd088cf6fb95ba4b212e5e72bac1e6d34e79 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <lg@denx.de>
Date: Mon, 29 Dec 2008 06:04:59 -0300
Subject: V4L/DVB (10099): soc-camera: add support for MT9T031 CMOS camera
 sensor from Micron

This camera is rather similar to MT9M001, but also has a couple of
enhanced features, like pixel binning.

 create mode 100644 drivers/media/video/mt9t031.c

Signed-off-by: Guennadi Liakhovetski <lg@denx.de>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig     |   6 +
 drivers/media/video/Makefile    |   1 +
 drivers/media/video/mt9t031.c   | 736 ++++++++++++++++++++++++++++++++++++++++
 include/media/v4l2-chip-ident.h |   1 +
 4 files changed, 744 insertions(+)
 create mode 100644 drivers/media/video/mt9t031.c

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 9785eeb1ed44..19cf3b8f67c4 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -741,6 +741,12 @@ config SOC_CAMERA_MT9M111
 	help
 	  This driver supports MT9M111 and MT9M112 cameras from Micron
 
+config SOC_CAMERA_MT9T031
+	tristate "mt9t031 support"
+	depends on SOC_CAMERA && I2C
+	help
+	  This driver supports MT9T031 cameras from Micron.
+
 config SOC_CAMERA_MT9V022
 	tristate "mt9v022 support"
 	depends on SOC_CAMERA && I2C
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 40c69afcde7a..1611c33b1aee 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -137,6 +137,7 @@ obj-$(CONFIG_VIDEO_OMAP2)		+= omap2cam.o
 obj-$(CONFIG_SOC_CAMERA)	+= soc_camera.o
 obj-$(CONFIG_SOC_CAMERA_MT9M001)	+= mt9m001.o
 obj-$(CONFIG_SOC_CAMERA_MT9M111)	+= mt9m111.o
+obj-$(CONFIG_SOC_CAMERA_MT9T031)	+= mt9t031.o
 obj-$(CONFIG_SOC_CAMERA_MT9V022)	+= mt9v022.o
 obj-$(CONFIG_SOC_CAMERA_OV772X)		+= ov772x.o
 obj-$(CONFIG_SOC_CAMERA_PLATFORM)	+= soc_camera_platform.o
diff --git a/drivers/media/video/mt9t031.c b/drivers/media/video/mt9t031.c
new file mode 100644
index 000000000000..1a9d53966d06
--- /dev/null
+++ b/drivers/media/video/mt9t031.c
@@ -0,0 +1,736 @@
+/*
+ * Driver for MT9T031 CMOS Image Sensor from Micron
+ *
+ * Copyright (C) 2008, Guennadi Liakhovetski, DENX Software Engineering <lg@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/videodev2.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/log2.h>
+
+#include <media/v4l2-common.h>
+#include <media/v4l2-chip-ident.h>
+#include <media/soc_camera.h>
+
+/* mt9t031 i2c address 0x5d
+ * The platform has to define i2c_board_info
+ * and call i2c_register_board_info() */
+
+/* mt9t031 selected register addresses */
+#define MT9T031_CHIP_VERSION		0x00
+#define MT9T031_ROW_START		0x01
+#define MT9T031_COLUMN_START		0x02
+#define MT9T031_WINDOW_HEIGHT		0x03
+#define MT9T031_WINDOW_WIDTH		0x04
+#define MT9T031_HORIZONTAL_BLANKING	0x05
+#define MT9T031_VERTICAL_BLANKING	0x06
+#define MT9T031_OUTPUT_CONTROL		0x07
+#define MT9T031_SHUTTER_WIDTH_UPPER	0x08
+#define MT9T031_SHUTTER_WIDTH		0x09
+#define MT9T031_PIXEL_CLOCK_CONTROL	0x0a
+#define MT9T031_FRAME_RESTART		0x0b
+#define MT9T031_SHUTTER_DELAY		0x0c
+#define MT9T031_RESET			0x0d
+#define MT9T031_READ_MODE_1		0x1e
+#define MT9T031_READ_MODE_2		0x20
+#define MT9T031_READ_MODE_3		0x21
+#define MT9T031_ROW_ADDRESS_MODE	0x22
+#define MT9T031_COLUMN_ADDRESS_MODE	0x23
+#define MT9T031_GLOBAL_GAIN		0x35
+#define MT9T031_CHIP_ENABLE		0xF8
+
+#define MT9T031_MAX_HEIGHT		1536
+#define MT9T031_MAX_WIDTH		2048
+#define MT9T031_MIN_HEIGHT		2
+#define MT9T031_MIN_WIDTH		2
+#define MT9T031_HORIZONTAL_BLANK	142
+#define MT9T031_VERTICAL_BLANK		25
+#define MT9T031_COLUMN_SKIP		32
+#define MT9T031_ROW_SKIP		20
+
+#define MT9T031_BUS_PARAM	(SOCAM_PCLK_SAMPLE_RISING |	\
+	SOCAM_PCLK_SAMPLE_FALLING | SOCAM_HSYNC_ACTIVE_HIGH |	\
+	SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_DATA_ACTIVE_HIGH |	\
+	SOCAM_MASTER | SOCAM_DATAWIDTH_10)
+
+static const struct soc_camera_data_format mt9t031_colour_formats[] = {
+	{
+		.name		= "Bayer (sRGB) 10 bit",
+		.depth		= 10,
+		.fourcc		= V4L2_PIX_FMT_SGRBG10,
+		.colorspace	= V4L2_COLORSPACE_SRGB,
+	}
+};
+
+struct mt9t031 {
+	struct i2c_client *client;
+	struct soc_camera_device icd;
+	int model;	/* V4L2_IDENT_MT9T031* codes from v4l2-chip-ident.h */
+	unsigned char autoexposure;
+	u16 xskip;
+	u16 yskip;
+};
+
+static int reg_read(struct soc_camera_device *icd, const u8 reg)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	struct i2c_client *client = mt9t031->client;
+	s32 data = i2c_smbus_read_word_data(client, reg);
+	return data < 0 ? data : swab16(data);
+}
+
+static int reg_write(struct soc_camera_device *icd, const u8 reg,
+		     const u16 data)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	return i2c_smbus_write_word_data(mt9t031->client, reg, swab16(data));
+}
+
+static int reg_set(struct soc_camera_device *icd, const u8 reg,
+		   const u16 data)
+{
+	int ret;
+
+	ret = reg_read(icd, reg);
+	if (ret < 0)
+		return ret;
+	return reg_write(icd, reg, ret | data);
+}
+
+static int reg_clear(struct soc_camera_device *icd, const u8 reg,
+		     const u16 data)
+{
+	int ret;
+
+	ret = reg_read(icd, reg);
+	if (ret < 0)
+		return ret;
+	return reg_write(icd, reg, ret & ~data);
+}
+
+static int set_shutter(struct soc_camera_device *icd, const u32 data)
+{
+	int ret;
+
+	ret = reg_write(icd, MT9T031_SHUTTER_WIDTH_UPPER, data >> 16);
+
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_SHUTTER_WIDTH, data & 0xffff);
+
+	return ret;
+}
+
+static int get_shutter(struct soc_camera_device *icd, u32 *data)
+{
+	int ret;
+
+	ret = reg_read(icd, MT9T031_SHUTTER_WIDTH_UPPER);
+	*data = ret << 16;
+
+	if (ret >= 0)
+		ret = reg_read(icd, MT9T031_SHUTTER_WIDTH);
+	*data |= ret & 0xffff;
+
+	return ret < 0 ? ret : 0;
+}
+
+static int mt9t031_init(struct soc_camera_device *icd)
+{
+	int ret;
+
+	/* Disable chip output, synchronous option update */
+	dev_dbg(icd->vdev->parent, "%s\n", __func__);
+
+	ret = reg_write(icd, MT9T031_RESET, 1);
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_RESET, 0);
+	if (ret >= 0)
+		ret = reg_clear(icd, MT9T031_OUTPUT_CONTROL, 3);
+
+	return ret >= 0 ? 0 : -EIO;
+}
+
+static int mt9t031_release(struct soc_camera_device *icd)
+{
+	/* Disable the chip */
+	reg_clear(icd, MT9T031_OUTPUT_CONTROL, 3);
+	return 0;
+}
+
+static int mt9t031_start_capture(struct soc_camera_device *icd)
+{
+	/* Switch to master "normal" mode */
+	if (reg_set(icd, MT9T031_OUTPUT_CONTROL, 3) < 0)
+		return -EIO;
+	return 0;
+}
+
+static int mt9t031_stop_capture(struct soc_camera_device *icd)
+{
+	/* Stop sensor readout */
+	if (reg_clear(icd, MT9T031_OUTPUT_CONTROL, 3) < 0)
+		return -EIO;
+	return 0;
+}
+
+static int mt9t031_set_bus_param(struct soc_camera_device *icd,
+				 unsigned long flags)
+{
+	/* The caller should have queried our parameters, check anyway */
+	if (flags & ~MT9T031_BUS_PARAM)
+		return -EINVAL;
+
+	if (flags & SOCAM_PCLK_SAMPLE_FALLING)
+		reg_set(icd, MT9T031_PIXEL_CLOCK_CONTROL, 0x8000);
+	else
+		reg_clear(icd, MT9T031_PIXEL_CLOCK_CONTROL, 0x8000);
+
+	return 0;
+}
+
+static unsigned long mt9t031_query_bus_param(struct soc_camera_device *icd)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	struct soc_camera_link *icl = mt9t031->client->dev.platform_data;
+
+	return soc_camera_apply_sensor_flags(icl, MT9T031_BUS_PARAM);
+}
+
+static int mt9t031_set_fmt(struct soc_camera_device *icd,
+			   __u32 pixfmt, struct v4l2_rect *rect)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	int ret;
+	const u16 hblank = MT9T031_HORIZONTAL_BLANK,
+		vblank = MT9T031_VERTICAL_BLANK;
+	u16 xbin, xskip = mt9t031->xskip, ybin, yskip = mt9t031->yskip,
+		width = rect->width * xskip, height = rect->height * yskip;
+
+	if (pixfmt) {
+		/* S_FMT - use binning and skipping for scaling, recalculate */
+		/* Is this more optimal than just a division? */
+		for (xskip = 8; xskip > 1; xskip--)
+			if (rect->width * xskip <= icd->width_max)
+				break;
+
+		for (yskip = 8; yskip > 1; yskip--)
+			if (rect->height * yskip <= icd->height_max)
+				break;
+
+		width = rect->width * xskip;
+		height = rect->height * yskip;
+
+		dev_dbg(&icd->dev, "xskip %u, width %u, yskip %u, height %u\n",
+			xskip, width, yskip, height);
+	}
+
+	xbin = min(xskip, (u16)3);
+	ybin = min(yskip, (u16)3);
+
+	/* Make sure we don't exceed frame limits */
+	if (rect->left + width > icd->width_max)
+		rect->left = (icd->width_max - width) / 2;
+
+	if (rect->top + height > icd->height_max)
+		rect->top = (icd->height_max - height) / 2;
+
+	/* Could just do roundup(rect->left, [xy]bin); but this is cheaper */
+	switch (xbin) {
+	case 2:
+		rect->left = (rect->left + 1) & ~1;
+		break;
+	case 3:
+		rect->left = roundup(rect->left, 3);
+	}
+
+	switch (ybin) {
+	case 2:
+		rect->top = (rect->top + 1) & ~1;
+		break;
+	case 3:
+		rect->top = roundup(rect->top, 3);
+	}
+
+	/* Blanking and start values - default... */
+	ret = reg_write(icd, MT9T031_HORIZONTAL_BLANKING, hblank);
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_VERTICAL_BLANKING, vblank);
+
+	if (pixfmt) {
+		/* Binning, skipping */
+		if (ret >= 0)
+			ret = reg_write(icd, MT9T031_COLUMN_ADDRESS_MODE,
+					((xbin - 1) << 4) | (xskip - 1));
+		if (ret >= 0)
+			ret = reg_write(icd, MT9T031_ROW_ADDRESS_MODE,
+					((ybin - 1) << 4) | (yskip - 1));
+	}
+	dev_dbg(&icd->dev, "new left %u, top %u\n", rect->left, rect->top);
+
+	/* The caller provides a supported format, as guaranteed by
+	 * icd->try_fmt_cap(), soc_camera_s_crop() and soc_camera_cropcap() */
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_COLUMN_START, rect->left);
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_ROW_START, rect->top);
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_WINDOW_WIDTH, width - 1);
+	if (ret >= 0)
+		ret = reg_write(icd, MT9T031_WINDOW_HEIGHT,
+				height + icd->y_skip_top - 1);
+	if (ret >= 0 && mt9t031->autoexposure) {
+		ret = set_shutter(icd, height + icd->y_skip_top + vblank);
+		if (ret >= 0) {
+			const u32 shutter_max = MT9T031_MAX_HEIGHT + vblank;
+			const struct v4l2_queryctrl *qctrl =
+				soc_camera_find_qctrl(icd->ops,
+						      V4L2_CID_EXPOSURE);
+			icd->exposure = (shutter_max / 2 + (height +
+					 icd->y_skip_top + vblank - 1) *
+					 (qctrl->maximum - qctrl->minimum)) /
+				shutter_max + qctrl->minimum;
+		}
+	}
+
+	if (!ret && pixfmt) {
+		mt9t031->xskip = xskip;
+		mt9t031->yskip = yskip;
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+static int mt9t031_try_fmt(struct soc_camera_device *icd,
+			   struct v4l2_format *f)
+{
+	struct v4l2_pix_format *pix = &f->fmt.pix;
+
+	if (pix->height < icd->height_min)
+		pix->height = icd->height_min;
+	if (pix->height > icd->height_max)
+		pix->height = icd->height_max;
+	if (pix->width < icd->width_min)
+		pix->width = icd->width_min;
+	if (pix->width > icd->width_max)
+		pix->width = icd->width_max;
+
+	pix->width &= ~0x01; /* has to be even */
+	pix->height &= ~0x01; /* has to be even */
+
+	return 0;
+}
+
+static int mt9t031_get_chip_id(struct soc_camera_device *icd,
+			       struct v4l2_chip_ident *id)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+
+	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+		return -EINVAL;
+
+	if (id->match_chip != mt9t031->client->addr)
+		return -ENODEV;
+
+	id->ident	= mt9t031->model;
+	id->revision	= 0;
+
+	return 0;
+}
+
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+static int mt9t031_get_register(struct soc_camera_device *icd,
+				struct v4l2_register *reg)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+
+	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+		return -EINVAL;
+
+	if (reg->match_chip != mt9t031->client->addr)
+		return -ENODEV;
+
+	reg->val = reg_read(icd, reg->reg);
+
+	if (reg->val > 0xffff)
+		return -EIO;
+
+	return 0;
+}
+
+static int mt9t031_set_register(struct soc_camera_device *icd,
+				struct v4l2_register *reg)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+
+	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+		return -EINVAL;
+
+	if (reg->match_chip != mt9t031->client->addr)
+		return -ENODEV;
+
+	if (reg_write(icd, reg->reg, reg->val) < 0)
+		return -EIO;
+
+	return 0;
+}
+#endif
+
+static const struct v4l2_queryctrl mt9t031_controls[] = {
+	{
+		.id		= V4L2_CID_VFLIP,
+		.type		= V4L2_CTRL_TYPE_BOOLEAN,
+		.name		= "Flip Vertically",
+		.minimum	= 0,
+		.maximum	= 1,
+		.step		= 1,
+		.default_value	= 0,
+	}, {
+		.id		= V4L2_CID_GAIN,
+		.type		= V4L2_CTRL_TYPE_INTEGER,
+		.name		= "Gain",
+		.minimum	= 0,
+		.maximum	= 127,
+		.step		= 1,
+		.default_value	= 64,
+		.flags		= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.id		= V4L2_CID_EXPOSURE,
+		.type		= V4L2_CTRL_TYPE_INTEGER,
+		.name		= "Exposure",
+		.minimum	= 1,
+		.maximum	= 255,
+		.step		= 1,
+		.default_value	= 255,
+		.flags		= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.id		= V4L2_CID_EXPOSURE_AUTO,
+		.type		= V4L2_CTRL_TYPE_BOOLEAN,
+		.name		= "Automatic Exposure",
+		.minimum	= 0,
+		.maximum	= 1,
+		.step		= 1,
+		.default_value	= 1,
+	}
+};
+
+static int mt9t031_video_probe(struct soc_camera_device *);
+static void mt9t031_video_remove(struct soc_camera_device *);
+static int mt9t031_get_control(struct soc_camera_device *, struct v4l2_control *);
+static int mt9t031_set_control(struct soc_camera_device *, struct v4l2_control *);
+
+static struct soc_camera_ops mt9t031_ops = {
+	.owner			= THIS_MODULE,
+	.probe			= mt9t031_video_probe,
+	.remove			= mt9t031_video_remove,
+	.init			= mt9t031_init,
+	.release		= mt9t031_release,
+	.start_capture		= mt9t031_start_capture,
+	.stop_capture		= mt9t031_stop_capture,
+	.set_fmt		= mt9t031_set_fmt,
+	.try_fmt		= mt9t031_try_fmt,
+	.set_bus_param		= mt9t031_set_bus_param,
+	.query_bus_param	= mt9t031_query_bus_param,
+	.controls		= mt9t031_controls,
+	.num_controls		= ARRAY_SIZE(mt9t031_controls),
+	.get_control		= mt9t031_get_control,
+	.set_control		= mt9t031_set_control,
+	.get_chip_id		= mt9t031_get_chip_id,
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+	.get_register		= mt9t031_get_register,
+	.set_register		= mt9t031_set_register,
+#endif
+};
+
+static int mt9t031_get_control(struct soc_camera_device *icd, struct v4l2_control *ctrl)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	int data;
+
+	switch (ctrl->id) {
+	case V4L2_CID_VFLIP:
+		data = reg_read(icd, MT9T031_READ_MODE_2);
+		if (data < 0)
+			return -EIO;
+		ctrl->value = !!(data & 0x8000);
+		break;
+	case V4L2_CID_HFLIP:
+		data = reg_read(icd, MT9T031_READ_MODE_2);
+		if (data < 0)
+			return -EIO;
+		ctrl->value = !!(data & 0x4000);
+		break;
+	case V4L2_CID_EXPOSURE_AUTO:
+		ctrl->value = mt9t031->autoexposure;
+		break;
+	}
+	return 0;
+}
+
+static int mt9t031_set_control(struct soc_camera_device *icd, struct v4l2_control *ctrl)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	const struct v4l2_queryctrl *qctrl;
+	int data;
+
+	qctrl = soc_camera_find_qctrl(&mt9t031_ops, ctrl->id);
+
+	if (!qctrl)
+		return -EINVAL;
+
+	switch (ctrl->id) {
+	case V4L2_CID_VFLIP:
+		if (ctrl->value)
+			data = reg_set(icd, MT9T031_READ_MODE_2, 0x8000);
+		else
+			data = reg_clear(icd, MT9T031_READ_MODE_2, 0x8000);
+		if (data < 0)
+			return -EIO;
+		break;
+	case V4L2_CID_HFLIP:
+		if (ctrl->value)
+			data = reg_set(icd, MT9T031_READ_MODE_2, 0x4000);
+		else
+			data = reg_clear(icd, MT9T031_READ_MODE_2, 0x4000);
+		if (data < 0)
+			return -EIO;
+		break;
+	case V4L2_CID_GAIN:
+		if (ctrl->value > qctrl->maximum || ctrl->value < qctrl->minimum)
+			return -EINVAL;
+		/* See Datasheet Table 7, Gain settings. */
+		if (ctrl->value <= qctrl->default_value) {
+			/* Pack it into 0..1 step 0.125, register values 0..8 */
+			unsigned long range = qctrl->default_value - qctrl->minimum;
+			data = ((ctrl->value - qctrl->minimum) * 8 + range / 2) / range;
+
+			dev_dbg(&icd->dev, "Setting gain %d\n", data);
+			data = reg_write(icd, MT9T031_GLOBAL_GAIN, data);
+			if (data < 0)
+				return -EIO;
+		} else {
+			/* Pack it into 1.125..15 variable step, register values 9..67 */
+			/* We assume qctrl->maximum - qctrl->default_value - 1 > 0 */
+			unsigned long range = qctrl->maximum - qctrl->default_value - 1;
+			unsigned long gain = ((ctrl->value - qctrl->default_value - 1) *
+					       111 + range / 2) / range + 9;
+
+			if (gain <= 32)
+				data = gain;
+			else if (gain <= 64)
+				data = ((gain - 32) * 16 + 16) / 32 + 80;
+			else
+				data = ((gain - 64) * 7 + 28) / 56 + 96;
+
+			dev_dbg(&icd->dev, "Setting gain from %d to %d\n",
+				 reg_read(icd, MT9T031_GLOBAL_GAIN), data);
+			data = reg_write(icd, MT9T031_GLOBAL_GAIN, data);
+			if (data < 0)
+				return -EIO;
+		}
+
+		/* Success */
+		icd->gain = ctrl->value;
+		break;
+	case V4L2_CID_EXPOSURE:
+		/* mt9t031 has maximum == default */
+		if (ctrl->value > qctrl->maximum || ctrl->value < qctrl->minimum)
+			return -EINVAL;
+		else {
+			const unsigned long range = qctrl->maximum - qctrl->minimum;
+			const u32 shutter = ((ctrl->value - qctrl->minimum) * 1048 +
+					     range / 2) / range + 1;
+			u32 old;
+
+			get_shutter(icd, &old);
+			dev_dbg(&icd->dev, "Setting shutter width from %u to %u\n",
+				old, shutter);
+			if (set_shutter(icd, shutter) < 0)
+				return -EIO;
+			icd->exposure = ctrl->value;
+			mt9t031->autoexposure = 0;
+		}
+		break;
+	case V4L2_CID_EXPOSURE_AUTO:
+		if (ctrl->value) {
+			const u16 vblank = MT9T031_VERTICAL_BLANK;
+			const u32 shutter_max = MT9T031_MAX_HEIGHT + vblank;
+			if (set_shutter(icd, icd->height +
+					icd->y_skip_top + vblank) < 0)
+				return -EIO;
+			qctrl = soc_camera_find_qctrl(icd->ops, V4L2_CID_EXPOSURE);
+			icd->exposure = (shutter_max / 2 + (icd->height +
+					 icd->y_skip_top + vblank - 1) *
+					 (qctrl->maximum - qctrl->minimum)) /
+				shutter_max + qctrl->minimum;
+			mt9t031->autoexposure = 1;
+		} else
+			mt9t031->autoexposure = 0;
+		break;
+	}
+	return 0;
+}
+
+/* Interface active, can use i2c. If it fails, it can indeed mean, that
+ * this wasn't our capture interface, so, we wait for the right one */
+static int mt9t031_video_probe(struct soc_camera_device *icd)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+	s32 data;
+	int ret;
+
+	/* We must have a parent by now. And it cannot be a wrong one.
+	 * So this entire test is completely redundant. */
+	if (!icd->dev.parent ||
+	    to_soc_camera_host(icd->dev.parent)->nr != icd->iface)
+		return -ENODEV;
+
+	/* Enable the chip */
+	data = reg_write(icd, MT9T031_CHIP_ENABLE, 1);
+	dev_dbg(&icd->dev, "write: %d\n", data);
+
+	/* Read out the chip version register */
+	data = reg_read(icd, MT9T031_CHIP_VERSION);
+
+	switch (data) {
+	case 0x1621:
+		mt9t031->model = V4L2_IDENT_MT9T031;
+		icd->formats = mt9t031_colour_formats;
+		icd->num_formats = ARRAY_SIZE(mt9t031_colour_formats);
+		break;
+	default:
+		ret = -ENODEV;
+		dev_err(&icd->dev,
+			"No MT9T031 chip detected, register read %x\n", data);
+		goto ei2c;
+	}
+
+	dev_info(&icd->dev, "Detected a MT9T031 chip ID %x\n", data);
+
+	/* Now that we know the model, we can start video */
+	ret = soc_camera_video_start(icd);
+	if (ret)
+		goto evstart;
+
+	return 0;
+
+evstart:
+ei2c:
+	return ret;
+}
+
+static void mt9t031_video_remove(struct soc_camera_device *icd)
+{
+	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
+
+	dev_dbg(&icd->dev, "Video %x removed: %p, %p\n", mt9t031->client->addr,
+		icd->dev.parent, icd->vdev);
+	soc_camera_video_stop(icd);
+}
+
+static int mt9t031_probe(struct i2c_client *client,
+			 const struct i2c_device_id *did)
+{
+	struct mt9t031 *mt9t031;
+	struct soc_camera_device *icd;
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct soc_camera_link *icl = client->dev.platform_data;
+	int ret;
+
+	if (!icl) {
+		dev_err(&client->dev, "MT9T031 driver needs platform data\n");
+		return -EINVAL;
+	}
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA)) {
+		dev_warn(&adapter->dev,
+			 "I2C-Adapter doesn't support I2C_FUNC_SMBUS_WORD\n");
+		return -EIO;
+	}
+
+	mt9t031 = kzalloc(sizeof(struct mt9t031), GFP_KERNEL);
+	if (!mt9t031)
+		return -ENOMEM;
+
+	mt9t031->client = client;
+	i2c_set_clientdata(client, mt9t031);
+
+	/* Second stage probe - when a capture adapter is there */
+	icd = &mt9t031->icd;
+	icd->ops	= &mt9t031_ops;
+	icd->control	= &client->dev;
+	icd->x_min	= MT9T031_COLUMN_SKIP;
+	icd->y_min	= MT9T031_ROW_SKIP;
+	icd->x_current	= icd->x_min;
+	icd->y_current	= icd->y_min;
+	icd->width_min	= MT9T031_MIN_WIDTH;
+	icd->width_max	= MT9T031_MAX_WIDTH;
+	icd->height_min	= MT9T031_MIN_HEIGHT;
+	icd->height_max	= MT9T031_MAX_HEIGHT;
+	icd->y_skip_top	= 0;
+	icd->iface	= icl->bus_id;
+	/* Simulated autoexposure. If enabled, we calculate shutter width
+	 * ourselves in the driver based on vertical blanking and frame width */
+	mt9t031->autoexposure = 1;
+
+	mt9t031->xskip = 1;
+	mt9t031->yskip = 1;
+
+	ret = soc_camera_device_register(icd);
+	if (ret)
+		goto eisdr;
+
+	return 0;
+
+eisdr:
+	i2c_set_clientdata(client, NULL);
+	kfree(mt9t031);
+	return ret;
+}
+
+static int mt9t031_remove(struct i2c_client *client)
+{
+	struct mt9t031 *mt9t031 = i2c_get_clientdata(client);
+
+	soc_camera_device_unregister(&mt9t031->icd);
+	i2c_set_clientdata(client, NULL);
+	kfree(mt9t031);
+
+	return 0;
+}
+
+static const struct i2c_device_id mt9t031_id[] = {
+	{ "mt9t031", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, mt9t031_id);
+
+static struct i2c_driver mt9t031_i2c_driver = {
+	.driver = {
+		.name = "mt9t031",
+	},
+	.probe		= mt9t031_probe,
+	.remove		= mt9t031_remove,
+	.id_table	= mt9t031_id,
+};
+
+static int __init mt9t031_mod_init(void)
+{
+	return i2c_add_driver(&mt9t031_i2c_driver);
+}
+
+static void __exit mt9t031_mod_exit(void)
+{
+	i2c_del_driver(&mt9t031_i2c_driver);
+}
+
+module_init(mt9t031_mod_init);
+module_exit(mt9t031_mod_exit);
+
+MODULE_DESCRIPTION("Micron MT9T031 Camera driver");
+MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index f9d42ad99b4f..43dbb659f1f5 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -177,6 +177,7 @@ enum {
 	V4L2_IDENT_MT9M112		= 45008,
 	V4L2_IDENT_MT9V022IX7ATC	= 45010, /* No way to detect "normal" I77ATx */
 	V4L2_IDENT_MT9V022IX7ATM	= 45015, /* and "lead free" IA7ATx chips */
+	V4L2_IDENT_MT9T031		= 45020,
 };
 
 #endif
-- 
cgit v1.2.3


From 92ab7886119da6375a983713eedab444e32094f6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 29 Dec 2008 19:15:43 -0300
Subject: V4L/DVB (10110): v4l2-ioctl: Fix warnings when using .unlocked_ioctl
 = __video_ioctl2

This patch fixes this warning:

drivers/media/video/gspca/gspca.c:1811: warning: initialization from incompatible pointer type

The reason is that the returned argument should be a long, not an
integer.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-ioctl.c | 2 +-
 include/media/v4l2-ioctl.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index bef4286933a1..b063381f4b3b 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -1852,7 +1852,7 @@ static int __video_do_ioctl(struct file *file,
 	return ret;
 }
 
-int __video_ioctl2(struct file *file,
+long __video_ioctl2(struct file *file,
 	       unsigned int cmd, unsigned long arg)
 {
 	char	sbuf[128];
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index c884432f9383..fcdb58c4ce07 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -297,7 +297,7 @@ extern int video_usercopy(struct file *file, unsigned int cmd,
 /* Standard handlers for V4L ioctl's */
 
 /* This prototype is used on fops.unlocked_ioctl */
-extern int __video_ioctl2(struct file *file,
+extern long __video_ioctl2(struct file *file,
 			unsigned int cmd, unsigned long arg);
 
 /* This prototype is used on fops.ioctl
-- 
cgit v1.2.3


From be6d3e56a6b9b3a4ee44a0685e39e595073c6f0d Mon Sep 17 00:00:00 2001
From: Kentaro Takeda <takedakn@nttdata.co.jp>
Date: Wed, 17 Dec 2008 13:24:15 +0900
Subject: introduce new LSM hooks where vfsmount is available.

Add new LSM hooks for path-based checks.  Call them on directory-modifying
operations at the points where we still know the vfsmount involved.

Signed-off-by: Kentaro Takeda <takedakn@nttdata.co.jp>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Toshiharu Harada <haradats@nttdata.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               |  36 +++++++++++++
 fs/open.c                |   5 ++
 include/linux/security.h | 137 +++++++++++++++++++++++++++++++++++++++++++++++
 net/unix/af_unix.c       |   4 ++
 security/Kconfig         |   9 ++++
 security/capability.c    |  57 ++++++++++++++++++++
 security/security.c      |  66 +++++++++++++++++++++++
 7 files changed, 314 insertions(+)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index af3783fff1de..ab441af4196b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1556,6 +1556,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		 * Refuse to truncate files with mandatory locks held on them.
 		 */
 		error = locks_verify_locked(inode);
+		if (!error)
+			error = security_path_truncate(&nd->path, 0,
+					       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
 		if (!error) {
 			DQUOT_INIT(inode);
 
@@ -1586,7 +1589,11 @@ static int __open_namei_create(struct nameidata *nd, struct path *path,
 
 	if (!IS_POSIXACL(dir->d_inode))
 		mode &= ~current->fs->umask;
+	error = security_path_mknod(&nd->path, path->dentry, mode, 0);
+	if (error)
+		goto out_unlock;
 	error = vfs_create(dir->d_inode, path->dentry, mode, nd);
+out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	dput(nd->path.dentry);
 	nd->path.dentry = path->dentry;
@@ -1999,6 +2006,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_mknod(&nd.path, dentry, mode, dev);
+	if (error)
+		goto out_drop_write;
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
 			error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
@@ -2011,6 +2021,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
 			break;
 	}
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2070,7 +2081,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_mkdir(&nd.path, dentry, mode);
+	if (error)
+		goto out_drop_write;
 	error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2180,7 +2195,11 @@ static long do_rmdir(int dfd, const char __user *pathname)
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto exit3;
+	error = security_path_rmdir(&nd.path, dentry);
+	if (error)
+		goto exit4;
 	error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+exit4:
 	mnt_drop_write(nd.path.mnt);
 exit3:
 	dput(dentry);
@@ -2265,7 +2284,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 		error = mnt_want_write(nd.path.mnt);
 		if (error)
 			goto exit2;
+		error = security_path_unlink(&nd.path, dentry);
+		if (error)
+			goto exit3;
 		error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+exit3:
 		mnt_drop_write(nd.path.mnt);
 	exit2:
 		dput(dentry);
@@ -2346,7 +2369,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_symlink(&nd.path, dentry, from);
+	if (error)
+		goto out_drop_write;
 	error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2443,7 +2470,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_link(old_path.dentry, &nd.path, new_dentry);
+	if (error)
+		goto out_drop_write;
 	error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(new_dentry);
@@ -2679,8 +2710,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(oldnd.path.mnt);
 	if (error)
 		goto exit5;
+	error = security_path_rename(&oldnd.path, old_dentry,
+				     &newnd.path, new_dentry);
+	if (error)
+		goto exit6;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
 				   new_dir->d_inode, new_dentry);
+exit6:
 	mnt_drop_write(oldnd.path.mnt);
 exit5:
 	dput(new_dentry);
diff --git a/fs/open.c b/fs/open.c
index c0a426d5766c..1cd7d40e9991 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 		goto put_write_and_out;
 
 	error = locks_verify_truncate(inode, NULL, length);
+	if (!error)
+		error = security_path_truncate(&path, length, 0);
 	if (!error) {
 		DQUOT_INIT(inode);
 		error = do_truncate(path.dentry, length, 0, NULL);
@@ -328,6 +330,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 		goto out_putf;
 
 	error = locks_verify_truncate(inode, file, length);
+	if (!error)
+		error = security_path_truncate(&file->f_path, length,
+					       ATTR_MTIME|ATTR_CTIME);
 	if (!error)
 		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
 out_putf:
diff --git a/include/linux/security.h b/include/linux/security.h
index 3416cb85e77b..b92b5e453f64 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -335,17 +335,37 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dir contains the inode structure of the parent directory of the new link.
  *	@new_dentry contains the dentry structure for the new link.
  *	Return 0 if permission is granted.
+ * @path_link:
+ *	Check permission before creating a new hard link to a file.
+ *	@old_dentry contains the dentry structure for an existing link
+ *	to the file.
+ *	@new_dir contains the path structure of the parent directory of
+ *	the new link.
+ *	@new_dentry contains the dentry structure for the new link.
+ *	Return 0 if permission is granted.
  * @inode_unlink:
  *	Check the permission to remove a hard link to a file.
  *	@dir contains the inode structure of parent directory of the file.
  *	@dentry contains the dentry structure for file to be unlinked.
  *	Return 0 if permission is granted.
+ * @path_unlink:
+ *	Check the permission to remove a hard link to a file.
+ *	@dir contains the path structure of parent directory of the file.
+ *	@dentry contains the dentry structure for file to be unlinked.
+ *	Return 0 if permission is granted.
  * @inode_symlink:
  *	Check the permission to create a symbolic link to a file.
  *	@dir contains the inode structure of parent directory of the symbolic link.
  *	@dentry contains the dentry structure of the symbolic link.
  *	@old_name contains the pathname of file.
  *	Return 0 if permission is granted.
+ * @path_symlink:
+ *	Check the permission to create a symbolic link to a file.
+ *	@dir contains the path structure of parent directory of
+ *	the symbolic link.
+ *	@dentry contains the dentry structure of the symbolic link.
+ *	@old_name contains the pathname of file.
+ *	Return 0 if permission is granted.
  * @inode_mkdir:
  *	Check permissions to create a new directory in the existing directory
  *	associated with inode strcture @dir.
@@ -353,11 +373,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dentry contains the dentry structure of new directory.
  *	@mode contains the mode of new directory.
  *	Return 0 if permission is granted.
+ * @path_mkdir:
+ *	Check permissions to create a new directory in the existing directory
+ *	associated with path strcture @path.
+ *	@dir containst the path structure of parent of the directory
+ *	to be created.
+ *	@dentry contains the dentry structure of new directory.
+ *	@mode contains the mode of new directory.
+ *	Return 0 if permission is granted.
  * @inode_rmdir:
  *	Check the permission to remove a directory.
  *	@dir contains the inode structure of parent of the directory to be removed.
  *	@dentry contains the dentry structure of directory to be removed.
  *	Return 0 if permission is granted.
+ * @path_rmdir:
+ *	Check the permission to remove a directory.
+ *	@dir contains the path structure of parent of the directory to be
+ *	removed.
+ *	@dentry contains the dentry structure of directory to be removed.
+ *	Return 0 if permission is granted.
  * @inode_mknod:
  *	Check permissions when creating a special file (or a socket or a fifo
  *	file created via the mknod system call).  Note that if mknod operation
@@ -368,6 +402,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mode contains the mode of the new file.
  *	@dev contains the device number.
  *	Return 0 if permission is granted.
+ * @path_mknod:
+ *	Check permissions when creating a file. Note that this hook is called
+ *	even if mknod operation is being done for a regular file.
+ *	@dir contains the path structure of parent of the new file.
+ *	@dentry contains the dentry structure of the new file.
+ *	@mode contains the mode of the new file.
+ *	@dev contains the undecoded device number. Use new_decode_dev() to get
+ *	the decoded device number.
+ *	Return 0 if permission is granted.
  * @inode_rename:
  *	Check for permission to rename a file or directory.
  *	@old_dir contains the inode structure for parent of the old link.
@@ -375,6 +418,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@new_dir contains the inode structure for parent of the new link.
  *	@new_dentry contains the dentry structure of the new link.
  *	Return 0 if permission is granted.
+ * @path_rename:
+ *	Check for permission to rename a file or directory.
+ *	@old_dir contains the path structure for parent of the old link.
+ *	@old_dentry contains the dentry structure of the old link.
+ *	@new_dir contains the path structure for parent of the new link.
+ *	@new_dentry contains the dentry structure of the new link.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -403,6 +453,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dentry contains the dentry structure for the file.
  *	@attr is the iattr structure containing the new file attributes.
  *	Return 0 if permission is granted.
+ * @path_truncate:
+ *	Check permission before truncating a file.
+ *	@path contains the path structure for the file.
+ *	@length is the new length of the file.
+ *	@time_attrs is the flags passed to do_truncate().
+ *	Return 0 if permission is granted.
  * @inode_getattr:
  *	Check permission before obtaining file attributes.
  *	@mnt is the vfsmount where the dentry was looked up
@@ -1331,6 +1387,22 @@ struct security_operations {
 				   struct super_block *newsb);
 	int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
 
+#ifdef CONFIG_SECURITY_PATH
+	int (*path_unlink) (struct path *dir, struct dentry *dentry);
+	int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode);
+	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
+	int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
+			   unsigned int dev);
+	int (*path_truncate) (struct path *path, loff_t length,
+			      unsigned int time_attrs);
+	int (*path_symlink) (struct path *dir, struct dentry *dentry,
+			     const char *old_name);
+	int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
+			  struct dentry *new_dentry);
+	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
+			    struct path *new_dir, struct dentry *new_dentry);
+#endif
+
 	int (*inode_alloc_security) (struct inode *inode);
 	void (*inode_free_security) (struct inode *inode);
 	int (*inode_init_security) (struct inode *inode, struct inode *dir,
@@ -2705,6 +2777,71 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi
 
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_unlink(struct path *dir, struct dentry *dentry);
+int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
+int security_path_rmdir(struct path *dir, struct dentry *dentry);
+int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+			unsigned int dev);
+int security_path_truncate(struct path *path, loff_t length,
+			   unsigned int time_attrs);
+int security_path_symlink(struct path *dir, struct dentry *dentry,
+			  const char *old_name);
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+		       struct dentry *new_dentry);
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+			 struct path *new_dir, struct dentry *new_dentry);
+#else	/* CONFIG_SECURITY_PATH */
+static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static inline int security_path_mkdir(struct path *dir, struct dentry *dentry,
+				      int mode)
+{
+	return 0;
+}
+
+static inline int security_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
+				      int mode, unsigned int dev)
+{
+	return 0;
+}
+
+static inline int security_path_truncate(struct path *path, loff_t length,
+					 unsigned int time_attrs)
+{
+	return 0;
+}
+
+static inline int security_path_symlink(struct path *dir, struct dentry *dentry,
+					const char *old_name)
+{
+	return 0;
+}
+
+static inline int security_path_link(struct dentry *old_dentry,
+				     struct path *new_dir,
+				     struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static inline int security_path_rename(struct path *old_dir,
+				       struct dentry *old_dentry,
+				       struct path *new_dir,
+				       struct dentry *new_dentry)
+{
+	return 0;
+}
+#endif	/* CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_KEYS
 #ifdef CONFIG_SECURITY
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c6250d0055d2..d1b89820ab4f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -836,7 +836,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		err = mnt_want_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
+		err = security_path_mknod(&nd.path, dentry, mode, 0);
+		if (err)
+			goto out_mknod_drop_write;
 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+out_mknod_drop_write:
 		mnt_drop_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
diff --git a/security/Kconfig b/security/Kconfig
index d9f47ce7e207..9438535d7fd0 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -81,6 +81,15 @@ config SECURITY_NETWORK_XFRM
 	  IPSec.
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_PATH
+	bool "Security hooks for pathname based access control"
+	depends on SECURITY
+	help
+	  This enables the security hooks for pathname based access control.
+	  If enabled, a security module can use these hooks to
+	  implement pathname based access controls.
+	  If you are unsure how to answer this question, answer N.
+
 config SECURITY_FILE_CAPABILITIES
 	bool "File POSIX Capabilities"
 	default n
diff --git a/security/capability.c b/security/capability.c
index 2dce66fcb992..c545bd1300b5 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -263,6 +263,53 @@ static void cap_inode_getsecid(const struct inode *inode, u32 *secid)
 	*secid = 0;
 }
 
+#ifdef CONFIG_SECURITY_PATH
+static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+			  unsigned int dev)
+{
+	return 0;
+}
+
+static int cap_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
+{
+	return 0;
+}
+
+static int cap_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static int cap_path_unlink(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static int cap_path_symlink(struct path *dir, struct dentry *dentry,
+			    const char *old_name)
+{
+	return 0;
+}
+
+static int cap_path_link(struct dentry *old_dentry, struct path *new_dir,
+			 struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static int cap_path_rename(struct path *old_path, struct dentry *old_dentry,
+			   struct path *new_path, struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static int cap_path_truncate(struct path *path, loff_t length,
+			     unsigned int time_attrs)
+{
+	return 0;
+}
+#endif
+
 static int cap_file_permission(struct file *file, int mask)
 {
 	return 0;
@@ -883,6 +930,16 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, inode_setsecurity);
 	set_to_cap_if_null(ops, inode_listsecurity);
 	set_to_cap_if_null(ops, inode_getsecid);
+#ifdef CONFIG_SECURITY_PATH
+	set_to_cap_if_null(ops, path_mknod);
+	set_to_cap_if_null(ops, path_mkdir);
+	set_to_cap_if_null(ops, path_rmdir);
+	set_to_cap_if_null(ops, path_unlink);
+	set_to_cap_if_null(ops, path_symlink);
+	set_to_cap_if_null(ops, path_link);
+	set_to_cap_if_null(ops, path_rename);
+	set_to_cap_if_null(ops, path_truncate);
+#endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
 	set_to_cap_if_null(ops, file_free_security);
diff --git a/security/security.c b/security/security.c
index d85dbb37c972..678d4d07b852 100644
--- a/security/security.c
+++ b/security/security.c
@@ -355,6 +355,72 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
 }
 EXPORT_SYMBOL(security_inode_init_security);
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_mknod(struct path *path, struct dentry *dentry, int mode,
+			unsigned int dev)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_mknod(path, dentry, mode, dev);
+}
+EXPORT_SYMBOL(security_path_mknod);
+
+int security_path_mkdir(struct path *path, struct dentry *dentry, int mode)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_mkdir(path, dentry, mode);
+}
+
+int security_path_rmdir(struct path *path, struct dentry *dentry)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_rmdir(path, dentry);
+}
+
+int security_path_unlink(struct path *path, struct dentry *dentry)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_unlink(path, dentry);
+}
+
+int security_path_symlink(struct path *path, struct dentry *dentry,
+			  const char *old_name)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_symlink(path, dentry, old_name);
+}
+
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+		       struct dentry *new_dentry)
+{
+	if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+		return 0;
+	return security_ops->path_link(old_dentry, new_dir, new_dentry);
+}
+
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+			 struct path *new_dir, struct dentry *new_dentry)
+{
+	if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
+		     (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+		return 0;
+	return security_ops->path_rename(old_dir, old_dentry, new_dir,
+					 new_dentry);
+}
+
+int security_path_truncate(struct path *path, loff_t length,
+			   unsigned int time_attrs)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_truncate(path, length, time_attrs);
+}
+#endif
+
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 	if (unlikely(IS_PRIVATE(dir)))
-- 
cgit v1.2.3


From c2452f32786159ed85f0e4b21fec09258f822fc8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 1 Dec 2008 09:33:43 +0100
Subject: shrink struct dentry

struct dentry is one of the most critical structures in the kernel. So it's
sad to see it going neglected.

With CONFIG_PROFILING turned on (which is probably the common case at least
for distros and kernel developers), sizeof(struct dcache) == 208 here
(64-bit). This gives 19 objects per slab.

I packed d_mounted into a hole, and took another 4 bytes off the inline
name length to take the padding out from the end of the structure. This
shinks it to 200 bytes. I could have gone the other way and increased the
length to 40, but I'm aiming for a magic number, read on...

I then got rid of the d_cookie pointer. This shrinks it to 192 bytes. Rant:
why was this ever a good idea? The cookie system should increase its hash
size or use a tree or something if lookups are a problem. Also the "fast
dcookie lookups" in oprofile should be moved into the dcookie code -- how
can oprofile possibly care about the dcookie_mutex? It gets dropped after
get_dcookie() returns so it can't be providing any sort of protection.

At 192 bytes, 21 objects fit into a 4K page, saving about 3MB on my system
with ~140 000 entries allocated. 192 is also a multiple of 64, so we get
nice cacheline alignment on 64 and 32 byte line systems -- any given dentry
will now require 3 cachelines to touch all fields wheras previously it
would require 4.

I know the inline name size was chosen quite carefully, however with the
reduction in cacheline footprint, it should actually be just about as fast
to do a name lookup for a 36 character name as it was before the patch (and
faster for other sizes). The memory footprint savings for names which are
<= 32 or > 36 bytes long should more than make up for the memory cost for
33-36 byte names.

Performance is a feature...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/oprofile/cell/spu_task_sync.c |  2 +-
 drivers/oprofile/buffer_sync.c             |  2 +-
 fs/dcache.c                                |  4 ----
 fs/dcookies.c                              | 28 +++++++++++++++++++---------
 include/linux/dcache.h                     | 21 ++++++++++++++-------
 5 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 2949126d28d1..6b793aeda72e 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 737bd9484822..65e8294a9e29 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -200,7 +200,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
diff --git a/fs/dcache.c b/fs/dcache.c
index a1d86c7f3e66..fd244c7a7cc0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -34,7 +34,6 @@
 #include <linux/bootmem.h>
 #include "internal.h"
 
-
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
@@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
-#ifdef CONFIG_PROFILING
-	dentry->d_cookie = NULL;
-#endif
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 855d4b1d619a..180e9fec4ad8 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path)
 {
 	struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
 							GFP_KERNEL);
+	struct dentry *d;
 	if (!dcs)
 		return NULL;
 
-	path->dentry->d_cookie = dcs;
+	d = path->dentry;
+	spin_lock(&d->d_lock);
+	d->d_flags |= DCACHE_COOKIE;
+	spin_unlock(&d->d_lock);
+
 	dcs->path = *path;
 	path_get(path);
 	hash_dcookie(dcs);
@@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie)
 		goto out;
 	}
 
-	dcs = path->dentry->d_cookie;
-
-	if (!dcs)
+	if (path->dentry->d_flags & DCACHE_COOKIE) {
+		dcs = find_dcookie((unsigned long)path->dentry);
+	} else {
 		dcs = alloc_dcookie(path);
-
-	if (!dcs) {
-		err = -ENOMEM;
-		goto out;
+		if (!dcs) {
+			err = -ENOMEM;
+			goto out;
+		}
 	}
 
 	*cookie = dcookie_value(dcs);
@@ -251,7 +256,12 @@ out_kmem:
 
 static void free_dcookie(struct dcookie_struct * dcs)
 {
-	dcs->path.dentry->d_cookie = NULL;
+	struct dentry *d = dcs->path.dentry;
+
+	spin_lock(&d->d_lock);
+	d->d_flags &= ~DCACHE_COOKIE;
+	spin_unlock(&d->d_lock);
+
 	path_put(&dcs->path);
 	kmem_cache_free(dcookie_cache, dcs);
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a37359d0bad1..c66d22487bf8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -75,14 +75,22 @@ full_name_hash(const unsigned char *name, unsigned int len)
 	return end_name_hash(hash);
 }
 
-struct dcookie_struct;
-
-#define DNAME_INLINE_LEN_MIN 36
+/*
+ * Try to keep struct dentry aligned on 64 byte cachelines (this will
+ * give reasonable cacheline footprint with larger lines without the
+ * large memory footprint increase).
+ */
+#ifdef CONFIG_64BIT
+#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */
+#else
+#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */
+#endif
 
 struct dentry {
 	atomic_t d_count;
 	unsigned int d_flags;		/* protected by d_lock */
 	spinlock_t d_lock;		/* per dentry lock */
+	int d_mounted;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
 	/*
@@ -107,10 +115,7 @@ struct dentry {
 	struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	void *d_fsdata;			/* fs-specific data */
-#ifdef CONFIG_PROFILING
-	struct dcookie_struct *d_cookie; /* cookie, if any */
-#endif
-	int d_mounted;
+
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
@@ -177,6 +182,8 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
 
+#define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
-- 
cgit v1.2.3


From dded4f4d5048e64a01cf52eed4d27c8cb2600525 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 1 Dec 2008 14:34:50 -0800
Subject: include: linux/fs.h: put declarations in __KERNEL__

include/linux/fs.h contains externs for a bunch of variables.  That obviously
belongs under ifdef __KERNEL__.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 001ded4845b4..c5e4c5c74034 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,6 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-extern int sysctl_nr_open;
 #define INR_OPEN 1024		/* Initial setting for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
@@ -38,21 +37,13 @@ struct files_stat_struct {
 	int nr_free_files;	/* read only */
 	int max_files;		/* tunable */
 };
-extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
 
 struct inodes_stat_t {
 	int nr_inodes;
 	int nr_unused;
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
-extern struct inodes_stat_t inodes_stat;
 
-extern int leases_enable, lease_break_time;
-
-#ifdef CONFIG_DNOTIFY
-extern int dir_notify_enable;
-#endif
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -330,6 +321,15 @@ extern void __init inode_init(void);
 extern void __init inode_init_early(void);
 extern void __init files_init(unsigned long);
 
+extern struct files_stat_struct files_stat;
+extern int get_max_files(void);
+extern int sysctl_nr_open;
+extern struct inodes_stat_t inodes_stat;
+extern int leases_enable, lease_break_time;
+#ifdef CONFIG_DNOTIFY
+extern int dir_notify_enable;
+#endif
+
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
-- 
cgit v1.2.3


From 035146851cfa2fe24c1d9dc7637bb009ad06b2f7 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:11 +0000
Subject: vfs: introduce helper function to safely NUL-terminate symlinks

A number of filesystems were potentially triggering kernel bugs due to
corrupted symlink names on disk. This function helps safely terminate
the names.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 99eb80306dc5..fc2e03579877 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -94,4 +94,9 @@ static inline char *nd_get_link(struct nameidata *nd)
 	return nd->saved_names[nd->depth];
 }
 
+static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
+{
+	((char *) name)[min(len, maxlen)] = '\0';
+}
+
 #endif /* _LINUX_NAMEI_H */
-- 
cgit v1.2.3


From 3fb64190aa3c23c10e6e9fd0124ac030115c99bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Oct 2008 09:58:10 +0200
Subject: pass a struct path * to may_open

No need for the nameidata in may_open - a struct path is enough.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c         | 14 +++++++-------
 fs/nfsctl.c        |  5 +++--
 include/linux/fs.h |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index d4d0b59ed2cc..5cc0dc95a7a5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1487,9 +1487,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	return error;
 }
 
-int may_open(struct nameidata *nd, int acc_mode, int flag)
+int may_open(struct path *path, int acc_mode, int flag)
 {
-	struct dentry *dentry = nd->path.dentry;
+	struct dentry *dentry = path->dentry;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -1510,13 +1510,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 	    	flag &= ~O_TRUNC;
 	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
-		if (nd->path.mnt->mnt_flags & MNT_NODEV)
+		if (path->mnt->mnt_flags & MNT_NODEV)
 			return -EACCES;
 
 		flag &= ~O_TRUNC;
 	}
 
-	error = vfs_permission(nd, acc_mode);
+	error = inode_permission(inode, acc_mode);
 	if (error)
 		return error;
 	/*
@@ -1551,7 +1551,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		 */
 		error = locks_verify_locked(inode);
 		if (!error)
-			error = security_path_truncate(&nd->path, 0,
+			error = security_path_truncate(path, 0,
 					       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
 		if (!error) {
 			DQUOT_INIT(inode);
@@ -1594,7 +1594,7 @@ out_unlock:
 	if (error)
 		return error;
 	/* Don't check for write permission, don't truncate */
-	return may_open(nd, 0, flag & ~O_TRUNC);
+	return may_open(&nd->path, 0, flag & ~O_TRUNC);
 }
 
 /*
@@ -1780,7 +1780,7 @@ ok:
 		if (error)
 			goto exit;
 	}
-	error = may_open(&nd, acc_mode, flag);
+	error = may_open(&nd.path, acc_mode, flag);
 	if (error) {
 		if (will_write)
 			mnt_drop_write(nd.path.mnt);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index b1acbd6ab6fb..b27451909dff 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags)
 		return ERR_PTR(error);
 
 	if (flags == O_RDWR)
-		error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE);
+		error = may_open(&nd.path, MAY_READ|MAY_WRITE,
+					   FMODE_READ|FMODE_WRITE);
 	else
-		error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
+		error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
 
 	if (!error)
 		return dentry_open(nd.path.dentry, nd.path.mnt, flags,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c5e4c5c74034..3468df5a06e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1869,7 +1869,7 @@ extern void free_write_pipe(struct file *);
 
 extern struct file *do_filp_open(int dfd, const char *pathname,
 		int open_flag, int mode);
-extern int may_open(struct nameidata *, int, int);
+extern int may_open(struct path *, int, int);
 
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
 extern struct file * open_exec(const char *);
-- 
cgit v1.2.3


From cb23beb55100171646e69e248fb45f10db6e99a4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Oct 2008 09:59:29 +0200
Subject: kill vfs_permission

With all the nameidata removal there's no point anymore for this helper.
Of the three callers left two will go away with the next lookup series
anyway.

Also add proper kerneldoc to inode_permission as this is the main
permission check routine now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c          |  5 +++--
 fs/namei.c         | 31 +++++++++++++------------------
 include/linux/fs.h |  1 -
 3 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 02d2e120542d..dfbf7009fbe7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -127,7 +127,8 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
+	error = inode_permission(nd.path.dentry->d_inode,
+				 MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
 
@@ -680,7 +681,7 @@ struct file *open_exec(const char *name)
 	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
 		goto out_path_put;
 
-	err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
+	err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
 	if (err)
 		goto out_path_put;
 
diff --git a/fs/namei.c b/fs/namei.c
index 5cc0dc95a7a5..3f88e043d459 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask,
 	return -EACCES;
 }
 
+/**
+ * inode_permission  -  check for access rights to a given inode
+ * @inode:	inode to check permission on
+ * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Used to check for read/write/execute permissions on an inode.
+ * We use "fsuid" for this, letting us set arbitrary permissions
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things.
+ */
 int inode_permission(struct inode *inode, int mask)
 {
 	int retval;
@@ -263,21 +273,6 @@ int inode_permission(struct inode *inode, int mask)
 			mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
 }
 
-/**
- * vfs_permission  -  check for access rights to a given path
- * @nd:		lookup result that describes the path
- * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
- *
- * Used to check for read/write/execute permissions on a path.
- * We use "fsuid" for this, letting us set arbitrary permissions
- * for filesystem access without changing the "normal" uids which
- * are used for other things.
- */
-int vfs_permission(struct nameidata *nd, int mask)
-{
-	return inode_permission(nd->path.dentry->d_inode, mask);
-}
-
 /**
  * file_permission  -  check for additional access rights to a given file
  * @file:	file to check access rights for
@@ -288,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask)
  *
  * Note:
  *	Do not use this function in new code.  All access checks should
- *	be done using vfs_permission().
+ *	be done using inode_permission().
  */
 int file_permission(struct file *file, int mask)
 {
@@ -853,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode);
 		if (err == -EAGAIN)
-			err = vfs_permission(nd, MAY_EXEC);
+			err = inode_permission(nd->path.dentry->d_inode,
+					       MAY_EXEC);
  		if (err)
 			break;
 
@@ -2882,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(kern_path);
 EXPORT_SYMBOL(vfs_path_lookup);
 EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
 EXPORT_SYMBOL(unlock_rename);
 EXPORT_SYMBOL(vfs_create);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3468df5a06e0..fd615986a41c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1212,7 +1212,6 @@ extern void unlock_super(struct super_block *);
 /*
  * VFS helper functions..
  */
-extern int vfs_permission(struct nameidata *, int);
 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
 extern int vfs_mkdir(struct inode *, struct dentry *, int);
 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
-- 
cgit v1.2.3


From 18d8fda7c3c9439be04d7ea2e82da2513b121acb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:35:37 -0500
Subject: take init_fs to saner place

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/init_task.c     | 1 -
 arch/arm/kernel/init_task.c       | 1 -
 arch/avr32/kernel/init_task.c     | 1 -
 arch/blackfin/kernel/init_task.c  | 1 -
 arch/cris/kernel/process.c        | 1 -
 arch/frv/kernel/init_task.c       | 1 -
 arch/h8300/kernel/init_task.c     | 1 -
 arch/ia64/kernel/init_task.c      | 1 -
 arch/m32r/kernel/init_task.c      | 1 -
 arch/m68k/kernel/process.c        | 1 -
 arch/m68knommu/kernel/init_task.c | 1 -
 arch/mips/kernel/init_task.c      | 1 -
 arch/mn10300/kernel/init_task.c   | 1 -
 arch/parisc/kernel/init_task.c    | 1 -
 arch/powerpc/kernel/init_task.c   | 1 -
 arch/s390/kernel/init_task.c      | 1 -
 arch/sh/kernel/init_task.c        | 1 -
 arch/sparc/kernel/init_task.c     | 1 -
 arch/um/kernel/init_task.c        | 1 -
 arch/x86/kernel/init_task.c       | 1 -
 arch/xtensa/kernel/init_task.c    | 1 -
 fs/namei.c                        | 7 +++++++
 include/linux/fs_struct.h         | 6 ------
 include/linux/init_task.h         | 1 +
 24 files changed, 8 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 1f762189fa64..c2938e574a40 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/uaccess.h>
 
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 0bbf80625395..e859af349467 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -12,7 +12,6 @@
 
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 44058469c6ec..993d56ee3cf3 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -13,7 +13,6 @@
 
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index 6bdba7b21109..2c228c020978 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -33,7 +33,6 @@
 #include <linux/mqueue.h>
 #include <linux/fs.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 5933656db5a2..60816e876455 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -37,7 +37,6 @@
  * setup.
  */
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index e2198815b630..29429a8b7f6a 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 93a4899e46c2..cb5dc552da97 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 9d7e1c66faf4..5b0e830c6f33 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -17,7 +17,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 0d658dbb6766..016885c6f260 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -11,7 +11,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 3042c2bc8c58..632ce016014d 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,7 +40,6 @@
  * alignment requirements and potentially different initial
  * setup.
  */
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 344c01aede08..fe282de1d596 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index d72487ad7c15..149cd914526e 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -9,7 +9,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index af16f6e5c918..5ac3566f8c98 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -18,7 +18,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index f5941c086551..1e25a45d64c1 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -34,7 +34,6 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 4c85b8d56478..688b329800bd 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -7,7 +7,6 @@
 #include <linux/mqueue.h>
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index e80716843619..7db95c0b8693 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -16,7 +16,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index b151a25cb14d..80c35ff71d56 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -7,7 +7,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct pt_regs fake_swapper_regs;
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 62126e4cec54..f28cb8278e98 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index 910eda8fca18..806d381947bf 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include "linux/mqueue.h"
 #include "asm/uaccess.h"
 
-static struct fs_struct init_fs = INIT_FS;
 struct mm_struct init_mm = INIT_MM(init_mm);
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index 3df469dbe814..e07f5c9fcd35 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -21,7 +21,6 @@
 
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/fs/namei.c b/fs/namei.c
index 3f88e043d459..e203691b9d12 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2893,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink);
 EXPORT_SYMBOL(vfs_unlink);
 EXPORT_SYMBOL(dentry_unhash);
 EXPORT_SYMBOL(generic_readlink);
+
+/* to be mentioned only in INIT_TASK */
+struct fs_struct init_fs = {
+	.count		= ATOMIC_INIT(1),
+	.lock		= RW_LOCK_UNLOCKED,
+	.umask		= 0022,
+};
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 9e5a06e78d02..a97c053d3a9a 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -10,12 +10,6 @@ struct fs_struct {
 	struct path root, pwd;
 };
 
-#define INIT_FS {				\
-	.count		= ATOMIC_INIT(1),	\
-	.lock		= RW_LOCK_UNLOCKED,	\
-	.umask		= 0022, \
-}
-
 extern struct kmem_cache *fs_cachep;
 
 extern void exit_fs(struct task_struct *);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 959f5522d10a..2f3c2d4ef73b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,7 @@
 #include <net/net_namespace.h>
 
 extern struct files_struct init_files;
+extern struct fs_struct init_fs;
 
 #define INIT_KIOCTX(name, which_mm) \
 {							\
-- 
cgit v1.2.3


From b6b3fdead251d432f32f2cfce2a893ab8a658110 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 10 Dec 2008 09:35:45 -0800
Subject: filp_cachep can be static in fs/file_table.c

Instead of creating the "filp" kmem_cache in vfs_caches_init(),
we can do it a litle be later in files_init(), so that filp_cachep
is static to fs/file_table.c

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c             |  6 ------
 fs/file_table.c         | 10 +++++++++-
 include/linux/fdtable.h |  2 --
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index bdb3f50248a7..e88c23b85a32 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2314,9 +2314,6 @@ static void __init dcache_init(void)
 /* SLAB cache for __getname() consumers */
 struct kmem_cache *names_cachep __read_mostly;
 
-/* SLAB cache for file structures */
-struct kmem_cache *filp_cachep __read_mostly;
-
 EXPORT_SYMBOL(d_genocide);
 
 void __init vfs_caches_init_early(void)
@@ -2338,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages)
 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 
-	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-
 	dcache_init();
 	inode_init();
 	files_init(mempages);
diff --git a/fs/file_table.c b/fs/file_table.c
index 0fbcacc3ea75..bbeeac6efa1a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -32,6 +32,9 @@ struct files_stat_struct files_stat = {
 /* public. Not pretty! */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
+/* SLAB cache for file structures */
+static struct kmem_cache *filp_cachep __read_mostly;
+
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
 static inline void file_free_rcu(struct rcu_head *head)
@@ -397,7 +400,12 @@ too_bad:
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
-	/* One file with associated inode and dcache is very roughly 1K. 
+
+	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+
+	/*
+	 * One file with associated inode and dcache is very roughly 1K.
 	 * Per default don't use more than 10% of our memory for files. 
 	 */ 
 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 4aab6f12cfab..09d6c5bbdddd 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,8 +57,6 @@ struct files_struct {
 
 #define files_fdtable(files) (rcu_dereference((files)->fdt))
 
-extern struct kmem_cache *filp_cachep;
-
 struct file_operations;
 struct vfsmount;
 struct dentry;
-- 
cgit v1.2.3


From 6badd79bd002788aaec27b50a74ab69ef65ab8ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:57:40 -0500
Subject: kill ->dir_notify()

Remove the hopelessly misguided ->dir_notify().  The only instance (cifs)
has been broken by design from the very beginning; the objects it creates
are never destroyed, keep references to struct file they can outlive, nothing
that could possibly evict them exists on close(2) path *and* no locking
whatsoever is done to prevent races with close(), should the previous, er,
deficiencies someday be dealt with.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |   2 -
 Documentation/filesystems/vfs.txt |   3 -
 fs/bad_inode.c                    |   6 --
 fs/cifs/Makefile                  |   2 +-
 fs/cifs/cifsfs.c                  |   7 ---
 fs/cifs/cifsfs.h                  |   1 -
 fs/cifs/fcntl.c                   | 118 --------------------------------------
 fs/dnotify.c                      |   3 -
 include/linux/fs.h                |   1 -
 9 files changed, 1 insertion(+), 142 deletions(-)
 delete mode 100644 fs/cifs/fcntl.c

(limited to 'include')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 23d2f4460deb..ccec55394380 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -394,7 +394,6 @@ prototypes:
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
 			unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *, unsigned long);
 };
 
 locking rules:
@@ -424,7 +423,6 @@ sendfile:		no
 sendpage:		no
 get_unmapped_area:	no
 check_flags:		no
-dir_notify:		no
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 041cb771d505..ef19afa186a9 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -733,7 +733,6 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
@@ -800,8 +799,6 @@ otherwise noted.
 
   check_flags: called by the fcntl(2) system call for F_SETFL command
 
-  dir_notify: called by the fcntl(2) system call for F_NOTIFY command
-
   flock: called by the flock(2) system call
 
   splice_write: called by the VFS to splice data from a pipe to a file. This
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5f1538c03b1b..a05287a23f62 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags)
 	return -EIO;
 }
 
-static int bad_file_dir_notify(struct file *file, unsigned long arg)
-{
-	return -EIO;
-}
-
 static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	return -EIO;
@@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops =
 	.sendpage	= bad_file_sendpage,
 	.get_unmapped_area = bad_file_get_unmapped_area,
 	.check_flags	= bad_file_check_flags,
-	.dir_notify	= bad_file_dir_notify,
 	.flock		= bad_file_flock,
 	.splice_write	= bad_file_splice_write,
 	.splice_read	= bad_file_splice_read,
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ba43fb346fb..9948c0030e86 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
-	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \
+	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o cifsacl.o
 
 cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0005a194a75c..13ea53251dcf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = {
 	.readdir = cifs_readdir,
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
 	.unlocked_ioctl  = cifs_ioctl,
 	.llseek = generic_file_llseek,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 2ce04c73d74e..7ac481841f87 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
 extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
 extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
-extern int cifs_dir_notify(struct file *, unsigned long arg);
 
 /* Functions related to dir entries */
 extern struct dentry_operations cifs_dentry_ops;
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
deleted file mode 100644
index 5a57581eb4b2..000000000000
--- a/fs/cifs/fcntl.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *   fs/cifs/fcntl.c
- *
- *   vfs operations that deal with the file control API
- *
- *   Copyright (C) International Business Machines  Corp., 2003,2004
- *   Author(s): Steve French (sfrench@us.ibm.com)
- *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include "cifsglob.h"
-#include "cifsproto.h"
-#include "cifs_unicode.h"
-#include "cifs_debug.h"
-#include "cifsfs.h"
-
-static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
-{
-	__u32 cifs_ntfy_flags = 0;
-
-	/* No way on Linux VFS to ask to monitor xattr
-	changes (and no stream support either */
-	if (fcntl_notify_flags & DN_ACCESS)
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
-	if (fcntl_notify_flags & DN_MODIFY) {
-		/* What does this mean on directories? */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
-			FILE_NOTIFY_CHANGE_SIZE;
-	}
-	if (fcntl_notify_flags & DN_CREATE) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
-			FILE_NOTIFY_CHANGE_LAST_WRITE;
-	}
-	if (fcntl_notify_flags & DN_DELETE)
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
-	if (fcntl_notify_flags & DN_RENAME) {
-		/* BB review this - checking various server behaviors */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
-			FILE_NOTIFY_CHANGE_FILE_NAME;
-	}
-	if (fcntl_notify_flags & DN_ATTRIB) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
-			FILE_NOTIFY_CHANGE_ATTRIBUTES;
-	}
-/*	if (fcntl_notify_flags & DN_MULTISHOT) {
-		cifs_ntfy_flags |= ;
-	} */ /* BB fixme - not sure how to handle this with CIFS yet */
-
-	return cifs_ntfy_flags;
-}
-
-int cifs_dir_notify(struct file *file, unsigned long arg)
-{
-	int xid;
-	int rc = -EINVAL;
-	int oplock = 0;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	char *full_path = NULL;
-	__u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
-	__u16 netfid;
-
-	if (experimEnabled == 0)
-		return 0;
-
-	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	pTcon = cifs_sb->tcon;
-
-	full_path = build_path_from_dentry(file->f_path.dentry);
-
-	if (full_path == NULL) {
-		rc = -ENOMEM;
-	} else {
-		cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
-		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
-			GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
-			&netfid, &oplock, NULL, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-		/* BB fixme - add this handle to a notify handle list */
-		if (rc) {
-			cFYI(1, ("Could not open directory for notify"));
-		} else {
-			filter = convert_to_cifs_notify_flags(arg);
-			if (filter != 0) {
-				rc = CIFSSMBNotify(xid, pTcon,
-					0 /* no subdirs */, netfid,
-					filter, file, arg & DN_MULTISHOT,
-					cifs_sb->local_nls);
-			} else {
-				rc = -EINVAL;
-			}
-			/* BB add code to close file eventually (at unmount
-			it would close automatically but may be a way
-			to do it easily when inode freed or when
-			notify info is cleared/changed */
-			cFYI(1, ("notify rc %d", rc));
-		}
-	}
-
-	FreeXid(xid);
-	return rc;
-}
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 676073b8dda5..b0aa2cde80bd 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	dn->dn_next = inode->i_dnotify;
 	inode->i_dnotify = dn;
 	spin_unlock(&inode->i_lock);
-
-	if (filp->f_op && filp->f_op->dir_notify)
-		return filp->f_op->dir_notify(filp, arg);
 	return 0;
 
 out_free:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd615986a41c..be16ce01fb1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1309,7 +1309,6 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-- 
cgit v1.2.3


From 261bca86ed4f7f391d1938167624e78da61dcc6b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 01:48:21 -0500
Subject: nfsd/create race fixes, infrastructure

new helpers - insert_inode_locked() and insert_inode_locked4().
Hash new inode, making sure that there's no such inode in icache
already.  If there is and it does not end up unhashed (as would
happen if we have nfsd trying to resolve a bogus fhandle), fail.
Otherwise insert our inode into hash and succeed.

In either case have i_state set to new+locked; cleanup ends up
being simpler with such calling conventions.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 2 files changed, 61 insertions(+)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 098a2443196f..7de1cda92489 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 
 EXPORT_SYMBOL(iget_locked);
 
+int insert_inode_locked(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	ino_t ino = inode->i_ino;
+	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode *old;
+
+	inode->i_state |= I_LOCK|I_NEW;
+	while (1) {
+		spin_lock(&inode_lock);
+		old = find_inode_fast(sb, head, ino);
+		if (likely(!old)) {
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_lock);
+			return 0;
+		}
+		__iget(old);
+		spin_unlock(&inode_lock);
+		wait_on_inode(old);
+		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+			iput(old);
+			return -EBUSY;
+		}
+		iput(old);
+	}
+}
+
+EXPORT_SYMBOL(insert_inode_locked);
+
+int insert_inode_locked4(struct inode *inode, unsigned long hashval,
+		int (*test)(struct inode *, void *), void *data)
+{
+	struct super_block *sb = inode->i_sb;
+	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *old;
+
+	inode->i_state |= I_LOCK|I_NEW;
+
+	while (1) {
+		spin_lock(&inode_lock);
+		old = find_inode(sb, head, test, data);
+		if (likely(!old)) {
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_lock);
+			return 0;
+		}
+		__iget(old);
+		spin_unlock(&inode_lock);
+		wait_on_inode(old);
+		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+			iput(old);
+			return -EBUSY;
+		}
+		iput(old);
+	}
+}
+
+EXPORT_SYMBOL(insert_inode_locked4);
+
 /**
  *	__insert_inode_hash - hash an inode
  *	@inode: unhashed inode
diff --git a/include/linux/fs.h b/include/linux/fs.h
index be16ce01fb1b..e2170ee21e18 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1902,6 +1902,8 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
 
 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
 extern struct inode * iget_locked(struct super_block *, unsigned long);
+extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
+extern int insert_inode_locked(struct inode *);
 extern void unlock_new_inode(struct inode *);
 
 extern void __iget(struct inode * inode);
-- 
cgit v1.2.3